Skip to main content

libsais_rs/
libsais16.rs

1//! Rust translation of upstream [libsais](https://github.com/IlyaGrebnov/libsais)
2//! 2.10.4 by Ilya Grebnov.
3//!
4//! This module exposes the 16-bit alphabet suffix array, BWT, unBWT, PLCP and
5//! LCP entry points (mirroring `libsais16.h`).
6
7use std::mem;
8
/// Signed 32-bit integer type used for suffix-array entries, lengths and counts.
pub type SaSint = i32;
/// Unsigned 32-bit counterpart of `SaSint`.
pub type SaUint = u32;

/// Number of distinct symbols in a 16-bit alphabet (65,536).
pub const ALPHABET_SIZE: usize = 1 << 16;
/// Largest `SaSint` value; also used as a mask that clears the sign bit.
const SAINT_MAX: SaSint = i32::MAX;
/// Smallest `SaSint` value; only the sign bit is set.
const SAINT_MIN: SaSint = i32::MIN;
/// Width of `SaSint` in bits.
const SAINT_BIT: u32 = SaSint::BITS;
/// `SAINT_BIT - 1`, i.e. 31.
const SUFFIX_GROUP_BIT: u32 = SAINT_BIT - 1;
/// Single-bit mask `1 << (SUFFIX_GROUP_BIT - 1)`, i.e. bit 30.
const SUFFIX_GROUP_MARKER: SaSint = 1 << (SUFFIX_GROUP_BIT - 1);
/// Flag value 1; presumably selects BWT output (named after upstream) — confirm at use sites.
const LIBSAIS_FLAGS_BWT: SaSint = 1;
/// Flag value 2; presumably selects generalized suffix array mode — confirm at use sites.
const LIBSAIS_FLAGS_GSA: SaSint = 2;
/// Free-space figure (in entries) assumed when a caller reports local buckets.
const LIBSAIS_LOCAL_BUFFER_SIZE: usize = 2000;
/// The reverse-BWT `fastbits` table holds `1 + (1 << UNBWT_FASTBITS)` entries.
const UNBWT_FASTBITS: usize = 17;
/// Number of `ThreadCache` entries allocated for each worker.
const PER_THREAD_CACHE_SIZE: usize = 2_097_184;
23
24#[repr(C)]
25#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
26struct ThreadCache {
27    symbol: SaSint,
28    index: SaSint,
29}
30
31#[derive(Clone, Debug, Default, PartialEq, Eq)]
32pub struct ThreadState {
33    position: SaSint,
34    m: SaSint,
35    last_lms_suffix: SaSint,
36    count: SaSint,
37    buckets: Vec<SaSint>,
38    cache: Vec<ThreadCache>,
39    cache_entries: usize,
40}
41
42#[derive(Clone, Debug, Default, PartialEq, Eq)]
43pub struct Context {
44    buckets: Vec<SaSint>,
45    thread_state: Option<Vec<ThreadState>>,
46    threads: SaSint,
47}
48
49#[derive(Clone, Debug, Default, PartialEq, Eq)]
50pub struct UnbwtContext {
51    bucket2: Vec<usize>,
52    fastbits: Vec<u16>,
53    buckets: Option<Vec<usize>>,
54    threads: SaSint,
55}
56
57/// Creates the libsais16 context that allows reusing allocated memory with each libsais16 operation.
58///
59/// In multi-threaded environments, use one context per thread for parallel executions.
60///
61/// Returns the context, or `None` on allocation failure.
62pub fn create_ctx() -> Option<Context> {
63    create_ctx_main(1)
64}
65
66/// Creates the libsais16 context for parallel operations using OpenMP-style threading.
67///
68/// In multi-threaded environments, use one context per thread for parallel executions.
69///
70/// - `threads`: number of worker threads (can be 0 for the implementation default).
71///
72/// Returns the context, or `None` on allocation failure.
73pub fn create_ctx_omp(threads: SaSint) -> Option<Context> {
74    if threads < 0 {
75        None
76    } else {
77        create_ctx_main(normalize_threads(threads))
78    }
79}
80
81/// Destroys the libsais16 context and frees previously allocated memory.
82pub fn free_ctx(_ctx: Context) {}
83
84/// Creates the libsais16 reverse-BWT context that allows reusing allocated memory with each `libsais16_unbwt_*` operation.
85///
86/// In multi-threaded environments, use one context per thread for parallel executions.
87///
88/// Returns the context, or `None` on allocation failure.
89pub fn unbwt_create_ctx() -> Option<UnbwtContext> {
90    unbwt_create_ctx_main(1)
91}
92
93/// Creates the libsais16 reverse-BWT context for parallel `libsais16_unbwt_*` operations using OpenMP-style threading.
94///
95/// In multi-threaded environments, use one context per thread for parallel executions.
96///
97/// - `threads`: number of worker threads (can be 0 for the implementation default).
98///
99/// Returns the context, or `None` on allocation failure.
100pub fn unbwt_create_ctx_omp(threads: SaSint) -> Option<UnbwtContext> {
101    if threads < 0 {
102        None
103    } else {
104        unbwt_create_ctx_main(normalize_threads(threads))
105    }
106}
107
108/// Destroys the libsais16 reverse-BWT context and frees previously allocated memory.
109pub fn unbwt_free_ctx(_ctx: UnbwtContext) {}
110
111fn normalize_threads(threads: SaSint) -> SaSint {
112    if threads > 0 {
113        threads
114    } else {
115        1
116    }
117}
118
/// Rounds `value` up to the next multiple of `alignment`.
///
/// The mask arithmetic is only a correct round-up when `alignment` is a
/// power of two.
fn align_up(value: usize, alignment: usize) -> usize {
    let low_bits = alignment - 1;
    (value + low_bits) & !low_bits
}
122
123fn alloc_thread_state(threads: SaSint) -> Option<Vec<ThreadState>> {
124    let threads = usize::try_from(threads).ok()?;
125    let mut thread_state = Vec::with_capacity(threads);
126    for _ in 0..threads {
127        thread_state.push(ThreadState {
128            position: 0,
129            m: 0,
130            last_lms_suffix: 0,
131            count: 0,
132            buckets: vec![0; 4 * ALPHABET_SIZE],
133            cache: vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE],
134            cache_entries: PER_THREAD_CACHE_SIZE,
135        });
136    }
137    Some(thread_state)
138}
139
140fn create_ctx_main(threads: SaSint) -> Option<Context> {
141    let buckets = vec![0; 8 * ALPHABET_SIZE];
142    let thread_state = if threads > 1 {
143        Some(alloc_thread_state(threads)?)
144    } else {
145        None
146    };
147
148    Some(Context {
149        buckets,
150        thread_state,
151        threads,
152    })
153}
154
155fn unbwt_create_ctx_main(threads: SaSint) -> Option<UnbwtContext> {
156    let bucket2 = vec![0; ALPHABET_SIZE];
157    let fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
158    let buckets = if threads > 1 {
159        Some(vec![0; usize::try_from(threads).ok()? * ALPHABET_SIZE])
160    } else {
161        None
162    };
163
164    Some(UnbwtContext {
165        bucket2,
166        fastbits,
167        buckets,
168        threads,
169    })
170}
171
172fn fill_freq(t: &[u16], freq: Option<&mut [SaSint]>) {
173    if let Some(freq) = freq {
174        freq[..ALPHABET_SIZE].fill(0);
175        for &symbol in t {
176            freq[symbol as usize] += 1;
177        }
178    }
179}
180
/// Index of sub-bucket `s` of symbol `c` in a table with four slots per symbol.
#[allow(dead_code)]
fn buckets_index4(c: usize, s: usize) -> usize {
    let symbol_base = c << 2;
    symbol_base + s
}
185
/// Index of sub-bucket `s` of symbol `c` in a table with two slots per symbol.
#[allow(dead_code)]
fn buckets_index2(c: usize, s: usize) -> usize {
    let symbol_base = c << 1;
    symbol_base + s
}
190
191#[allow(dead_code)]
192fn place_cached_suffixes(
193    sa: &mut [SaSint],
194    cache: &[ThreadCache],
195    block_start: SaSint,
196    block_size: SaSint,
197) {
198    let start = usize::try_from(block_start).expect("block_start must be non-negative");
199    let len = usize::try_from(block_size).expect("block_size must be non-negative");
200    let entries = if cache.len() >= start + len {
201        &cache[start..start + len]
202    } else {
203        &cache[..len]
204    };
205
206    for entry in entries {
207        sa[entry.symbol as usize] = entry.index;
208    }
209}
210
211#[allow(dead_code)]
212fn compact_and_place_cached_suffixes(
213    sa: &mut [SaSint],
214    cache: &mut [ThreadCache],
215    block_start: SaSint,
216    block_size: SaSint,
217) {
218    let start = usize::try_from(block_start).expect("block_start must be non-negative");
219    let len = usize::try_from(block_size).expect("block_size must be non-negative");
220    let read_start = if cache.len() >= start + len { start } else { 0 };
221    let read_end = read_start + len;
222
223    let mut write = read_start;
224    for read in read_start..read_end {
225        let entry = cache[read];
226        if entry.symbol >= 0 {
227            cache[write] = entry;
228            write += 1;
229        }
230    }
231    place_cached_suffixes(sa, cache, block_start, (write - read_start) as SaSint);
232}
233
234#[allow(dead_code)]
235fn count_negative_marked_suffixes(
236    sa: &[SaSint],
237    block_start: SaSint,
238    block_size: SaSint,
239) -> SaSint {
240    let start = block_start as usize;
241    let end = start + block_size as usize;
242    sa[start..end].iter().filter(|&&value| value < 0).count() as SaSint
243}
244
245#[allow(dead_code)]
246fn count_zero_marked_suffixes(sa: &[SaSint], block_start: SaSint, block_size: SaSint) -> SaSint {
247    let start = block_start as usize;
248    let end = start + block_size as usize;
249    sa[start..end].iter().filter(|&&value| value == 0).count() as SaSint
250}
251
252#[allow(dead_code)]
253fn accumulate_counts_s32_n(
254    buckets: &mut [SaSint],
255    bucket00: usize,
256    bucket_size: usize,
257    bucket_stride: usize,
258    num_buckets: usize,
259) {
260    for s in 0..bucket_size {
261        let mut sum = buckets[bucket00 + s];
262        for bucket in 1..num_buckets {
263            sum += buckets[bucket00 - bucket * bucket_stride + s];
264        }
265        buckets[bucket00 + s] = sum;
266    }
267}
268
269#[allow(dead_code)]
270fn accumulate_counts_s32_2(
271    buckets: &mut [SaSint],
272    bucket00: usize,
273    bucket_size: usize,
274    bucket_stride: usize,
275) {
276    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 2);
277}
278
279#[allow(dead_code)]
280fn accumulate_counts_s32_3(
281    buckets: &mut [SaSint],
282    bucket00: usize,
283    bucket_size: usize,
284    bucket_stride: usize,
285) {
286    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 3);
287}
288
289#[allow(dead_code)]
290fn accumulate_counts_s32_4(
291    buckets: &mut [SaSint],
292    bucket00: usize,
293    bucket_size: usize,
294    bucket_stride: usize,
295) {
296    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 4);
297}
298
299#[allow(dead_code)]
300fn accumulate_counts_s32_5(
301    buckets: &mut [SaSint],
302    bucket00: usize,
303    bucket_size: usize,
304    bucket_stride: usize,
305) {
306    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 5);
307}
308
309#[allow(dead_code)]
310fn accumulate_counts_s32_6(
311    buckets: &mut [SaSint],
312    bucket00: usize,
313    bucket_size: usize,
314    bucket_stride: usize,
315) {
316    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 6);
317}
318
319#[allow(dead_code)]
320fn accumulate_counts_s32_7(
321    buckets: &mut [SaSint],
322    bucket00: usize,
323    bucket_size: usize,
324    bucket_stride: usize,
325) {
326    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 7);
327}
328
329#[allow(dead_code)]
330fn accumulate_counts_s32_8(
331    buckets: &mut [SaSint],
332    bucket00: usize,
333    bucket_size: usize,
334    bucket_stride: usize,
335) {
336    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 8);
337}
338
339#[allow(dead_code)]
340fn accumulate_counts_s32_9(
341    buckets: &mut [SaSint],
342    bucket00: usize,
343    bucket_size: usize,
344    bucket_stride: usize,
345) {
346    accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 9);
347}
348
349#[allow(dead_code)]
350fn accumulate_counts_s32(
351    buckets: &mut [SaSint],
352    bucket00: usize,
353    bucket_size: usize,
354    bucket_stride: usize,
355    mut num_buckets: usize,
356) {
357    while num_buckets >= 9 {
358        accumulate_counts_s32_9(
359            buckets,
360            bucket00 - (num_buckets - 9) * bucket_stride,
361            bucket_size,
362            bucket_stride,
363        );
364        num_buckets -= 8;
365    }
366
367    match num_buckets {
368        2 => accumulate_counts_s32_2(buckets, bucket00, bucket_size, bucket_stride),
369        3 => accumulate_counts_s32_3(buckets, bucket00, bucket_size, bucket_stride),
370        4 => accumulate_counts_s32_4(buckets, bucket00, bucket_size, bucket_stride),
371        5 => accumulate_counts_s32_5(buckets, bucket00, bucket_size, bucket_stride),
372        6 => accumulate_counts_s32_6(buckets, bucket00, bucket_size, bucket_stride),
373        7 => accumulate_counts_s32_7(buckets, bucket00, bucket_size, bucket_stride),
374        8 => accumulate_counts_s32_8(buckets, bucket00, bucket_size, bucket_stride),
375        _ => {}
376    }
377}
378
379#[allow(dead_code)]
380fn flip_suffix_markers_omp(sa: &mut [SaSint], l: SaSint, threads: SaSint) {
381    let len = usize::try_from(l).expect("l must be non-negative");
382    let omp_num_threads = if threads > 1 && l >= 65_536 {
383        usize::try_from(threads).expect("threads must be non-negative")
384    } else {
385        1
386    };
387    let omp_block_stride = (len / omp_num_threads) & !15usize;
388    for omp_thread_num in 0..omp_num_threads {
389        let omp_block_start = omp_thread_num * omp_block_stride;
390        let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
391            omp_block_stride
392        } else {
393            len - omp_block_start
394        };
395        for value in &mut sa[omp_block_start..omp_block_start + omp_block_size] {
396            *value ^= SAINT_MIN;
397        }
398    }
399}
400
401#[allow(dead_code)]
402fn gather_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
403    let mut i = n - 2;
404    let mut m = n - 1;
405    let mut f0 = 1usize;
406    let mut f1: usize;
407    let mut c0 = t[(n - 1) as usize] as isize;
408    let mut c1: isize;
409
410    while i >= 3 {
411        c1 = t[i as usize] as isize;
412        f1 = usize::from(c1 > c0 - f0 as isize);
413        sa[m as usize] = i + 1;
414        m -= (f1 & !f0) as SaSint;
415
416        c0 = t[(i - 1) as usize] as isize;
417        f0 = usize::from(c0 > c1 - f1 as isize);
418        sa[m as usize] = i;
419        m -= (f0 & !f1) as SaSint;
420
421        c1 = t[(i - 2) as usize] as isize;
422        f1 = usize::from(c1 > c0 - f0 as isize);
423        sa[m as usize] = i - 1;
424        m -= (f1 & !f0) as SaSint;
425
426        c0 = t[(i - 3) as usize] as isize;
427        f0 = usize::from(c0 > c1 - f1 as isize);
428        sa[m as usize] = i - 2;
429        m -= (f0 & !f1) as SaSint;
430
431        i -= 4;
432    }
433
434    while i >= 0 {
435        c1 = c0;
436        c0 = t[i as usize] as isize;
437        f1 = f0;
438        f0 = usize::from(c0 > c1 - f1 as isize);
439        sa[m as usize] = i + 1;
440        m -= (f0 & !f1) as SaSint;
441        i -= 1;
442    }
443
444    n - 1 - m
445}
446
447#[allow(dead_code)]
448fn gather_compacted_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
449    let mut i = n - 2;
450    let mut m = n - 1;
451    let mut f0 = 1usize;
452    let mut f1: usize;
453    let mut c0 = t[(n - 1) as usize] as isize;
454    let mut c1: isize;
455
456    while i >= 3 {
457        c1 = t[i as usize] as isize;
458        f1 = usize::from(c1 > c0 - f0 as isize);
459        sa[m as usize] = i + 1;
460        m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
461
462        c0 = t[(i - 1) as usize] as isize;
463        f0 = usize::from(c0 > c1 - f1 as isize);
464        sa[m as usize] = i;
465        m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
466
467        c1 = t[(i - 2) as usize] as isize;
468        f1 = usize::from(c1 > c0 - f0 as isize);
469        sa[m as usize] = i - 1;
470        m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
471
472        c0 = t[(i - 3) as usize] as isize;
473        f0 = usize::from(c0 > c1 - f1 as isize);
474        sa[m as usize] = i - 2;
475        m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
476
477        i -= 4;
478    }
479
480    while i >= 0 {
481        c1 = c0;
482        c0 = t[i as usize] as isize;
483        f1 = f0;
484        f0 = usize::from(c0 > c1 - f1 as isize);
485        sa[m as usize] = i + 1;
486        m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
487        i -= 1;
488    }
489
490    n - 1 - m
491}
492
493#[allow(dead_code)]
494fn count_lms_suffixes_32s_4k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
495    buckets[..4 * k as usize].fill(0);
496    let mut i = n - 2;
497    let mut f0 = 1usize;
498    let mut f1: usize;
499    let mut c0 = t[(n - 1) as usize] as isize;
500    let mut c1: isize;
501
502    while i >= 3 {
503        c1 = t[i as usize] as isize;
504        f1 = usize::from(c1 > c0 - f0 as isize);
505        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
506
507        c0 = t[(i - 1) as usize] as isize;
508        f0 = usize::from(c0 > c1 - f1 as isize);
509        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
510
511        c1 = t[(i - 2) as usize] as isize;
512        f1 = usize::from(c1 > c0 - f0 as isize);
513        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
514
515        c0 = t[(i - 3) as usize] as isize;
516        f0 = usize::from(c0 > c1 - f1 as isize);
517        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
518
519        i -= 4;
520    }
521
522    while i >= 0 {
523        c1 = c0;
524        c0 = t[i as usize] as isize;
525        f1 = f0;
526        f0 = usize::from(c0 > c1 - f1 as isize);
527        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
528        i -= 1;
529    }
530
531    buckets[buckets_index4(c0 as usize, f0 + f0)] += 1;
532}
533
534#[allow(dead_code)]
535fn count_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
536    buckets[..2 * k as usize].fill(0);
537    let mut i = n - 2;
538    let mut f0 = 1usize;
539    let mut f1: usize;
540    let mut c0 = t[(n - 1) as usize] as isize;
541    let mut c1: isize;
542
543    while i >= 3 {
544        c1 = t[i as usize] as isize;
545        f1 = usize::from(c1 > c0 - f0 as isize);
546        buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
547
548        c0 = t[(i - 1) as usize] as isize;
549        f0 = usize::from(c0 > c1 - f1 as isize);
550        buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
551
552        c1 = t[(i - 2) as usize] as isize;
553        f1 = usize::from(c1 > c0 - f0 as isize);
554        buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
555
556        c0 = t[(i - 3) as usize] as isize;
557        f0 = usize::from(c0 > c1 - f1 as isize);
558        buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
559
560        i -= 4;
561    }
562
563    while i >= 0 {
564        c1 = c0;
565        c0 = t[i as usize] as isize;
566        f1 = f0;
567        f0 = usize::from(c0 > c1 - f1 as isize);
568        buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
569        i -= 1;
570    }
571
572    buckets[buckets_index2(c0 as usize, 0)] += 1;
573}
574
575#[allow(dead_code)]
576fn count_compacted_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
577    buckets[..2 * k as usize].fill(0);
578    let mut i = n - 2;
579    let mut f0 = 1usize;
580    let mut f1: usize;
581    let mut c0 = t[(n - 1) as usize] as isize;
582    let mut c1: isize;
583
584    while i >= 3 {
585        c1 = t[i as usize] as isize;
586        f1 = usize::from(c1 > c0 - f0 as isize);
587        buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
588
589        c0 = t[(i - 1) as usize] as isize;
590        f0 = usize::from(c0 > c1 - f1 as isize);
591        buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
592
593        c1 = t[(i - 2) as usize] as isize;
594        f1 = usize::from(c1 > c0 - f0 as isize);
595        buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
596
597        c0 = t[(i - 3) as usize] as isize;
598        f0 = usize::from(c0 > c1 - f1 as isize);
599        buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
600
601        i -= 4;
602    }
603
604    while i >= 0 {
605        c1 = c0;
606        c0 = t[i as usize] as isize;
607        f1 = f0;
608        f0 = usize::from(c0 > c1 - f1 as isize);
609        buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
610        i -= 1;
611    }
612
613    buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, 0)] += 1;
614}
615
616#[allow(dead_code)]
617fn get_bucket_stride(free_space: SaSint, bucket_size: SaSint, num_buckets: SaSint) -> SaSint {
618    let bucket_size_1024 = (bucket_size + 1023) & !1023;
619    if free_space / (num_buckets - 1) >= bucket_size_1024 {
620        return bucket_size_1024;
621    }
622    let bucket_size_16 = (bucket_size + 15) & !15;
623    if free_space / (num_buckets - 1) >= bucket_size_16 {
624        return bucket_size_16;
625    }
626    bucket_size
627}
628
629#[allow(dead_code)]
630fn count_and_gather_lms_suffixes_32s_4k(
631    t: &[SaSint],
632    sa: &mut [SaSint],
633    n: SaSint,
634    k: SaSint,
635    buckets: &mut [SaSint],
636    omp_block_start: isize,
637    omp_block_size: isize,
638) -> SaSint {
639    buckets[..4 * k as usize].fill(0);
640    let mut m = omp_block_start + omp_block_size - 1;
641
642    if omp_block_size > 0 {
643        let mut j = m + 1;
644        let mut c0 = t[m as usize] as isize;
645        let mut c1 = -1isize;
646        while j < n as isize {
647            c1 = t[j as usize] as isize;
648            if c1 != c0 {
649                break;
650            }
651            j += 1;
652        }
653
654        let mut f0 = usize::from(c0 >= c1);
655        let mut f1: usize;
656        let mut i = m - 1;
657        j = omp_block_start + 64 + 3;
658        while i >= j {
659            c1 = t[i as usize] as isize;
660            f1 = usize::from(c1 > c0 - f0 as isize);
661            sa[m as usize] = (i + 1) as SaSint;
662            m -= (f1 & !f0) as isize;
663            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
664
665            c0 = t[(i - 1) as usize] as isize;
666            f0 = usize::from(c0 > c1 - f1 as isize);
667            sa[m as usize] = i as SaSint;
668            m -= (f0 & !f1) as isize;
669            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
670
671            c1 = t[(i - 2) as usize] as isize;
672            f1 = usize::from(c1 > c0 - f0 as isize);
673            sa[m as usize] = (i - 1) as SaSint;
674            m -= (f1 & !f0) as isize;
675            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
676
677            c0 = t[(i - 3) as usize] as isize;
678            f0 = usize::from(c0 > c1 - f1 as isize);
679            sa[m as usize] = (i - 2) as SaSint;
680            m -= (f0 & !f1) as isize;
681            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
682
683            i -= 4;
684        }
685
686        j -= 64 + 3;
687        while i >= j {
688            c1 = c0;
689            c0 = t[i as usize] as isize;
690            f1 = f0;
691            f0 = usize::from(c0 > c1 - f1 as isize);
692            sa[m as usize] = (i + 1) as SaSint;
693            m -= (f0 & !f1) as isize;
694            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
695            i -= 1;
696        }
697
698        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
699        f1 = usize::from(c1 > c0 - f0 as isize);
700        sa[m as usize] = (i + 1) as SaSint;
701        m -= (f1 & !f0) as isize;
702        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
703    }
704
705    (omp_block_start + omp_block_size - 1 - m) as SaSint
706}
707
708#[allow(dead_code)]
709fn count_and_gather_lms_suffixes_32s_2k(
710    t: &[SaSint],
711    sa: &mut [SaSint],
712    n: SaSint,
713    k: SaSint,
714    buckets: &mut [SaSint],
715    omp_block_start: isize,
716    omp_block_size: isize,
717) -> SaSint {
718    buckets[..2 * k as usize].fill(0);
719    let mut m = omp_block_start + omp_block_size - 1;
720
721    if omp_block_size > 0 {
722        let mut j = m + 1;
723        let mut c0 = t[m as usize] as isize;
724        let mut c1 = -1isize;
725        while j < n as isize {
726            c1 = t[j as usize] as isize;
727            if c1 != c0 {
728                break;
729            }
730            j += 1;
731        }
732
733        let mut f0 = usize::from(c0 >= c1);
734        let mut f1: usize;
735        let mut i = m - 1;
736        j = omp_block_start + 64 + 3;
737        while i >= j {
738            c1 = t[i as usize] as isize;
739            f1 = usize::from(c1 > c0 - f0 as isize);
740            sa[m as usize] = (i + 1) as SaSint;
741            m -= (f1 & !f0) as isize;
742            buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
743
744            c0 = t[(i - 1) as usize] as isize;
745            f0 = usize::from(c0 > c1 - f1 as isize);
746            sa[m as usize] = i as SaSint;
747            m -= (f0 & !f1) as isize;
748            buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
749
750            c1 = t[(i - 2) as usize] as isize;
751            f1 = usize::from(c1 > c0 - f0 as isize);
752            sa[m as usize] = (i - 1) as SaSint;
753            m -= (f1 & !f0) as isize;
754            buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
755
756            c0 = t[(i - 3) as usize] as isize;
757            f0 = usize::from(c0 > c1 - f1 as isize);
758            sa[m as usize] = (i - 2) as SaSint;
759            m -= (f0 & !f1) as isize;
760            buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
761
762            i -= 4;
763        }
764
765        j -= 64 + 3;
766        while i >= j {
767            c1 = c0;
768            c0 = t[i as usize] as isize;
769            f1 = f0;
770            f0 = usize::from(c0 > c1 - f1 as isize);
771            sa[m as usize] = (i + 1) as SaSint;
772            m -= (f0 & !f1) as isize;
773            buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
774            i -= 1;
775        }
776
777        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
778        f1 = usize::from(c1 > c0 - f0 as isize);
779        sa[m as usize] = (i + 1) as SaSint;
780        m -= (f1 & !f0) as isize;
781        buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
782    }
783
784    (omp_block_start + omp_block_size - 1 - m) as SaSint
785}
786
787#[allow(dead_code)]
788fn count_and_gather_compacted_lms_suffixes_32s_2k(
789    t: &[SaSint],
790    sa: &mut [SaSint],
791    n: SaSint,
792    k: SaSint,
793    buckets: &mut [SaSint],
794    omp_block_start: isize,
795    omp_block_size: isize,
796) -> SaSint {
797    buckets[..2 * k as usize].fill(0);
798    let mut m = omp_block_start + omp_block_size - 1;
799
800    if omp_block_size > 0 {
801        let mut j = m + 1;
802        let mut c0 = t[m as usize] as isize;
803        let mut c1 = -1isize;
804        while j < n as isize {
805            c1 = t[j as usize] as isize;
806            if c1 != c0 {
807                break;
808            }
809            j += 1;
810        }
811
812        let mut f0 = usize::from(c0 >= c1);
813        let mut f1: usize;
814        let mut i = m - 1;
815        j = omp_block_start + 64 + 3;
816        while i >= j {
817            c1 = t[i as usize] as isize;
818            f1 = usize::from(c1 > c0 - f0 as isize);
819            sa[m as usize] = (i + 1) as SaSint;
820            m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
821            buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
822
823            c0 = t[(i - 1) as usize] as isize;
824            f0 = usize::from(c0 > c1 - f1 as isize);
825            sa[m as usize] = i as SaSint;
826            m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
827            buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
828
829            c1 = t[(i - 2) as usize] as isize;
830            f1 = usize::from(c1 > c0 - f0 as isize);
831            sa[m as usize] = (i - 1) as SaSint;
832            m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
833            buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
834
835            c0 = t[(i - 3) as usize] as isize;
836            f0 = usize::from(c0 > c1 - f1 as isize);
837            sa[m as usize] = (i - 2) as SaSint;
838            m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
839            buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
840
841            i -= 4;
842        }
843
844        j -= 64 + 3;
845        while i >= j {
846            c1 = c0;
847            c0 = t[i as usize] as isize;
848            f1 = f0;
849            f0 = usize::from(c0 > c1 - f1 as isize);
850            sa[m as usize] = (i + 1) as SaSint;
851            m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
852            buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
853            i -= 1;
854        }
855
856        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
857        f1 = usize::from(c1 > c0 - f0 as isize);
858        sa[m as usize] = (i + 1) as SaSint;
859        m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
860        buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
861    }
862
863    (omp_block_start + omp_block_size - 1 - m) as SaSint
864}
865
866#[allow(dead_code)]
867fn count_and_gather_lms_suffixes_32s_4k_fs_omp(
868    t: &[SaSint],
869    sa: &mut [SaSint],
870    n: SaSint,
871    k: SaSint,
872    buckets: &mut [SaSint],
873    local_buckets: SaSint,
874    threads: SaSint,
875    thread_state: &mut [ThreadState],
876) -> SaSint {
877    if threads == 1 || n < 65_536 {
878        return count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize);
879    }
880
881    let thread_count = threads as usize;
882    let n_usize = n as usize;
883    let bucket_size = 4 * k as usize;
884    let block_stride = (n / threads) & !15;
885    let free_space = if local_buckets != 0 {
886        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
887    } else {
888        buckets.len() as SaSint
889    };
890    let bucket_stride = get_bucket_stride(free_space, 4 * k, threads) as usize;
891    let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
892    let mut workspace = vec![0; workspace_len];
893
894    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
895        let block_start = thread as SaSint * block_stride;
896        let block_size = if thread + 1 < thread_count {
897            block_stride
898        } else {
899            n - block_start
900        };
901        let workspace_end = workspace_len - thread * bucket_stride;
902        let workspace_start = workspace_end - bucket_size;
903        state.count = count_and_gather_lms_suffixes_32s_4k(
904            t,
905            sa,
906            n,
907            k,
908            &mut workspace[workspace_start..workspace_end],
909            block_start as isize,
910            block_size as isize,
911        );
912        state.position = block_start + block_size;
913    }
914
915    let mut m = 0usize;
916    for thread in (0..thread_count).rev() {
917        let count =
918            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
919        m += count;
920        if thread + 1 != thread_count && count > 0 {
921            let src_end = usize::try_from(thread_state[thread].position)
922                .expect("position must be non-negative");
923            let src_start = src_end - count;
924            let dst_start = n_usize - m;
925            sa.copy_within(src_start..src_end, dst_start);
926        }
927    }
928
929    let accumulation_threads = thread_count - 1;
930    let block_stride = (bucket_size / accumulation_threads) & !15usize;
931    for thread in 0..accumulation_threads {
932        let block_start = thread * block_stride;
933        let block_size = if thread + 1 < accumulation_threads {
934            block_stride
935        } else {
936            bucket_size - block_start
937        };
938        accumulate_counts_s32(
939            &mut workspace,
940            block_start,
941            block_size,
942            bucket_stride,
943            accumulation_threads + 1,
944        );
945    }
946
947    buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
948    m as SaSint
949}
950
951#[allow(dead_code)]
952fn count_and_gather_lms_suffixes_32s_2k_fs_omp(
953    t: &[SaSint],
954    sa: &mut [SaSint],
955    n: SaSint,
956    k: SaSint,
957    buckets: &mut [SaSint],
958    local_buckets: SaSint,
959    threads: SaSint,
960    thread_state: &mut [ThreadState],
961) -> SaSint {
962    if threads == 1 || n < 65_536 {
963        return count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
964    }
965
966    let thread_count = threads as usize;
967    let n_usize = n as usize;
968    let bucket_size = 2 * k as usize;
969    let block_stride = (n / threads) & !15;
970    let free_space = if local_buckets != 0 {
971        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
972    } else {
973        buckets.len() as SaSint
974    };
975    let bucket_stride = get_bucket_stride(free_space, 2 * k, threads) as usize;
976    let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
977    let mut workspace = vec![0; workspace_len];
978
979    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
980        let block_start = thread as SaSint * block_stride;
981        let block_size = if thread + 1 < thread_count {
982            block_stride
983        } else {
984            n - block_start
985        };
986        let workspace_end = workspace_len - thread * bucket_stride;
987        let workspace_start = workspace_end - bucket_size;
988        state.count = count_and_gather_lms_suffixes_32s_2k(
989            t,
990            sa,
991            n,
992            k,
993            &mut workspace[workspace_start..workspace_end],
994            block_start as isize,
995            block_size as isize,
996        );
997        state.position = block_start + block_size;
998    }
999
1000    let mut m = 0usize;
1001    for thread in (0..thread_count).rev() {
1002        let count =
1003            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1004        m += count;
1005        if thread + 1 != thread_count && count > 0 {
1006            let src_end = usize::try_from(thread_state[thread].position)
1007                .expect("position must be non-negative");
1008            let src_start = src_end - count;
1009            let dst_start = n_usize - m;
1010            sa.copy_within(src_start..src_end, dst_start);
1011        }
1012    }
1013
1014    let accumulation_threads = thread_count - 1;
1015    let block_stride = (bucket_size / accumulation_threads) & !15usize;
1016    for thread in 0..accumulation_threads {
1017        let block_start = thread * block_stride;
1018        let block_size = if thread + 1 < accumulation_threads {
1019            block_stride
1020        } else {
1021            bucket_size - block_start
1022        };
1023        accumulate_counts_s32(
1024            &mut workspace,
1025            block_start,
1026            block_size,
1027            bucket_stride,
1028            accumulation_threads + 1,
1029        );
1030    }
1031
1032    buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
1033    m as SaSint
1034}
1035
1036#[allow(dead_code)]
1037fn count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1038    t: &[SaSint],
1039    sa: &mut [SaSint],
1040    n: SaSint,
1041    k: SaSint,
1042    buckets: &mut [SaSint],
1043    _local_buckets: SaSint,
1044    threads: SaSint,
1045    thread_state: &mut [ThreadState],
1046) {
1047    if threads == 1 || n < 65_536 {
1048        count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
1049        return;
1050    }
1051
1052    let thread_count = threads as usize;
1053    let n_usize = n as usize;
1054    let bucket_size = 2 * k as usize;
1055    let block_stride = (n / threads) & !15;
1056    let mut workspaces = vec![vec![0; bucket_size]; thread_count];
1057    let mut gathered_runs = vec![Vec::<SaSint>::new(); thread_count];
1058
1059    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
1060        let block_start = thread as SaSint * block_stride;
1061        let block_size = if thread + 1 < thread_count {
1062            block_stride
1063        } else {
1064            n - block_start
1065        };
1066        let mut temp_sa = vec![0; n_usize + block_size as usize];
1067        state.count = count_and_gather_compacted_lms_suffixes_32s_2k(
1068            t,
1069            &mut temp_sa,
1070            n,
1071            k,
1072            &mut workspaces[thread],
1073            block_start as isize,
1074            block_size as isize,
1075        );
1076        state.position = block_start + block_size;
1077        let count = usize::try_from(state.count).expect("count must be non-negative");
1078        let src_end =
1079            n_usize + usize::try_from(state.position).expect("position must be non-negative");
1080        let src_start = src_end - count;
1081        gathered_runs[thread].extend_from_slice(&temp_sa[src_start..src_end]);
1082    }
1083
1084    let mut suffixes_before = 0usize;
1085    for thread in (0..thread_count).rev() {
1086        let count =
1087            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1088        suffixes_before += count;
1089        if count > 0 {
1090            let dst_start = n_usize - suffixes_before;
1091            let dst_end = dst_start + count;
1092            sa[dst_start..dst_end].copy_from_slice(&gathered_runs[thread]);
1093        }
1094    }
1095
1096    buckets.fill(0);
1097    for workspace in &workspaces {
1098        for (dst, src) in buckets.iter_mut().zip(workspace.iter()) {
1099            *dst += *src;
1100        }
1101    }
1102}
1103
1104#[allow(dead_code)]
1105fn count_and_gather_lms_suffixes_32s_4k_nofs_omp(
1106    t: &[SaSint],
1107    sa: &mut [SaSint],
1108    n: SaSint,
1109    k: SaSint,
1110    buckets: &mut [SaSint],
1111    threads: SaSint,
1112) -> SaSint {
1113    if threads > 1 && n >= 65_536 {
1114        count_lms_suffixes_32s_4k(t, n, k, buckets);
1115        gather_lms_suffixes_32s(t, sa, n)
1116    } else {
1117        count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize)
1118    }
1119}
1120
1121#[allow(dead_code)]
1122fn count_and_gather_lms_suffixes_32s_2k_nofs_omp(
1123    t: &[SaSint],
1124    sa: &mut [SaSint],
1125    n: SaSint,
1126    k: SaSint,
1127    buckets: &mut [SaSint],
1128    threads: SaSint,
1129) -> SaSint {
1130    if threads > 1 && n >= 65_536 {
1131        count_lms_suffixes_32s_2k(t, n, k, buckets);
1132        gather_lms_suffixes_32s(t, sa, n)
1133    } else {
1134        count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1135    }
1136}
1137
1138#[allow(dead_code)]
1139fn count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
1140    t: &[SaSint],
1141    sa: &mut [SaSint],
1142    n: SaSint,
1143    k: SaSint,
1144    buckets: &mut [SaSint],
1145    threads: SaSint,
1146) -> SaSint {
1147    if threads > 1 && n >= 65_536 {
1148        count_compacted_lms_suffixes_32s_2k(t, n, k, buckets);
1149        gather_compacted_lms_suffixes_32s(t, sa, n)
1150    } else {
1151        count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1152    }
1153}
1154
1155#[allow(dead_code)]
1156fn count_and_gather_lms_suffixes_32s_4k_omp(
1157    t: &[SaSint],
1158    sa: &mut [SaSint],
1159    n: SaSint,
1160    k: SaSint,
1161    buckets: &mut [SaSint],
1162    local_buckets: SaSint,
1163    threads: SaSint,
1164    thread_state: &mut [ThreadState],
1165) -> SaSint {
1166    let free_space = if local_buckets != 0 {
1167        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1168    } else {
1169        buckets.len() as SaSint
1170    };
1171    let mut max_threads = (free_space / (((4 * k) + 15) & !15)).min(threads);
1172
1173    if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1174        let thread_cap = n / (16 * k);
1175        if max_threads > thread_cap {
1176            max_threads = thread_cap;
1177        }
1178        count_and_gather_lms_suffixes_32s_4k_fs_omp(
1179            t,
1180            sa,
1181            n,
1182            k,
1183            buckets,
1184            local_buckets,
1185            max_threads.max(2),
1186            thread_state,
1187        )
1188    } else if threads > 1 && n >= 65_536 {
1189        count_lms_suffixes_32s_4k(t, n, k, buckets);
1190        gather_lms_suffixes_32s(t, sa, n)
1191    } else {
1192        count_and_gather_lms_suffixes_32s_4k_nofs_omp(t, sa, n, k, buckets, threads)
1193    }
1194}
1195
1196#[allow(dead_code)]
1197fn count_and_gather_lms_suffixes_32s_2k_omp(
1198    t: &[SaSint],
1199    sa: &mut [SaSint],
1200    n: SaSint,
1201    k: SaSint,
1202    buckets: &mut [SaSint],
1203    local_buckets: SaSint,
1204    threads: SaSint,
1205    thread_state: &mut [ThreadState],
1206) -> SaSint {
1207    let free_space = if local_buckets != 0 {
1208        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1209    } else {
1210        buckets.len() as SaSint
1211    };
1212    let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1213
1214    if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1215        let thread_cap = n / (8 * k);
1216        if max_threads > thread_cap {
1217            max_threads = thread_cap;
1218        }
1219        count_and_gather_lms_suffixes_32s_2k_fs_omp(
1220            t,
1221            sa,
1222            n,
1223            k,
1224            buckets,
1225            local_buckets,
1226            max_threads.max(2),
1227            thread_state,
1228        )
1229    } else if threads > 1 && n >= 65_536 {
1230        count_lms_suffixes_32s_2k(t, n, k, buckets);
1231        gather_lms_suffixes_32s(t, sa, n)
1232    } else {
1233        count_and_gather_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads)
1234    }
1235}
1236
1237#[allow(dead_code)]
1238fn count_suffixes_32s(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
1239    buckets[..k as usize].fill(0);
1240
1241    let mut i = 0usize;
1242    let mut j = (n as usize).saturating_sub(7);
1243    while i < j {
1244        buckets[t[i] as usize] += 1;
1245        buckets[t[i + 1] as usize] += 1;
1246        buckets[t[i + 2] as usize] += 1;
1247        buckets[t[i + 3] as usize] += 1;
1248        buckets[t[i + 4] as usize] += 1;
1249        buckets[t[i + 5] as usize] += 1;
1250        buckets[t[i + 6] as usize] += 1;
1251        buckets[t[i + 7] as usize] += 1;
1252        i += 8;
1253    }
1254
1255    j += 7;
1256    while i < j {
1257        buckets[t[i] as usize] += 1;
1258        i += 1;
1259    }
1260}
1261
1262#[allow(dead_code)]
1263fn initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
1264    let k = k as usize;
1265    let mut sum = 0;
1266    for j in 0..k {
1267        let i = buckets_index4(j, 0);
1268        buckets[4 * k + j] = sum;
1269        sum += buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1270        buckets[5 * k + j] = sum;
1271    }
1272}
1273
1274#[allow(dead_code)]
1275fn initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: &mut [SaSint]) {
1276    let k = k as usize;
1277    let mut sum = 0;
1278    for j in 0..k {
1279        let i = buckets_index2(j, 0);
1280        buckets[2 * k + j] = sum;
1281        sum += buckets[i] + buckets[i + 1];
1282        buckets[3 * k + j] = sum;
1283    }
1284}
1285
1286#[allow(dead_code)]
1287fn initialize_buckets_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1288    let mut sum0 = 0;
1289    for j in 0..k as usize {
1290        let i = buckets_index2(j, 0);
1291        sum0 += buckets[i] + buckets[i + 1];
1292        buckets[i] = sum0;
1293    }
1294}
1295
1296#[allow(dead_code)]
1297fn initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1298    let k = k as usize;
1299    for j in 0..k {
1300        let i = buckets_index2(j, 0);
1301        buckets[j] = buckets[i];
1302    }
1303    buckets[k] = 0;
1304    buckets.copy_within(0..k - 1, k + 1);
1305}
1306
1307#[allow(dead_code)]
1308fn initialize_buckets_start_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1309    let mut sum = 0;
1310    for bucket in buckets.iter_mut().take(k as usize) {
1311        let tmp = *bucket;
1312        *bucket = sum;
1313        sum += tmp;
1314    }
1315}
1316
1317#[allow(dead_code)]
1318fn initialize_buckets_end_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1319    let mut sum = 0;
1320    for bucket in buckets.iter_mut().take(k as usize) {
1321        sum += *bucket;
1322        *bucket = sum;
1323    }
1324}
1325
1326#[allow(dead_code)]
1327fn initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
1328    t: &[SaSint],
1329    k: SaSint,
1330    buckets: &mut [SaSint],
1331    first_lms_suffix: SaSint,
1332) {
1333    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1334    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1335
1336    let mut sum0 = 0;
1337    let mut sum1 = 0;
1338    for j in 0..k as usize {
1339        let i = buckets_index2(j, 0);
1340        sum0 += buckets[i] + buckets[i + 1];
1341        sum1 += buckets[i + 1];
1342        buckets[i] = sum0;
1343        buckets[i + 1] = sum1;
1344    }
1345}
1346
1347#[allow(dead_code)]
1348fn initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
1349    t: &[SaSint],
1350    k: SaSint,
1351    buckets: &mut [SaSint],
1352    mut first_lms_suffix: SaSint,
1353) -> SaSint {
1354    let mut f0 = 0usize;
1355    let mut c0 = t[first_lms_suffix as usize] as isize;
1356
1357    loop {
1358        first_lms_suffix -= 1;
1359        if first_lms_suffix < 0 {
1360            break;
1361        }
1362        let c1 = c0;
1363        c0 = t[first_lms_suffix as usize] as isize;
1364        let f1 = f0;
1365        f0 = usize::from(c0 > c1 - f1 as isize);
1366        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1367    }
1368    buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1369
1370    let mut sum = 0;
1371    for j in 0..k as usize {
1372        let i = buckets_index4(j, 0);
1373        sum += buckets[i + 1] + buckets[i + 3];
1374        buckets[4 * k as usize + j] = sum;
1375    }
1376    sum
1377}
1378
1379#[allow(dead_code)]
1380fn initialize_buckets_for_partial_sorting_32s_6k(
1381    t: &[SaSint],
1382    k: SaSint,
1383    buckets: &mut [SaSint],
1384    first_lms_suffix: SaSint,
1385    left_suffixes_count: SaSint,
1386) {
1387    let k = k as usize;
1388    let temp_offset = 4 * k;
1389    let first_symbol = t[first_lms_suffix as usize] as usize;
1390    let mut sum0 = left_suffixes_count + 1;
1391    let mut sum1 = 0;
1392    let mut sum2 = 0;
1393
1394    for j in 0..first_symbol {
1395        let i = buckets_index4(j, 0);
1396        let tj = buckets_index2(j, 0);
1397        let ss = buckets[i];
1398        let ls = buckets[i + 1];
1399        let sl = buckets[i + 2];
1400        let ll = buckets[i + 3];
1401
1402        buckets[i] = sum0;
1403        buckets[i + 1] = sum2;
1404        buckets[i + 2] = 0;
1405        buckets[i + 3] = 0;
1406
1407        sum0 += ss + sl;
1408        sum1 += ls;
1409        sum2 += ls + ll;
1410
1411        buckets[temp_offset + tj] = sum0;
1412        buckets[temp_offset + tj + 1] = sum1;
1413    }
1414
1415    sum1 += 1;
1416    for j in first_symbol..k {
1417        let i = buckets_index4(j, 0);
1418        let tj = buckets_index2(j, 0);
1419        let ss = buckets[i];
1420        let ls = buckets[i + 1];
1421        let sl = buckets[i + 2];
1422        let ll = buckets[i + 3];
1423
1424        buckets[i] = sum0;
1425        buckets[i + 1] = sum2;
1426        buckets[i + 2] = 0;
1427        buckets[i + 3] = 0;
1428
1429        sum0 += ss + sl;
1430        sum1 += ls;
1431        sum2 += ls + ll;
1432
1433        buckets[temp_offset + tj] = sum0;
1434        buckets[temp_offset + tj + 1] = sum1;
1435    }
1436}
1437
1438#[allow(dead_code)]
1439fn initialize_buckets_for_radix_and_partial_sorting_32s_4k(
1440    t: &[SaSint],
1441    k: SaSint,
1442    buckets: &mut [SaSint],
1443    first_lms_suffix: SaSint,
1444) {
1445    let k = k as usize;
1446    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1447    buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1448
1449    let mut sum0 = 0;
1450    let mut sum1 = 0;
1451    for j in 0..k {
1452        let i = buckets_index2(j, 0);
1453        buckets[2 * k + j] = sum1;
1454        sum0 += buckets[i + 1];
1455        sum1 += buckets[i] + buckets[i + 1];
1456        buckets[i + 1] = sum0;
1457        buckets[3 * k + j] = sum1;
1458    }
1459}
1460
1461#[allow(dead_code)]
1462fn count_and_gather_compacted_lms_suffixes_32s_2k_omp(
1463    t: &[SaSint],
1464    sa: &mut [SaSint],
1465    n: SaSint,
1466    k: SaSint,
1467    buckets: &mut [SaSint],
1468    local_buckets: SaSint,
1469    threads: SaSint,
1470    thread_state: &mut [ThreadState],
1471) {
1472    let free_space = if local_buckets != 0 {
1473        LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1474    } else {
1475        buckets.len() as SaSint
1476    };
1477    let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1478
1479    if local_buckets == 0 && max_threads > 1 && n >= 65_536 && n / k >= 2 {
1480        let thread_cap = n / (8 * k);
1481        if max_threads > thread_cap {
1482            max_threads = thread_cap;
1483        }
1484        count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1485            t,
1486            sa,
1487            n,
1488            k,
1489            buckets,
1490            local_buckets,
1491            max_threads.max(2),
1492            thread_state,
1493        );
1494    } else {
1495        count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1496    }
1497}
1498
1499#[allow(dead_code)]
1500fn gather_lms_suffixes_16u(
1501    t: &[u16],
1502    sa: &mut [SaSint],
1503    n: SaSint,
1504    mut m: SaSint,
1505    omp_block_start: SaSint,
1506    omp_block_size: SaSint,
1507) {
1508    if omp_block_size > 0 {
1509        let n = n as isize;
1510        let mut i: isize;
1511        let mut j = (omp_block_start + omp_block_size) as isize;
1512        let mut c0 = t[(omp_block_start + omp_block_size - 1) as usize] as isize;
1513        let mut c1 = -1isize;
1514
1515        while j < n {
1516            c1 = t[j as usize] as isize;
1517            if c1 != c0 {
1518                break;
1519            }
1520            j += 1;
1521        }
1522
1523        let mut f0 = usize::from(c0 >= c1);
1524        let mut f1: usize;
1525
1526        i = (omp_block_start + omp_block_size - 2) as isize;
1527        j = (omp_block_start + 3) as isize;
1528        while i >= j {
1529            c1 = t[i as usize] as isize;
1530            f1 = usize::from(c1 > c0 - f0 as isize);
1531            sa[m as usize] = (i + 1) as SaSint;
1532            m -= (f1 & (1 - f0)) as SaSint;
1533
1534            c0 = t[(i - 1) as usize] as isize;
1535            f0 = usize::from(c0 > c1 - f1 as isize);
1536            sa[m as usize] = i as SaSint;
1537            m -= (f0 & (1 - f1)) as SaSint;
1538
1539            c1 = t[(i - 2) as usize] as isize;
1540            f1 = usize::from(c1 > c0 - f0 as isize);
1541            sa[m as usize] = (i - 1) as SaSint;
1542            m -= (f1 & (1 - f0)) as SaSint;
1543
1544            c0 = t[(i - 3) as usize] as isize;
1545            f0 = usize::from(c0 > c1 - f1 as isize);
1546            sa[m as usize] = (i - 2) as SaSint;
1547            m -= (f0 & (1 - f1)) as SaSint;
1548
1549            i -= 4;
1550        }
1551
1552        j -= 3;
1553        while i >= j {
1554            c1 = c0;
1555            c0 = t[i as usize] as isize;
1556            f1 = f0;
1557            f0 = usize::from(c0 > c1 - f1 as isize);
1558            sa[m as usize] = (i + 1) as SaSint;
1559            m -= (f0 & (1 - f1)) as SaSint;
1560            i -= 1;
1561        }
1562
1563        sa[m as usize] = (i + 1) as SaSint;
1564    }
1565}
1566
1567#[allow(dead_code)]
1568fn count_and_gather_lms_suffixes_16u(
1569    t: &[u16],
1570    sa: &mut [SaSint],
1571    n: SaSint,
1572    buckets: &mut [SaSint],
1573    omp_block_start: SaSint,
1574    omp_block_size: SaSint,
1575) -> SaSint {
1576    buckets[..4 * ALPHABET_SIZE].fill(0);
1577
1578    let mut m = (omp_block_start + omp_block_size - 1) as isize;
1579
1580    if omp_block_size > 0 {
1581        let n = n as isize;
1582        let mut i: isize;
1583        let mut j = m + 1;
1584        let mut c0 = t[m as usize] as isize;
1585        let mut c1 = -1isize;
1586
1587        while j < n {
1588            c1 = t[j as usize] as isize;
1589            if c1 != c0 {
1590                break;
1591            }
1592            j += 1;
1593        }
1594
1595        let mut f0 = usize::from(c0 >= c1);
1596        let mut f1: usize;
1597
1598        i = m - 1;
1599        j = (omp_block_start + 3) as isize;
1600        while i >= j {
1601            c1 = t[i as usize] as isize;
1602            f1 = usize::from(c1 > c0 - f0 as isize);
1603            sa[m as usize] = (i + 1) as SaSint;
1604            m -= (f1 & (1 - f0)) as isize;
1605            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1606
1607            c0 = t[(i - 1) as usize] as isize;
1608            f0 = usize::from(c0 > c1 - f1 as isize);
1609            sa[m as usize] = i as SaSint;
1610            m -= (f0 & (1 - f1)) as isize;
1611            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1612
1613            c1 = t[(i - 2) as usize] as isize;
1614            f1 = usize::from(c1 > c0 - f0 as isize);
1615            sa[m as usize] = (i - 1) as SaSint;
1616            m -= (f1 & (1 - f0)) as isize;
1617            buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1618
1619            c0 = t[(i - 3) as usize] as isize;
1620            f0 = usize::from(c0 > c1 - f1 as isize);
1621            sa[m as usize] = (i - 2) as SaSint;
1622            m -= (f0 & (1 - f1)) as isize;
1623            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1624
1625            i -= 4;
1626        }
1627
1628        j -= 3;
1629        while i >= j {
1630            c1 = c0;
1631            c0 = t[i as usize] as isize;
1632            f1 = f0;
1633            f0 = usize::from(c0 > c1 - f1 as isize);
1634            sa[m as usize] = (i + 1) as SaSint;
1635            m -= (f0 & (1 - f1)) as isize;
1636            buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1637            i -= 1;
1638        }
1639
1640        c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
1641        f1 = usize::from(c1 > c0 - f0 as isize);
1642        sa[m as usize] = (i + 1) as SaSint;
1643        m -= (f1 & (1 - f0)) as isize;
1644        buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1645    }
1646
1647    omp_block_start + omp_block_size - 1 - m as SaSint
1648}
1649
1650#[allow(dead_code)]
1651fn gather_lms_suffixes_16u_omp(
1652    t: &[u16],
1653    sa: &mut [SaSint],
1654    n: SaSint,
1655    threads: SaSint,
1656    thread_state: &mut [ThreadState],
1657) {
1658    if threads == 1 || n < 65_536 || thread_state.is_empty() {
1659        gather_lms_suffixes_16u(t, sa, n, n - 1, 0, n);
1660        return;
1661    }
1662
1663    let thread_count = threads as usize;
1664    let block_stride = (n / threads) & !15;
1665    let mut suffix_counts_after = vec![0; thread_count];
1666    let mut m = 0;
1667    for thread in (0..thread_count).rev() {
1668        suffix_counts_after[thread] = m;
1669        m += thread_state[thread].m;
1670    }
1671
1672    for thread in 0..thread_count {
1673        let block_start = thread as SaSint * block_stride;
1674        let block_size = if thread + 1 < thread_count {
1675            block_stride
1676        } else {
1677            n - block_start
1678        };
1679        gather_lms_suffixes_16u(
1680            t,
1681            sa,
1682            n,
1683            n - 1 - suffix_counts_after[thread],
1684            block_start,
1685            block_size,
1686        );
1687    }
1688
1689    for thread in 0..thread_count {
1690        if thread_state[thread].m > 0 {
1691            sa[(n - 1 - suffix_counts_after[thread]) as usize] =
1692                thread_state[thread].last_lms_suffix;
1693        }
1694    }
1695}
1696
1697#[allow(dead_code)]
1698fn count_and_gather_lms_suffixes_16u_omp(
1699    t: &[u16],
1700    sa: &mut [SaSint],
1701    n: SaSint,
1702    buckets: &mut [SaSint],
1703    threads: SaSint,
1704    thread_state: &mut [ThreadState],
1705) -> SaSint {
1706    if threads == 1 || n < 65_536 || thread_state.is_empty() {
1707        return count_and_gather_lms_suffixes_16u(t, sa, n, buckets, 0, n);
1708    }
1709
1710    let thread_count = threads as usize;
1711    let block_stride = (n / threads) & !15;
1712
1713    for thread in 0..thread_count {
1714        let block_start = thread as SaSint * block_stride;
1715        let block_size = if thread + 1 < thread_count {
1716            block_stride
1717        } else {
1718            n - block_start
1719        };
1720        let count = count_and_gather_lms_suffixes_16u(
1721            t,
1722            sa,
1723            n,
1724            &mut thread_state[thread].buckets,
1725            block_start,
1726            block_size,
1727        );
1728        thread_state[thread].m = count;
1729        thread_state[thread].position = block_start + block_size;
1730        if count > 0 {
1731            thread_state[thread].last_lms_suffix = sa[(block_start + block_size - 1) as usize];
1732        }
1733    }
1734
1735    buckets[..4 * ALPHABET_SIZE].fill(0);
1736    let mut m = 0;
1737    for thread in (0..thread_count).rev() {
1738        let position = thread_state[thread].position;
1739        let count = thread_state[thread].m;
1740        m += count;
1741        if thread + 1 != thread_count && count > 0 {
1742            let src_end = position as usize;
1743            let src_start = src_end - count as usize;
1744            let dst_start = (n - m) as usize;
1745            sa.copy_within(src_start..src_end, dst_start);
1746        }
1747        for s in 0..4 * ALPHABET_SIZE {
1748            let a = buckets[s];
1749            let b = thread_state[thread].buckets[s];
1750            buckets[s] = a + b;
1751            thread_state[thread].buckets[s] = a;
1752        }
1753    }
1754
1755    m
1756}
1757
1758#[allow(dead_code)]
1759fn initialize_buckets_start_and_end_16u(
1760    buckets: &mut [SaSint],
1761    freq: Option<&mut [SaSint]>,
1762) -> SaSint {
1763    let (count_buckets, start_end) = buckets.split_at_mut(6 * ALPHABET_SIZE);
1764    let (bucket_start, bucket_end) = start_end.split_at_mut(ALPHABET_SIZE);
1765
1766    let mut k = -1;
1767    let mut sum = 0;
1768
1769    if let Some(freq) = freq {
1770        for j in 0..ALPHABET_SIZE {
1771            let i = buckets_index4(j, 0);
1772            let total = count_buckets[i]
1773                + count_buckets[i + buckets_index4(0, 1)]
1774                + count_buckets[i + buckets_index4(0, 2)]
1775                + count_buckets[i + buckets_index4(0, 3)];
1776
1777            bucket_start[j] = sum;
1778            sum += total;
1779            bucket_end[j] = sum;
1780            if total > 0 {
1781                k = j as SaSint;
1782            }
1783            freq[j] = total;
1784        }
1785    } else {
1786        for j in 0..ALPHABET_SIZE {
1787            let i = buckets_index4(j, 0);
1788            let total = count_buckets[i]
1789                + count_buckets[i + buckets_index4(0, 1)]
1790                + count_buckets[i + buckets_index4(0, 2)]
1791                + count_buckets[i + buckets_index4(0, 3)];
1792
1793            bucket_start[j] = sum;
1794            sum += total;
1795            bucket_end[j] = sum;
1796            if total > 0 {
1797                k = j as SaSint;
1798            }
1799        }
1800    }
1801
1802    k + 1
1803}
1804
1805#[allow(dead_code)]
1806fn initialize_buckets_for_lms_suffixes_radix_sort_16u(
1807    t: &[u16],
1808    buckets: &mut [SaSint],
1809    mut first_lms_suffix: SaSint,
1810) -> SaSint {
1811    let mut f0 = 0usize;
1812    let mut c0 = t[first_lms_suffix as usize] as isize;
1813
1814    loop {
1815        first_lms_suffix -= 1;
1816        if first_lms_suffix < 0 {
1817            break;
1818        }
1819
1820        let c1 = c0;
1821        c0 = t[first_lms_suffix as usize] as isize;
1822        let f1 = f0;
1823        f0 = usize::from(c0 > c1 - f1 as isize);
1824        buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1825    }
1826
1827    buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1828
1829    let (count_buckets, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
1830    let mut sum = 0;
1831    for c in 0..ALPHABET_SIZE {
1832        let i = buckets_index4(c, 0);
1833        let j = buckets_index2(c, 0);
1834        temp_bucket[j + buckets_index2(0, 1)] = sum;
1835        sum += count_buckets[i + buckets_index4(0, 1)] + count_buckets[i + buckets_index4(0, 3)];
1836        temp_bucket[j] = sum;
1837    }
1838
1839    sum
1840}
1841
1842#[allow(dead_code)]
1843fn radix_sort_lms_suffixes_16u(
1844    t: &[u16],
1845    sa: &mut [SaSint],
1846    induction_bucket: &mut [SaSint],
1847    omp_block_start: SaSint,
1848    omp_block_size: SaSint,
1849) {
1850    let mut i = omp_block_start + omp_block_size - 1;
1851    let mut j = omp_block_start + 64 + 3;
1852    while i >= j {
1853        let p0 = sa[i as usize];
1854        induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1855        sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1856
1857        let p1 = sa[(i - 1) as usize];
1858        induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1859        sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1860
1861        let p2 = sa[(i - 2) as usize];
1862        induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1863        sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1864
1865        let p3 = sa[(i - 3) as usize];
1866        induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1867        sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1868
1869        i -= 4;
1870    }
1871
1872    j -= 64 + 3;
1873    while i >= j {
1874        let p = sa[i as usize];
1875        induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1876        sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1877        i -= 1;
1878    }
1879}
1880
1881#[allow(dead_code)]
1882fn radix_sort_lms_suffixes_16u_omp(
1883    t: &[u16],
1884    sa: &mut [SaSint],
1885    n: SaSint,
1886    m: SaSint,
1887    flags: SaSint,
1888    buckets: &mut [SaSint],
1889    threads: SaSint,
1890    thread_state: &mut [ThreadState],
1891) {
1892    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
1893        buckets[4 * ALPHABET_SIZE] -= 1;
1894    }
1895    if threads == 1 || n < 65_536 || m < 65_536 || thread_state.is_empty() {
1896        radix_sort_lms_suffixes_16u(t, sa, &mut buckets[4 * ALPHABET_SIZE..], n - m + 1, m - 1);
1897        return;
1898    }
1899
1900    let thread_count = threads as usize;
1901    for thread in 0..thread_count {
1902        let (src_buckets, state_buckets) = (
1903            &buckets[4 * ALPHABET_SIZE..],
1904            &mut thread_state[thread].buckets,
1905        );
1906        for c in 0..ALPHABET_SIZE {
1907            let i = buckets_index2(c, 0);
1908            let j = buckets_index4(c, 1);
1909            state_buckets[i] = src_buckets[i] - state_buckets[j];
1910        }
1911
1912        let mut block_start = 0;
1913        let mut block_size = thread_state[thread].m;
1914        for idx in (thread..thread_count).rev() {
1915            block_start += thread_state[idx].m;
1916        }
1917
1918        if block_start == m && block_size > 0 {
1919            block_start -= 1;
1920            block_size -= 1;
1921        }
1922
1923        radix_sort_lms_suffixes_16u(
1924            t,
1925            sa,
1926            &mut thread_state[thread].buckets,
1927            n - block_start,
1928            block_size,
1929        );
1930    }
1931}
1932
1933#[allow(dead_code)]
1934fn radix_sort_lms_suffixes_32s_6k(
1935    t: &[SaSint],
1936    sa: &mut [SaSint],
1937    induction_bucket: &mut [SaSint],
1938    omp_block_start: SaSint,
1939    omp_block_size: SaSint,
1940) {
1941    let mut i = omp_block_start + omp_block_size - 1;
1942    let mut j = omp_block_start + 64 + 3;
1943    while i >= j {
1944        let p0 = sa[i as usize];
1945        induction_bucket[t[p0 as usize] as usize] -= 1;
1946        sa[induction_bucket[t[p0 as usize] as usize] as usize] = p0;
1947        let p1 = sa[(i - 1) as usize];
1948        induction_bucket[t[p1 as usize] as usize] -= 1;
1949        sa[induction_bucket[t[p1 as usize] as usize] as usize] = p1;
1950        let p2 = sa[(i - 2) as usize];
1951        induction_bucket[t[p2 as usize] as usize] -= 1;
1952        sa[induction_bucket[t[p2 as usize] as usize] as usize] = p2;
1953        let p3 = sa[(i - 3) as usize];
1954        induction_bucket[t[p3 as usize] as usize] -= 1;
1955        sa[induction_bucket[t[p3 as usize] as usize] as usize] = p3;
1956        i -= 4;
1957    }
1958
1959    j -= 64 + 3;
1960    while i >= j {
1961        let p = sa[i as usize];
1962        induction_bucket[t[p as usize] as usize] -= 1;
1963        sa[induction_bucket[t[p as usize] as usize] as usize] = p;
1964        i -= 1;
1965    }
1966}
1967
1968#[allow(dead_code)]
1969fn radix_sort_lms_suffixes_32s_2k(
1970    t: &[SaSint],
1971    sa: &mut [SaSint],
1972    induction_bucket: &mut [SaSint],
1973    omp_block_start: SaSint,
1974    omp_block_size: SaSint,
1975) {
1976    let mut i = omp_block_start + omp_block_size - 1;
1977    let mut j = omp_block_start + 64 + 3;
1978    while i >= j {
1979        let p0 = sa[i as usize];
1980        induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1981        sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1982        let p1 = sa[(i - 1) as usize];
1983        induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1984        sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1985        let p2 = sa[(i - 2) as usize];
1986        induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1987        sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1988        let p3 = sa[(i - 3) as usize];
1989        induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1990        sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1991        i -= 4;
1992    }
1993
1994    j -= 64 + 3;
1995    while i >= j {
1996        let p = sa[i as usize];
1997        induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1998        sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1999        i -= 1;
2000    }
2001}
2002
2003#[allow(dead_code)]
2004fn radix_sort_lms_suffixes_32s_block_gather(
2005    t: &[SaSint],
2006    sa: &[SaSint],
2007    cache: &mut [ThreadCache],
2008    omp_block_start: SaSint,
2009    omp_block_size: SaSint,
2010) {
2011    if omp_block_size <= 0 {
2012        return;
2013    }
2014
2015    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2016    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2017    let cache_base = if cache.len() >= start + size {
2018        0
2019    } else {
2020        start
2021    };
2022    let mut i = start;
2023    let mut j = if size > 67 { start + size - 67 } else { start };
2024
2025    while i < j {
2026        for current in [i, i + 1, i + 2, i + 3] {
2027            let ci = current - cache_base;
2028            let index = sa[current];
2029            cache[ci].index = index;
2030            cache[ci].symbol = t[index as usize];
2031        }
2032        i += 4;
2033    }
2034
2035    j = if size > 67 { j + 67 } else { start + size };
2036    while i < j {
2037        let ci = i - cache_base;
2038        let index = sa[i];
2039        cache[ci].index = index;
2040        cache[ci].symbol = t[index as usize];
2041        i += 1;
2042    }
2043}
2044
2045#[allow(dead_code)]
2046fn radix_sort_lms_suffixes_32s_6k_block_sort(
2047    induction_bucket: &mut [SaSint],
2048    cache: &mut [ThreadCache],
2049    omp_block_start: SaSint,
2050    omp_block_size: SaSint,
2051) {
2052    if omp_block_size <= 0 {
2053        return;
2054    }
2055
2056    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2057    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2058    let cache_base = if cache.len() >= start + size {
2059        0
2060    } else {
2061        start
2062    };
2063    let mut i = start + size - 1;
2064    let mut j = start + 64 + 3;
2065
2066    while i >= j {
2067        for current in [i, i - 1, i - 2, i - 3] {
2068            let ci = current - cache_base;
2069            let v = cache[ci].symbol as usize;
2070            induction_bucket[v] -= 1;
2071            cache[ci].symbol = induction_bucket[v];
2072        }
2073        i -= 4;
2074    }
2075
2076    j -= 64 + 3;
2077    while i >= j {
2078        let ci = i - cache_base;
2079        let v = cache[ci].symbol as usize;
2080        induction_bucket[v] -= 1;
2081        cache[ci].symbol = induction_bucket[v];
2082        if i == 0 {
2083            break;
2084        }
2085        i -= 1;
2086    }
2087}
2088
2089#[allow(dead_code)]
2090fn radix_sort_lms_suffixes_32s_2k_block_sort(
2091    induction_bucket: &mut [SaSint],
2092    cache: &mut [ThreadCache],
2093    omp_block_start: SaSint,
2094    omp_block_size: SaSint,
2095) {
2096    if omp_block_size <= 0 {
2097        return;
2098    }
2099
2100    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2101    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2102    let cache_base = if cache.len() >= start + size {
2103        0
2104    } else {
2105        start
2106    };
2107    let mut i = start + size - 1;
2108    let mut j = start + 64 + 3;
2109
2110    while i >= j {
2111        for current in [i, i - 1, i - 2, i - 3] {
2112            let ci = current - cache_base;
2113            let v = buckets_index2(cache[ci].symbol as usize, 0);
2114            induction_bucket[v] -= 1;
2115            cache[ci].symbol = induction_bucket[v];
2116        }
2117        i -= 4;
2118    }
2119
2120    j -= 64 + 3;
2121    while i >= j {
2122        let ci = i - cache_base;
2123        let v = buckets_index2(cache[ci].symbol as usize, 0);
2124        induction_bucket[v] -= 1;
2125        cache[ci].symbol = induction_bucket[v];
2126        if i == 0 {
2127            break;
2128        }
2129        i -= 1;
2130    }
2131}
2132
2133#[allow(dead_code)]
2134fn radix_sort_lms_suffixes_32s_6k_block_omp(
2135    t: &[SaSint],
2136    sa: &mut [SaSint],
2137    induction_bucket: &mut [SaSint],
2138    cache: &mut [ThreadCache],
2139    block_start: SaSint,
2140    block_size: SaSint,
2141    threads: SaSint,
2142) {
2143    if threads <= 1 || block_size < 16_384 {
2144        radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, block_start, block_size);
2145        return;
2146    }
2147
2148    radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2149    radix_sort_lms_suffixes_32s_6k_block_sort(induction_bucket, cache, block_start, block_size);
2150    place_cached_suffixes(sa, cache, block_start, block_size);
2151}
2152
2153#[allow(dead_code)]
2154fn radix_sort_lms_suffixes_32s_2k_block_omp(
2155    t: &[SaSint],
2156    sa: &mut [SaSint],
2157    induction_bucket: &mut [SaSint],
2158    cache: &mut [ThreadCache],
2159    block_start: SaSint,
2160    block_size: SaSint,
2161    threads: SaSint,
2162) {
2163    if threads <= 1 || block_size < 16_384 {
2164        radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, block_start, block_size);
2165        return;
2166    }
2167
2168    radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2169    radix_sort_lms_suffixes_32s_2k_block_sort(induction_bucket, cache, block_start, block_size);
2170    place_cached_suffixes(sa, cache, block_start, block_size);
2171}
2172
2173#[allow(dead_code)]
2174fn radix_sort_lms_suffixes_32s_6k_omp(
2175    t: &[SaSint],
2176    sa: &mut [SaSint],
2177    n: SaSint,
2178    m: SaSint,
2179    induction_bucket: &mut [SaSint],
2180    threads: SaSint,
2181) {
2182    if threads <= 1 || m < 65_536 {
2183        radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, n - m + 1, m - 1);
2184        return;
2185    }
2186
2187    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2188    let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2189    let mut block_start = 0usize;
2190    let m_usize = usize::try_from(m).expect("m must be non-negative");
2191    let n_usize = usize::try_from(n).expect("n must be non-negative");
2192    let last = m_usize - 1;
2193
2194    while block_start < last {
2195        let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2196        radix_sort_lms_suffixes_32s_6k_block_omp(
2197            t,
2198            sa,
2199            induction_bucket,
2200            &mut cache,
2201            (n_usize - block_end) as SaSint,
2202            (block_end - block_start) as SaSint,
2203            threads,
2204        );
2205        block_start = block_end;
2206    }
2207}
2208
2209#[allow(dead_code)]
2210fn radix_sort_lms_suffixes_32s_2k_omp(
2211    t: &[SaSint],
2212    sa: &mut [SaSint],
2213    n: SaSint,
2214    m: SaSint,
2215    induction_bucket: &mut [SaSint],
2216    threads: SaSint,
2217) {
2218    if threads <= 1 || m < 65_536 {
2219        radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, n - m + 1, m - 1);
2220        return;
2221    }
2222
2223    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2224    let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2225    let mut block_start = 0usize;
2226    let m_usize = usize::try_from(m).expect("m must be non-negative");
2227    let n_usize = usize::try_from(n).expect("n must be non-negative");
2228    let last = m_usize - 1;
2229
2230    while block_start < last {
2231        let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2232        radix_sort_lms_suffixes_32s_2k_block_omp(
2233            t,
2234            sa,
2235            induction_bucket,
2236            &mut cache,
2237            (n_usize - block_end) as SaSint,
2238            (block_end - block_start) as SaSint,
2239            threads,
2240        );
2241        block_start = block_end;
2242    }
2243}
2244
2245#[allow(dead_code)]
2246fn radix_sort_lms_suffixes_32s_1k(
2247    t: &[SaSint],
2248    sa: &mut [SaSint],
2249    n: SaSint,
2250    buckets: &mut [SaSint],
2251) -> SaSint {
2252    let mut i = n - 2;
2253    let mut m = 0;
2254    let mut f0 = 1usize;
2255    let mut f1: usize;
2256    let mut c0 = t[(n - 1) as usize] as isize;
2257    let mut c1: isize;
2258    let mut c2 = 0isize;
2259
2260    while i >= 64 + 3 {
2261        c1 = t[i as usize] as isize;
2262        f1 = usize::from(c1 > c0 - f0 as isize);
2263        if (f1 & !f0) != 0 {
2264            c2 = c0;
2265            buckets[c2 as usize] -= 1;
2266            sa[buckets[c2 as usize] as usize] = i + 1;
2267            m += 1;
2268        }
2269        c0 = t[(i - 1) as usize] as isize;
2270        f0 = usize::from(c0 > c1 - f1 as isize);
2271        if (f0 & !f1) != 0 {
2272            c2 = c1;
2273            buckets[c2 as usize] -= 1;
2274            sa[buckets[c2 as usize] as usize] = i;
2275            m += 1;
2276        }
2277        c1 = t[(i - 2) as usize] as isize;
2278        f1 = usize::from(c1 > c0 - f0 as isize);
2279        if (f1 & !f0) != 0 {
2280            c2 = c0;
2281            buckets[c2 as usize] -= 1;
2282            sa[buckets[c2 as usize] as usize] = i - 1;
2283            m += 1;
2284        }
2285        c0 = t[(i - 3) as usize] as isize;
2286        f0 = usize::from(c0 > c1 - f1 as isize);
2287        if (f0 & !f1) != 0 {
2288            c2 = c1;
2289            buckets[c2 as usize] -= 1;
2290            sa[buckets[c2 as usize] as usize] = i - 2;
2291            m += 1;
2292        }
2293        i -= 4;
2294    }
2295
2296    while i >= 0 {
2297        c1 = c0;
2298        c0 = t[i as usize] as isize;
2299        f1 = f0;
2300        f0 = usize::from(c0 > c1 - f1 as isize);
2301        if (f0 & !f1) != 0 {
2302            c2 = c1;
2303            buckets[c2 as usize] -= 1;
2304            sa[buckets[c2 as usize] as usize] = i + 1;
2305            m += 1;
2306        }
2307        i -= 1;
2308    }
2309
2310    if m > 1 {
2311        sa[buckets[c2 as usize] as usize] = 0;
2312    }
2313
2314    m
2315}
2316
2317#[allow(dead_code)]
2318fn radix_sort_set_markers_32s_6k(
2319    sa: &mut [SaSint],
2320    induction_bucket: &[SaSint],
2321    omp_block_start: SaSint,
2322    omp_block_size: SaSint,
2323) {
2324    let mut i = omp_block_start;
2325    let mut j = omp_block_start + omp_block_size - 64 - 3;
2326
2327    while i < j {
2328        sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2329        sa[induction_bucket[(i + 1) as usize] as usize] |= SAINT_MIN;
2330        sa[induction_bucket[(i + 2) as usize] as usize] |= SAINT_MIN;
2331        sa[induction_bucket[(i + 3) as usize] as usize] |= SAINT_MIN;
2332        i += 4;
2333    }
2334
2335    j += 64 + 3;
2336    while i < j {
2337        sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2338        i += 1;
2339    }
2340}
2341
2342#[allow(dead_code)]
2343fn radix_sort_set_markers_32s_4k(
2344    sa: &mut [SaSint],
2345    induction_bucket: &[SaSint],
2346    omp_block_start: SaSint,
2347    omp_block_size: SaSint,
2348) {
2349    let mut i = omp_block_start;
2350    let mut j = omp_block_start + omp_block_size - 64 - 3;
2351
2352    while i < j {
2353        sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2354        sa[induction_bucket[buckets_index2((i + 1) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2355        sa[induction_bucket[buckets_index2((i + 2) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2356        sa[induction_bucket[buckets_index2((i + 3) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2357        i += 4;
2358    }
2359
2360    j += 64 + 3;
2361    while i < j {
2362        sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2363        i += 1;
2364    }
2365}
2366
2367#[allow(dead_code)]
2368fn radix_sort_set_markers_32s_6k_omp(
2369    sa: &mut [SaSint],
2370    k: SaSint,
2371    induction_bucket: &[SaSint],
2372    threads: SaSint,
2373) {
2374    if k <= 1 {
2375        return;
2376    }
2377
2378    if threads <= 1 || k < 65_536 {
2379        radix_sort_set_markers_32s_6k(sa, induction_bucket, 0, k - 1);
2380        return;
2381    }
2382
2383    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2384    let last = usize::try_from(k - 1).expect("k must be positive");
2385    let stride = (last / threads_usize) & !15usize;
2386    let mut start = 0usize;
2387
2388    for thread in 0..threads_usize {
2389        let end = if thread + 1 == threads_usize {
2390            last
2391        } else {
2392            start + stride
2393        };
2394        if end > start {
2395            radix_sort_set_markers_32s_6k(
2396                sa,
2397                induction_bucket,
2398                start as SaSint,
2399                (end - start) as SaSint,
2400            );
2401        }
2402        start = end;
2403    }
2404}
2405
2406#[allow(dead_code)]
2407fn radix_sort_set_markers_32s_4k_omp(
2408    sa: &mut [SaSint],
2409    k: SaSint,
2410    induction_bucket: &[SaSint],
2411    threads: SaSint,
2412) {
2413    if k <= 1 {
2414        return;
2415    }
2416
2417    if threads <= 1 || k < 65_536 {
2418        radix_sort_set_markers_32s_4k(sa, induction_bucket, 0, k - 1);
2419        return;
2420    }
2421
2422    let threads_usize = usize::try_from(threads).expect("threads must be positive");
2423    let last = usize::try_from(k - 1).expect("k must be positive");
2424    let stride = (last / threads_usize) & !15usize;
2425    let mut start = 0usize;
2426
2427    for thread in 0..threads_usize {
2428        let end = if thread + 1 == threads_usize {
2429            last
2430        } else {
2431            start + stride
2432        };
2433        if end > start {
2434            radix_sort_set_markers_32s_4k(
2435                sa,
2436                induction_bucket,
2437                start as SaSint,
2438                (end - start) as SaSint,
2439            );
2440        }
2441        start = end;
2442    }
2443}
2444
2445#[allow(dead_code)]
2446fn initialize_buckets_for_partial_sorting_16u(
2447    t: &[u16],
2448    buckets: &mut [SaSint],
2449    first_lms_suffix: SaSint,
2450    left_suffixes_count: SaSint,
2451) {
2452    buckets[buckets_index4(t[first_lms_suffix as usize] as usize, 1)] += 1;
2453
2454    let (front, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
2455    let mut sum0 = left_suffixes_count + 1;
2456    let mut sum1 = 0;
2457
2458    for c in 0..ALPHABET_SIZE {
2459        let i = buckets_index4(c, 0);
2460        let j = buckets_index2(c, 0);
2461
2462        temp_bucket[j + buckets_index2(0, 0)] = sum0;
2463
2464        sum0 += front[i + buckets_index4(0, 0)] + front[i + buckets_index4(0, 2)];
2465        sum1 += front[i + buckets_index4(0, 1)];
2466
2467        front[j + buckets_index2(0, 0)] = sum0;
2468        front[j + buckets_index2(0, 1)] = sum1;
2469    }
2470}
2471
2472#[allow(dead_code)]
2473fn partial_sorting_shift_markers_32s_6k_omp(
2474    sa: &mut [SaSint],
2475    k: SaSint,
2476    buckets: &[SaSint],
2477    threads: SaSint,
2478) {
2479    let k_usize = usize::try_from(k).expect("k must be non-negative");
2480    let temp_bucket = &buckets[4 * k_usize..];
2481    let thread_count = if threads > 1 && k >= 65536 {
2482        usize::try_from(threads).expect("threads must be positive")
2483    } else {
2484        1
2485    };
2486    for t in 0..thread_count {
2487        let mut c = k_usize as isize - 1 - t as isize;
2488        while c >= 1 {
2489            let c_usize = c as usize;
2490            let mut i = buckets[buckets_index4(c_usize, 0)] - 1;
2491            let mut j = temp_bucket[buckets_index2(c_usize - 1, 0)] + 3;
2492            let mut s = SAINT_MIN;
2493
2494            while i >= j {
2495                let p0 = sa[i as usize];
2496                let q0 = (p0 & SAINT_MIN) ^ s;
2497                s ^= q0;
2498                sa[i as usize] = p0 ^ q0;
2499
2500                let p1 = sa[(i - 1) as usize];
2501                let q1 = (p1 & SAINT_MIN) ^ s;
2502                s ^= q1;
2503                sa[(i - 1) as usize] = p1 ^ q1;
2504
2505                let p2 = sa[(i - 2) as usize];
2506                let q2 = (p2 & SAINT_MIN) ^ s;
2507                s ^= q2;
2508                sa[(i - 2) as usize] = p2 ^ q2;
2509
2510                let p3 = sa[(i - 3) as usize];
2511                let q3 = (p3 & SAINT_MIN) ^ s;
2512                s ^= q3;
2513                sa[(i - 3) as usize] = p3 ^ q3;
2514
2515                i -= 4;
2516            }
2517
2518            j -= 3;
2519            while i >= j {
2520                let p = sa[i as usize];
2521                let q = (p & SAINT_MIN) ^ s;
2522                s ^= q;
2523                sa[i as usize] = p ^ q;
2524                i -= 1;
2525            }
2526
2527            c -= thread_count as isize;
2528        }
2529    }
2530}
2531
2532#[allow(dead_code)]
2533fn partial_sorting_shift_markers_32s_4k(sa: &mut [SaSint], n: SaSint) {
2534    let mut i = n - 1;
2535    let mut s = SUFFIX_GROUP_MARKER;
2536
2537    while i >= 3 {
2538        let p0 = sa[i as usize];
2539        let q0 =
2540            ((p0 & SUFFIX_GROUP_MARKER) ^ s) & (((p0 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2541        s ^= q0;
2542        sa[i as usize] = p0 ^ q0;
2543
2544        let p1 = sa[(i - 1) as usize];
2545        let q1 =
2546            ((p1 & SUFFIX_GROUP_MARKER) ^ s) & (((p1 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2547        s ^= q1;
2548        sa[(i - 1) as usize] = p1 ^ q1;
2549
2550        let p2 = sa[(i - 2) as usize];
2551        let q2 =
2552            ((p2 & SUFFIX_GROUP_MARKER) ^ s) & (((p2 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2553        s ^= q2;
2554        sa[(i - 2) as usize] = p2 ^ q2;
2555
2556        let p3 = sa[(i - 3) as usize];
2557        let q3 =
2558            ((p3 & SUFFIX_GROUP_MARKER) ^ s) & (((p3 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2559        s ^= q3;
2560        sa[(i - 3) as usize] = p3 ^ q3;
2561
2562        i -= 4;
2563    }
2564
2565    while i >= 0 {
2566        let p = sa[i as usize];
2567        let q = ((p & SUFFIX_GROUP_MARKER) ^ s) & (((p > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2568        s ^= q;
2569        sa[i as usize] = p ^ q;
2570        i -= 1;
2571    }
2572}
2573
2574#[allow(dead_code)]
2575fn partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
2576    let temp_offset = 4 * k as usize;
2577    let mut i = buckets_index2(0, 0);
2578
2579    while i <= buckets_index2(k as usize - 1, 0) {
2580        buckets[2 * i + buckets_index4(0, 0)] = buckets[temp_offset + i + buckets_index2(0, 0)];
2581        buckets[2 * i + buckets_index4(0, 1)] = buckets[temp_offset + i + buckets_index2(0, 1)];
2582        i += buckets_index2(1, 0);
2583    }
2584}
2585
2586#[allow(dead_code)]
2587fn partial_sorting_scan_left_to_right_16u(
2588    t: &[u16],
2589    sa: &mut [SaSint],
2590    buckets: &mut [SaSint],
2591    mut d: SaSint,
2592    omp_block_start: SaSint,
2593    omp_block_size: SaSint,
2594) -> SaSint {
2595    let mut i = omp_block_start as isize;
2596    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
2597    while i < j {
2598        let mut p0 = sa[i as usize];
2599        d += SaSint::from(p0 < 0);
2600        p0 &= SAINT_MAX;
2601        let v0 = buckets_index2(
2602            t[(p0 - 1) as usize] as usize,
2603            usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
2604        );
2605        let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2606            SAINT_MIN
2607        } else {
2608            0
2609        };
2610        let dst0 = buckets[4 * ALPHABET_SIZE + v0] as usize;
2611        sa[dst0] = (p0 - 1) | mark0;
2612        buckets[4 * ALPHABET_SIZE + v0] += 1;
2613        buckets[2 * ALPHABET_SIZE + v0] = d;
2614
2615        let mut p1 = sa[(i + 1) as usize];
2616        d += SaSint::from(p1 < 0);
2617        p1 &= SAINT_MAX;
2618        let v1 = buckets_index2(
2619            t[(p1 - 1) as usize] as usize,
2620            usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
2621        );
2622        let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2623            SAINT_MIN
2624        } else {
2625            0
2626        };
2627        let dst1 = buckets[4 * ALPHABET_SIZE + v1] as usize;
2628        sa[dst1] = (p1 - 1) | mark1;
2629        buckets[4 * ALPHABET_SIZE + v1] += 1;
2630        buckets[2 * ALPHABET_SIZE + v1] = d;
2631
2632        i += 2;
2633    }
2634
2635    j += 64 + 1;
2636    while i < j {
2637        let mut p = sa[i as usize];
2638        d += SaSint::from(p < 0);
2639        p &= SAINT_MAX;
2640        let v = buckets_index2(
2641            t[(p - 1) as usize] as usize,
2642            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2643        );
2644        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2645            SAINT_MIN
2646        } else {
2647            0
2648        };
2649        let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2650        sa[dst] = (p - 1) | mark;
2651        buckets[4 * ALPHABET_SIZE + v] += 1;
2652        buckets[2 * ALPHABET_SIZE + v] = d;
2653        i += 1;
2654    }
2655
2656    d
2657}
2658
2659#[allow(dead_code)]
2660fn partial_sorting_scan_left_to_right_16u_block_prepare(
2661    t: &[u16],
2662    sa: &mut [SaSint],
2663    k: SaSint,
2664    buckets: &mut [SaSint],
2665    cache: &mut [ThreadCache],
2666    omp_block_start: SaSint,
2667    omp_block_size: SaSint,
2668    state: &mut ThreadState,
2669) -> SaSint {
2670    let width = 2 * k as usize;
2671    buckets[..width].fill(0);
2672    buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2673
2674    let mut count = 0usize;
2675    let mut d = 1;
2676    for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
2677        let mut p = sa[i];
2678        cache[count].index = p;
2679        d += SaSint::from(p < 0);
2680        p &= SAINT_MAX;
2681        let v = buckets_index2(
2682            t[(p - 1) as usize] as usize,
2683            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2684        );
2685        cache[count].symbol = v as SaSint;
2686        buckets[v] += 1;
2687        buckets[2 * ALPHABET_SIZE + v] = d;
2688        count += 1;
2689    }
2690    state.cache_entries = count;
2691    d - 1
2692}
2693
2694#[allow(dead_code)]
2695fn partial_sorting_scan_left_to_right_16u_block_place(
2696    sa: &mut [SaSint],
2697    buckets: &mut [SaSint],
2698    cache: &[ThreadCache],
2699    count: SaSint,
2700    mut d: SaSint,
2701) {
2702    for entry in cache.iter().take(count as usize) {
2703        let mut p = entry.index;
2704        d += SaSint::from(p < 0);
2705        p &= SAINT_MAX;
2706        let v = entry.symbol as usize;
2707        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2708            SAINT_MIN
2709        } else {
2710            0
2711        };
2712        let dst = buckets[v] as usize;
2713        sa[dst] = (p - 1) | mark;
2714        buckets[v] += 1;
2715        buckets[2 * ALPHABET_SIZE + v] = d;
2716    }
2717}
2718
2719#[allow(dead_code)]
2720fn partial_sorting_scan_left_to_right_16u_block_omp(
2721    t: &[u16],
2722    sa: &mut [SaSint],
2723    k: SaSint,
2724    buckets: &mut [SaSint],
2725    d: SaSint,
2726    block_start: SaSint,
2727    block_size: SaSint,
2728    threads: SaSint,
2729    thread_state: &mut [ThreadState],
2730) -> SaSint {
2731    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
2732        usize::try_from(threads)
2733            .expect("threads must be non-negative")
2734            .min(thread_state.len())
2735    } else {
2736        1
2737    };
2738    if thread_count <= 1 {
2739        return partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, block_start, block_size);
2740    }
2741
2742    let bucket_width = 2 * k as usize;
2743    let block_stride = (block_size / thread_count as SaSint) & !15;
2744
2745    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
2746        let local_start = thread as SaSint * block_stride;
2747        let local_size = if thread + 1 < thread_count {
2748            block_stride
2749        } else {
2750            block_size - local_start
2751        };
2752        let mut local_state = ThreadState::default();
2753        state.position = partial_sorting_scan_left_to_right_16u_block_prepare(
2754            t,
2755            sa,
2756            k,
2757            &mut state.buckets,
2758            &mut state.cache,
2759            block_start + local_start,
2760            local_size,
2761            &mut local_state,
2762        );
2763        state.count = local_state.cache_entries as SaSint;
2764    }
2765
2766    let mut next_d = d;
2767    for state in thread_state.iter_mut().take(thread_count) {
2768        for c in 0..bucket_width {
2769            let a = buckets[4 * ALPHABET_SIZE + c];
2770            let b = state.buckets[c];
2771            buckets[4 * ALPHABET_SIZE + c] = a + b;
2772            state.buckets[c] = a;
2773        }
2774
2775        next_d -= 1;
2776        for c in 0..bucket_width {
2777            let a = buckets[2 * ALPHABET_SIZE + c];
2778            let b = state.buckets[2 * ALPHABET_SIZE + c];
2779            let shifted = b + next_d;
2780            buckets[2 * ALPHABET_SIZE + c] = if b > 0 { shifted } else { a };
2781            state.buckets[2 * ALPHABET_SIZE + c] = a;
2782        }
2783        next_d += 1 + state.position;
2784        state.position = next_d - state.position;
2785    }
2786
2787    for state in thread_state.iter_mut().take(thread_count) {
2788        partial_sorting_scan_left_to_right_16u_block_place(
2789            sa,
2790            &mut state.buckets,
2791            &state.cache,
2792            state.count,
2793            state.position,
2794        );
2795    }
2796
2797    next_d
2798}
2799
2800#[allow(dead_code)]
2801fn partial_sorting_scan_left_to_right_16u_omp(
2802    t: &[u16],
2803    sa: &mut [SaSint],
2804    n: SaSint,
2805    k: SaSint,
2806    buckets: &mut [SaSint],
2807    left_suffixes_count: SaSint,
2808    mut d: SaSint,
2809    threads: SaSint,
2810) -> SaSint {
2811    let v = buckets_index2(
2812        t[(n - 1) as usize] as usize,
2813        usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
2814    );
2815    let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2816    buckets[4 * ALPHABET_SIZE + v] += 1;
2817    sa[dst] = (n - 1) | SAINT_MIN;
2818    d += 1;
2819    buckets[2 * ALPHABET_SIZE + v] = d;
2820
2821    if threads == 1 || left_suffixes_count < 65536 {
2822        d = partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, 0, left_suffixes_count);
2823    } else {
2824        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
2825        let mut block_start = 0;
2826        while block_start < left_suffixes_count {
2827            if sa[block_start as usize] == 0 {
2828                block_start += 1;
2829            } else {
2830                let mut block_end =
2831                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
2832                if block_end > left_suffixes_count {
2833                    block_end = left_suffixes_count;
2834                }
2835                let mut block_scan_end = block_start + 1;
2836                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
2837                    block_scan_end += 1;
2838                }
2839                let block_size = block_scan_end - block_start;
2840
2841                if block_size < 32 {
2842                    while block_start < block_scan_end {
2843                        let mut p = sa[block_start as usize];
2844                        d += SaSint::from(p < 0);
2845                        p &= SAINT_MAX;
2846                        let v = buckets_index2(
2847                            t[(p - 1) as usize] as usize,
2848                            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2849                        );
2850                        let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2851                        buckets[4 * ALPHABET_SIZE + v] += 1;
2852                        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2853                            SAINT_MIN
2854                        } else {
2855                            0
2856                        };
2857                        sa[dst] = (p - 1) | mark;
2858                        buckets[2 * ALPHABET_SIZE + v] = d;
2859                        block_start += 1;
2860                    }
2861                } else {
2862                    d = partial_sorting_scan_left_to_right_16u_block_omp(
2863                        t,
2864                        sa,
2865                        k,
2866                        buckets,
2867                        d,
2868                        block_start,
2869                        block_size,
2870                        threads,
2871                        &mut thread_state,
2872                    );
2873                    block_start = block_scan_end;
2874                }
2875            }
2876        }
2877    }
2878    d
2879}
2880
2881#[allow(dead_code)]
2882fn partial_sorting_scan_right_to_left_16u(
2883    t: &[u16],
2884    sa: &mut [SaSint],
2885    buckets: &mut [SaSint],
2886    mut d: SaSint,
2887    omp_block_start: SaSint,
2888    omp_block_size: SaSint,
2889) -> SaSint {
2890    let mut i = (omp_block_start + omp_block_size - 1) as isize;
2891    let mut j = (omp_block_start + 64 + 1) as isize;
2892    while i >= j {
2893        let mut p0 = sa[i as usize];
2894        d += SaSint::from(p0 < 0);
2895        p0 &= SAINT_MAX;
2896        let v0 = buckets_index2(
2897            t[(p0 - 1) as usize] as usize,
2898            usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
2899        );
2900        let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2901            SAINT_MIN
2902        } else {
2903            0
2904        };
2905        buckets[v0] -= 1;
2906        sa[buckets[v0] as usize] = (p0 - 1) | mark0;
2907        buckets[2 * ALPHABET_SIZE + v0] = d;
2908
2909        let mut p1 = sa[(i - 1) as usize];
2910        d += SaSint::from(p1 < 0);
2911        p1 &= SAINT_MAX;
2912        let v1 = buckets_index2(
2913            t[(p1 - 1) as usize] as usize,
2914            usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
2915        );
2916        let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2917            SAINT_MIN
2918        } else {
2919            0
2920        };
2921        buckets[v1] -= 1;
2922        sa[buckets[v1] as usize] = (p1 - 1) | mark1;
2923        buckets[2 * ALPHABET_SIZE + v1] = d;
2924
2925        i -= 2;
2926    }
2927
2928    j -= 64 + 1;
2929    while i >= j {
2930        let mut p = sa[i as usize];
2931        d += SaSint::from(p < 0);
2932        p &= SAINT_MAX;
2933        let v = buckets_index2(
2934            t[(p - 1) as usize] as usize,
2935            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2936        );
2937        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2938            SAINT_MIN
2939        } else {
2940            0
2941        };
2942        buckets[v] -= 1;
2943        sa[buckets[v] as usize] = (p - 1) | mark;
2944        buckets[2 * ALPHABET_SIZE + v] = d;
2945        i -= 1;
2946    }
2947
2948    d
2949}
2950
2951#[allow(dead_code)]
2952fn partial_sorting_scan_right_to_left_16u_block_prepare(
2953    t: &[u16],
2954    sa: &mut [SaSint],
2955    k: SaSint,
2956    buckets: &mut [SaSint],
2957    cache: &mut [ThreadCache],
2958    omp_block_start: SaSint,
2959    omp_block_size: SaSint,
2960    state: &mut ThreadState,
2961) -> SaSint {
2962    let width = 2 * k as usize;
2963    buckets[..width].fill(0);
2964    buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2965
2966    let mut count = 0usize;
2967    let mut d = 1;
2968    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
2969        let mut p = sa[i];
2970        cache[count].index = p;
2971        d += SaSint::from(p < 0);
2972        p &= SAINT_MAX;
2973        let v = buckets_index2(
2974            t[(p - 1) as usize] as usize,
2975            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2976        );
2977        cache[count].symbol = v as SaSint;
2978        buckets[v] += 1;
2979        buckets[2 * ALPHABET_SIZE + v] = d;
2980        count += 1;
2981    }
2982    state.cache_entries = count;
2983    d - 1
2984}
2985
2986#[allow(dead_code)]
2987fn partial_sorting_scan_right_to_left_16u_block_place(
2988    sa: &mut [SaSint],
2989    buckets: &mut [SaSint],
2990    cache: &[ThreadCache],
2991    count: SaSint,
2992    mut d: SaSint,
2993) {
2994    for entry in cache.iter().take(count as usize) {
2995        let mut p = entry.index;
2996        d += SaSint::from(p < 0);
2997        p &= SAINT_MAX;
2998        let v = entry.symbol as usize;
2999        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3000            SAINT_MIN
3001        } else {
3002            0
3003        };
3004        buckets[v] -= 1;
3005        sa[buckets[v] as usize] = (p - 1) | mark;
3006        buckets[2 * ALPHABET_SIZE + v] = d;
3007    }
3008}
3009
3010#[allow(dead_code)]
3011fn partial_gsa_scan_right_to_left_16u_block_place(
3012    sa: &mut [SaSint],
3013    buckets: &mut [SaSint],
3014    cache: &[ThreadCache],
3015    count: SaSint,
3016    mut d: SaSint,
3017) {
3018    for entry in cache.iter().take(count as usize) {
3019        let mut p = entry.index;
3020        d += SaSint::from(p < 0);
3021        p &= SAINT_MAX;
3022        let v = entry.symbol as usize;
3023        if v != 1 {
3024            let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3025                SAINT_MIN
3026            } else {
3027                0
3028            };
3029            buckets[v] -= 1;
3030            sa[buckets[v] as usize] = (p - 1) | mark;
3031            buckets[2 * ALPHABET_SIZE + v] = d;
3032        }
3033    }
3034}
3035
3036#[allow(dead_code)]
3037fn partial_sorting_scan_right_to_left_16u_block_omp(
3038    t: &[u16],
3039    sa: &mut [SaSint],
3040    k: SaSint,
3041    buckets: &mut [SaSint],
3042    d: SaSint,
3043    block_start: SaSint,
3044    block_size: SaSint,
3045    threads: SaSint,
3046    thread_state: &mut [ThreadState],
3047) -> SaSint {
3048    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
3049        usize::try_from(threads)
3050            .expect("threads must be non-negative")
3051            .min(thread_state.len())
3052    } else {
3053        1
3054    };
3055    if thread_count <= 1 {
3056        return partial_sorting_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
3057    }
3058
3059    let width = 2 * k as usize;
3060    let distinct_offset = 2 * ALPHABET_SIZE;
3061    let block_stride = (block_size / thread_count as SaSint) & !15;
3062
3063    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
3064        let local_start = thread as SaSint * block_stride;
3065        let local_size = if thread + 1 < thread_count {
3066            block_stride
3067        } else {
3068            block_size - local_start
3069        };
3070        let mut local_state = ThreadState::default();
3071        state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
3072            t,
3073            sa,
3074            k,
3075            &mut state.buckets,
3076            &mut state.cache,
3077            block_start + local_start,
3078            local_size,
3079            &mut local_state,
3080        );
3081        state.count = local_state.cache_entries as SaSint;
3082    }
3083
3084    let mut next_d = d;
3085    for state in thread_state.iter_mut().take(thread_count).rev() {
3086        for c in 0..width {
3087            let a = buckets[c];
3088            let b = state.buckets[c];
3089            buckets[c] = a - b;
3090            state.buckets[c] = a;
3091        }
3092
3093        next_d -= 1;
3094        for c in 0..width {
3095            let offset = distinct_offset + c;
3096            let a = buckets[offset];
3097            let b = state.buckets[offset];
3098            let shifted = b + next_d;
3099            buckets[offset] = if b > 0 { shifted } else { a };
3100            state.buckets[offset] = a;
3101        }
3102        next_d += 1 + state.position;
3103        state.position = next_d - state.position;
3104    }
3105
3106    for state in thread_state.iter_mut().take(thread_count) {
3107        partial_sorting_scan_right_to_left_16u_block_place(
3108            sa,
3109            &mut state.buckets,
3110            &state.cache,
3111            state.count,
3112            state.position,
3113        );
3114    }
3115
3116    next_d
3117}
3118
3119#[allow(dead_code)]
3120fn partial_sorting_scan_right_to_left_16u_omp(
3121    t: &[u16],
3122    sa: &mut [SaSint],
3123    n: SaSint,
3124    k: SaSint,
3125    buckets: &mut [SaSint],
3126    first_lms_suffix: SaSint,
3127    left_suffixes_count: SaSint,
3128    d: SaSint,
3129    threads: SaSint,
3130) {
3131    let scan_start = left_suffixes_count + 1;
3132    let scan_end = n - first_lms_suffix;
3133
3134    if threads == 1 || scan_end - scan_start < 65536 {
3135        partial_sorting_scan_right_to_left_16u(
3136            t,
3137            sa,
3138            buckets,
3139            d,
3140            scan_start,
3141            scan_end - scan_start,
3142        );
3143    } else {
3144        let mut d = d;
3145        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
3146        let mut block_start = scan_end - 1;
3147        while block_start >= scan_start {
3148            if sa[block_start as usize] == 0 {
3149                block_start -= 1;
3150            } else {
3151                let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
3152                let mut block_max_end = block_start - block_limit;
3153                if block_max_end < scan_start {
3154                    block_max_end = scan_start - 1;
3155                }
3156                let mut block_end = block_start - 1;
3157                while block_end > block_max_end && sa[block_end as usize] != 0 {
3158                    block_end -= 1;
3159                }
3160                let block_size = block_start - block_end;
3161
3162                if block_size < 32 {
3163                    while block_start > block_end {
3164                        let mut p = sa[block_start as usize];
3165                        d += SaSint::from(p < 0);
3166                        p &= SAINT_MAX;
3167                        let v = buckets_index2(
3168                            t[(p - 1) as usize] as usize,
3169                            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3170                        );
3171                        let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3172                            SAINT_MIN
3173                        } else {
3174                            0
3175                        };
3176                        buckets[v] -= 1;
3177                        sa[buckets[v] as usize] = (p - 1) | mark;
3178                        buckets[2 * ALPHABET_SIZE + v] = d;
3179                        block_start -= 1;
3180                    }
3181                } else {
3182                    d = partial_sorting_scan_right_to_left_16u_block_omp(
3183                        t,
3184                        sa,
3185                        k,
3186                        buckets,
3187                        d,
3188                        block_end + 1,
3189                        block_size,
3190                        threads,
3191                        &mut thread_state,
3192                    );
3193                    block_start = block_end;
3194                }
3195            }
3196        }
3197    }
3198}
3199
3200#[allow(dead_code)]
3201fn partial_sorting_scan_left_to_right_32s_6k(
3202    t: &[SaSint],
3203    sa: &mut [SaSint],
3204    buckets: &mut [SaSint],
3205    mut d: SaSint,
3206    omp_block_start: SaSint,
3207    omp_block_size: SaSint,
3208) -> SaSint {
3209    let mut i = omp_block_start;
3210    let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3211
3212    while i < j {
3213        let mut p2 = sa[i as usize];
3214        d += SaSint::from(p2 < 0);
3215        p2 &= SAINT_MAX;
3216        let v2 = buckets_index4(
3217            t[(p2 - 1) as usize] as usize,
3218            usize::from(t[(p2 - 2) as usize] >= t[(p2 - 1) as usize]),
3219        );
3220        let pos2 = buckets[v2] as usize;
3221        buckets[v2] += 1;
3222        sa[pos2] = (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3223        buckets[2 + v2] = d;
3224
3225        let mut p3 = sa[(i + 1) as usize];
3226        d += SaSint::from(p3 < 0);
3227        p3 &= SAINT_MAX;
3228        let v3 = buckets_index4(
3229            t[(p3 - 1) as usize] as usize,
3230            usize::from(t[(p3 - 2) as usize] >= t[(p3 - 1) as usize]),
3231        );
3232        let pos3 = buckets[v3] as usize;
3233        buckets[v3] += 1;
3234        sa[pos3] = (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3235        buckets[2 + v3] = d;
3236
3237        i += 2;
3238    }
3239
3240    j += 2 * 64 + 1;
3241    while i < j {
3242        let mut p = sa[i as usize];
3243        d += SaSint::from(p < 0);
3244        p &= SAINT_MAX;
3245        let v = buckets_index4(
3246            t[(p - 1) as usize] as usize,
3247            usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3248        );
3249        let pos = buckets[v] as usize;
3250        buckets[v] += 1;
3251        sa[pos] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3252        buckets[2 + v] = d;
3253        i += 1;
3254    }
3255
3256    d
3257}
3258
3259#[allow(dead_code)]
3260fn partial_sorting_scan_left_to_right_32s_4k(
3261    t: &[SaSint],
3262    sa: &mut [SaSint],
3263    k: SaSint,
3264    buckets: &mut [SaSint],
3265    mut d: SaSint,
3266    omp_block_start: SaSint,
3267    omp_block_size: SaSint,
3268) -> SaSint {
3269    let k = k as usize;
3270    let mut i = omp_block_start;
3271    let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3272
3273    while i < j {
3274        let mut p0 = sa[i as usize];
3275        sa[i as usize] = p0 & SAINT_MAX;
3276        if p0 > 0 {
3277            sa[i as usize] = 0;
3278            d += p0 >> (SUFFIX_GROUP_BIT - 1);
3279            p0 &= !SUFFIX_GROUP_MARKER;
3280            let v0 = buckets_index2(
3281                t[(p0 - 1) as usize] as usize,
3282                usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3283            );
3284            let c0 = t[(p0 - 1) as usize] as usize;
3285            let pos0 = buckets[2 * k + c0] as usize;
3286            buckets[2 * k + c0] += 1;
3287            sa[pos0] = (p0 - 1)
3288                | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3289                    << (SAINT_BIT - 1))
3290                | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3291            buckets[v0] = d;
3292        }
3293
3294        let mut p1 = sa[(i + 1) as usize];
3295        sa[(i + 1) as usize] = p1 & SAINT_MAX;
3296        if p1 > 0 {
3297            sa[(i + 1) as usize] = 0;
3298            d += p1 >> (SUFFIX_GROUP_BIT - 1);
3299            p1 &= !SUFFIX_GROUP_MARKER;
3300            let v1 = buckets_index2(
3301                t[(p1 - 1) as usize] as usize,
3302                usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3303            );
3304            let c1 = t[(p1 - 1) as usize] as usize;
3305            let pos1 = buckets[2 * k + c1] as usize;
3306            buckets[2 * k + c1] += 1;
3307            sa[pos1] = (p1 - 1)
3308                | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3309                    << (SAINT_BIT - 1))
3310                | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3311            buckets[v1] = d;
3312        }
3313
3314        i += 2;
3315    }
3316
3317    j += 2 * 64 + 1;
3318    while i < j {
3319        let mut p = sa[i as usize];
3320        sa[i as usize] = p & SAINT_MAX;
3321        if p > 0 {
3322            sa[i as usize] = 0;
3323            d += p >> (SUFFIX_GROUP_BIT - 1);
3324            p &= !SUFFIX_GROUP_MARKER;
3325            let v = buckets_index2(
3326                t[(p - 1) as usize] as usize,
3327                usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3328            );
3329            let c = t[(p - 1) as usize] as usize;
3330            let pos = buckets[2 * k + c] as usize;
3331            buckets[2 * k + c] += 1;
3332            sa[pos] = (p - 1)
3333                | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3334                    << (SAINT_BIT - 1))
3335                | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3336            buckets[v] = d;
3337        }
3338        i += 1;
3339    }
3340
3341    d
3342}
3343
3344#[allow(dead_code)]
3345fn partial_sorting_scan_left_to_right_32s_1k(
3346    t: &[SaSint],
3347    sa: &mut [SaSint],
3348    induction_bucket: &mut [SaSint],
3349    omp_block_start: SaSint,
3350    omp_block_size: SaSint,
3351) {
3352    let mut i = omp_block_start;
3353    let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3354
3355    while i < j {
3356        let p0 = sa[i as usize];
3357        sa[i as usize] = p0 & SAINT_MAX;
3358        if p0 > 0 {
3359            sa[i as usize] = 0;
3360            let c0 = t[(p0 - 1) as usize] as usize;
3361            let pos0 = induction_bucket[c0] as usize;
3362            induction_bucket[c0] += 1;
3363            sa[pos0] = (p0 - 1)
3364                | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3365                    << (SAINT_BIT - 1));
3366        }
3367
3368        let p1 = sa[(i + 1) as usize];
3369        sa[(i + 1) as usize] = p1 & SAINT_MAX;
3370        if p1 > 0 {
3371            sa[(i + 1) as usize] = 0;
3372            let c1 = t[(p1 - 1) as usize] as usize;
3373            let pos1 = induction_bucket[c1] as usize;
3374            induction_bucket[c1] += 1;
3375            sa[pos1] = (p1 - 1)
3376                | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3377                    << (SAINT_BIT - 1));
3378        }
3379
3380        i += 2;
3381    }
3382
3383    j += 2 * 64 + 1;
3384    while i < j {
3385        let p = sa[i as usize];
3386        sa[i as usize] = p & SAINT_MAX;
3387        if p > 0 {
3388            sa[i as usize] = 0;
3389            let c = t[(p - 1) as usize] as usize;
3390            let pos = induction_bucket[c] as usize;
3391            induction_bucket[c] += 1;
3392            sa[pos] = (p - 1)
3393                | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3394                    << (SAINT_BIT - 1));
3395        }
3396        i += 1;
3397    }
3398}
3399
3400#[allow(dead_code)]
3401fn partial_sorting_scan_left_to_right_32s_6k_omp(
3402    t: &[SaSint],
3403    sa: &mut [SaSint],
3404    n: SaSint,
3405    buckets: &mut [SaSint],
3406    left_suffixes_count: SaSint,
3407    mut d: SaSint,
3408    threads: SaSint,
3409    _thread_state: &mut [ThreadState],
3410) -> SaSint {
3411    let v = buckets_index4(
3412        t[(n - 1) as usize] as usize,
3413        usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
3414    );
3415    let pos = buckets[v] as usize;
3416    buckets[v] += 1;
3417    sa[pos] = (n - 1) | SAINT_MIN;
3418    d += 1;
3419    buckets[2 + v] = d;
3420
3421    if threads == 1 || left_suffixes_count < 65536 {
3422        d = partial_sorting_scan_left_to_right_32s_6k(t, sa, buckets, d, 0, left_suffixes_count);
3423    } else {
3424        let mut cache = vec![ThreadCache::default(); left_suffixes_count as usize];
3425        let mut block_start = 0;
3426        while block_start < left_suffixes_count {
3427            let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3428            if block_end > left_suffixes_count {
3429                block_end = left_suffixes_count;
3430            }
3431            d = partial_sorting_scan_left_to_right_32s_6k_block_omp(
3432                t,
3433                sa,
3434                buckets,
3435                d,
3436                &mut cache,
3437                block_start,
3438                block_end - block_start,
3439                threads,
3440            );
3441            block_start = block_end;
3442        }
3443    }
3444
3445    d
3446}
3447
3448#[allow(dead_code)]
3449fn partial_sorting_scan_left_to_right_32s_4k_omp(
3450    t: &[SaSint],
3451    sa: &mut [SaSint],
3452    n: SaSint,
3453    k: SaSint,
3454    buckets: &mut [SaSint],
3455    mut d: SaSint,
3456    threads: SaSint,
3457    _thread_state: &mut [ThreadState],
3458) -> SaSint {
3459    let k_usize = k as usize;
3460    let pos = buckets[2 * k_usize + t[(n - 1) as usize] as usize] as usize;
3461    buckets[2 * k_usize + t[(n - 1) as usize] as usize] += 1;
3462    sa[pos] = (n - 1)
3463        | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1))
3464        | SUFFIX_GROUP_MARKER;
3465    d += 1;
3466    buckets[buckets_index2(
3467        t[(n - 1) as usize] as usize,
3468        usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]),
3469    )] = d;
3470
3471    if threads == 1 || n < 65536 {
3472        d = partial_sorting_scan_left_to_right_32s_4k(t, sa, k, buckets, d, 0, n);
3473    } else {
3474        let mut cache = vec![ThreadCache::default(); n as usize];
3475        let mut block_start = 0;
3476        while block_start < n {
3477            let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3478            if block_end > n {
3479                block_end = n;
3480            }
3481            d = partial_sorting_scan_left_to_right_32s_4k_block_omp(
3482                t,
3483                sa,
3484                k,
3485                buckets,
3486                d,
3487                &mut cache,
3488                block_start,
3489                block_end - block_start,
3490                threads,
3491            );
3492            block_start = block_end;
3493        }
3494    }
3495
3496    d
3497}
3498
3499#[allow(dead_code)]
3500fn partial_sorting_scan_left_to_right_32s_1k_omp(
3501    t: &[SaSint],
3502    sa: &mut [SaSint],
3503    n: SaSint,
3504    buckets: &mut [SaSint],
3505    threads: SaSint,
3506    _thread_state: &mut [ThreadState],
3507) {
3508    let pos = buckets[t[(n - 1) as usize] as usize] as usize;
3509    buckets[t[(n - 1) as usize] as usize] += 1;
3510    sa[pos] = (n - 1)
3511        | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1));
3512
3513    if threads == 1 || n < 65536 {
3514        partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n);
3515    } else {
3516        let mut cache = vec![ThreadCache::default(); n as usize];
3517        let mut block_start = 0;
3518        while block_start < n {
3519            let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3520            if block_end > n {
3521                block_end = n;
3522            }
3523            partial_sorting_scan_left_to_right_32s_1k_block_omp(
3524                t,
3525                sa,
3526                buckets,
3527                &mut cache,
3528                block_start,
3529                block_end - block_start,
3530                threads,
3531            );
3532            block_start = block_end;
3533        }
3534    }
3535}
3536
3537#[allow(dead_code)]
3538fn partial_sorting_scan_right_to_left_32s_6k(
3539    t: &[SaSint],
3540    sa: &mut [SaSint],
3541    buckets: &mut [SaSint],
3542    mut d: SaSint,
3543    omp_block_start: SaSint,
3544    omp_block_size: SaSint,
3545) -> SaSint {
3546    if omp_block_size <= 0 {
3547        return d;
3548    }
3549
3550    let mut i = omp_block_start + omp_block_size - 1;
3551    let mut j = omp_block_start + 2 * 64 + 1;
3552
3553    while i >= j {
3554        let mut p2 = sa[i as usize];
3555        d += SaSint::from(p2 < 0);
3556        p2 &= SAINT_MAX;
3557        let v2 = buckets_index4(
3558            t[(p2 - 1) as usize] as usize,
3559            usize::from(t[(p2 - 2) as usize] > t[(p2 - 1) as usize]),
3560        );
3561        buckets[v2] -= 1;
3562        sa[buckets[v2] as usize] =
3563            (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3564        buckets[2 + v2] = d;
3565
3566        let mut p3 = sa[(i - 1) as usize];
3567        d += SaSint::from(p3 < 0);
3568        p3 &= SAINT_MAX;
3569        let v3 = buckets_index4(
3570            t[(p3 - 1) as usize] as usize,
3571            usize::from(t[(p3 - 2) as usize] > t[(p3 - 1) as usize]),
3572        );
3573        buckets[v3] -= 1;
3574        sa[buckets[v3] as usize] =
3575            (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3576        buckets[2 + v3] = d;
3577
3578        i -= 2;
3579    }
3580
3581    j -= 2 * 64 + 1;
3582    while i >= j {
3583        let mut p = sa[i as usize];
3584        d += SaSint::from(p < 0);
3585        p &= SAINT_MAX;
3586        let v = buckets_index4(
3587            t[(p - 1) as usize] as usize,
3588            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3589        );
3590        buckets[v] -= 1;
3591        sa[buckets[v] as usize] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3592        buckets[2 + v] = d;
3593        i -= 1;
3594    }
3595
3596    d
3597}
3598
3599#[allow(dead_code)]
3600fn partial_sorting_scan_right_to_left_32s_4k(
3601    t: &[SaSint],
3602    sa: &mut [SaSint],
3603    k: SaSint,
3604    buckets: &mut [SaSint],
3605    mut d: SaSint,
3606    omp_block_start: SaSint,
3607    omp_block_size: SaSint,
3608) -> SaSint {
3609    if omp_block_size <= 0 {
3610        return d;
3611    }
3612
3613    let k = k as usize;
3614    let mut i = omp_block_start + omp_block_size - 1;
3615    let mut j = omp_block_start + 2 * 64 + 1;
3616
3617    while i >= j {
3618        let mut p0 = sa[i as usize];
3619        if p0 > 0 {
3620            sa[i as usize] = 0;
3621            d += p0 >> (SUFFIX_GROUP_BIT - 1);
3622            p0 &= !SUFFIX_GROUP_MARKER;
3623            let v0 = buckets_index2(
3624                t[(p0 - 1) as usize] as usize,
3625                usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
3626            );
3627            let c0 = t[(p0 - 1) as usize] as usize;
3628            buckets[3 * k + c0] -= 1;
3629            sa[buckets[3 * k + c0] as usize] = (p0 - 1)
3630                | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3631                    << (SAINT_BIT - 1))
3632                | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3633            buckets[v0] = d;
3634        }
3635
3636        let mut p1 = sa[(i - 1) as usize];
3637        if p1 > 0 {
3638            sa[(i - 1) as usize] = 0;
3639            d += p1 >> (SUFFIX_GROUP_BIT - 1);
3640            p1 &= !SUFFIX_GROUP_MARKER;
3641            let v1 = buckets_index2(
3642                t[(p1 - 1) as usize] as usize,
3643                usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
3644            );
3645            let c1 = t[(p1 - 1) as usize] as usize;
3646            buckets[3 * k + c1] -= 1;
3647            sa[buckets[3 * k + c1] as usize] = (p1 - 1)
3648                | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3649                    << (SAINT_BIT - 1))
3650                | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3651            buckets[v1] = d;
3652        }
3653
3654        i -= 2;
3655    }
3656
3657    j -= 2 * 64 + 1;
3658    while i >= j {
3659        let mut p = sa[i as usize];
3660        if p > 0 {
3661            sa[i as usize] = 0;
3662            d += p >> (SUFFIX_GROUP_BIT - 1);
3663            p &= !SUFFIX_GROUP_MARKER;
3664            let v = buckets_index2(
3665                t[(p - 1) as usize] as usize,
3666                usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3667            );
3668            let c = t[(p - 1) as usize] as usize;
3669            buckets[3 * k + c] -= 1;
3670            sa[buckets[3 * k + c] as usize] = (p - 1)
3671                | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3672                    << (SAINT_BIT - 1))
3673                | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3674            buckets[v] = d;
3675        }
3676        i -= 1;
3677    }
3678
3679    d
3680}
3681
3682#[allow(dead_code)]
3683fn partial_sorting_scan_right_to_left_32s_1k(
3684    t: &[SaSint],
3685    sa: &mut [SaSint],
3686    induction_bucket: &mut [SaSint],
3687    omp_block_start: SaSint,
3688    omp_block_size: SaSint,
3689) {
3690    if omp_block_size <= 0 {
3691        return;
3692    }
3693
3694    let mut i = omp_block_start + omp_block_size - 1;
3695    let mut j = omp_block_start + 2 * 64 + 1;
3696
3697    while i >= j {
3698        let p0 = sa[i as usize];
3699        if p0 > 0 {
3700            sa[i as usize] = 0;
3701            let c0 = t[(p0 - 1) as usize] as usize;
3702            induction_bucket[c0] -= 1;
3703            sa[induction_bucket[c0] as usize] = (p0 - 1)
3704                | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3705                    << (SAINT_BIT - 1));
3706        }
3707
3708        let p1 = sa[(i - 1) as usize];
3709        if p1 > 0 {
3710            sa[(i - 1) as usize] = 0;
3711            let c1 = t[(p1 - 1) as usize] as usize;
3712            induction_bucket[c1] -= 1;
3713            sa[induction_bucket[c1] as usize] = (p1 - 1)
3714                | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3715                    << (SAINT_BIT - 1));
3716        }
3717
3718        i -= 2;
3719    }
3720
3721    j -= 2 * 64 + 1;
3722    while i >= j {
3723        let p = sa[i as usize];
3724        if p > 0 {
3725            sa[i as usize] = 0;
3726            let c = t[(p - 1) as usize] as usize;
3727            induction_bucket[c] -= 1;
3728            sa[induction_bucket[c] as usize] = (p - 1)
3729                | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3730                    << (SAINT_BIT - 1));
3731        }
3732        i -= 1;
3733    }
3734}
3735
3736#[allow(dead_code)]
3737fn partial_sorting_scan_right_to_left_32s_6k_omp(
3738    t: &[SaSint],
3739    sa: &mut [SaSint],
3740    n: SaSint,
3741    buckets: &mut [SaSint],
3742    first_lms_suffix: SaSint,
3743    left_suffixes_count: SaSint,
3744    mut d: SaSint,
3745    threads: SaSint,
3746    _thread_state: &mut [ThreadState],
3747) -> SaSint {
3748    let scan_start = left_suffixes_count + 1;
3749    let scan_end = n - first_lms_suffix;
3750
3751    if threads == 1 || scan_end - scan_start < 65536 {
3752        d = partial_sorting_scan_right_to_left_32s_6k(
3753            t,
3754            sa,
3755            buckets,
3756            d,
3757            scan_start,
3758            scan_end - scan_start,
3759        );
3760    } else {
3761        let mut cache = vec![ThreadCache::default(); (scan_end - scan_start) as usize];
3762        let mut block_start = scan_end;
3763        while block_start > scan_start {
3764            let block_size =
3765                (block_start - scan_start).min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3766            block_start -= block_size;
3767            d = partial_sorting_scan_right_to_left_32s_6k_block_omp(
3768                t,
3769                sa,
3770                buckets,
3771                d,
3772                &mut cache,
3773                block_start,
3774                block_size,
3775                threads,
3776            );
3777        }
3778    }
3779
3780    d
3781}
3782
3783#[allow(dead_code)]
3784fn partial_sorting_scan_right_to_left_32s_4k_omp(
3785    t: &[SaSint],
3786    sa: &mut [SaSint],
3787    n: SaSint,
3788    k: SaSint,
3789    buckets: &mut [SaSint],
3790    mut d: SaSint,
3791    threads: SaSint,
3792    _thread_state: &mut [ThreadState],
3793) -> SaSint {
3794    if threads == 1 || n < 65536 {
3795        d = partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n);
3796    } else {
3797        let mut cache = vec![ThreadCache::default(); n as usize];
3798        let mut block_start = n;
3799        while block_start > 0 {
3800            let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3801            block_start -= block_size;
3802            d = partial_sorting_scan_right_to_left_32s_4k_block_omp(
3803                t,
3804                sa,
3805                k,
3806                buckets,
3807                d,
3808                &mut cache,
3809                block_start,
3810                block_size,
3811                threads,
3812            );
3813        }
3814    }
3815
3816    d
3817}
3818
3819#[allow(dead_code)]
3820fn partial_sorting_scan_right_to_left_32s_1k_omp(
3821    t: &[SaSint],
3822    sa: &mut [SaSint],
3823    n: SaSint,
3824    buckets: &mut [SaSint],
3825    threads: SaSint,
3826    _thread_state: &mut [ThreadState],
3827) {
3828    if threads == 1 || n < 65536 {
3829        partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n);
3830    } else {
3831        let mut cache = vec![ThreadCache::default(); n as usize];
3832        let mut block_start = n;
3833        while block_start > 0 {
3834            let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3835            block_start -= block_size;
3836            partial_sorting_scan_right_to_left_32s_1k_block_omp(
3837                t,
3838                sa,
3839                buckets,
3840                &mut cache,
3841                block_start,
3842                block_size,
3843                threads,
3844            );
3845        }
3846    }
3847}
3848
3849#[allow(dead_code)]
3850fn partial_sorting_scan_left_to_right_32s_6k_block_gather(
3851    t: &[SaSint],
3852    sa: &mut [SaSint],
3853    cache: &mut [ThreadCache],
3854    omp_block_start: SaSint,
3855    omp_block_size: SaSint,
3856) {
3857    let mut i = omp_block_start;
3858    let mut j = omp_block_start + omp_block_size - 64 - 1;
3859
3860    while i < j {
3861        let p0 = sa[i as usize];
3862        cache[i as usize].index = p0;
3863        let p0 = p0 & SAINT_MAX;
3864        cache[i as usize].symbol = if p0 != 0 {
3865            buckets_index4(
3866                t[(p0 - 1) as usize] as usize,
3867                usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
3868            ) as SaSint
3869        } else {
3870            0
3871        };
3872
3873        let p1 = sa[(i + 1) as usize];
3874        cache[(i + 1) as usize].index = p1;
3875        let p1 = p1 & SAINT_MAX;
3876        cache[(i + 1) as usize].symbol = if p1 != 0 {
3877            buckets_index4(
3878                t[(p1 - 1) as usize] as usize,
3879                usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
3880            ) as SaSint
3881        } else {
3882            0
3883        };
3884
3885        i += 2;
3886    }
3887
3888    j += 64 + 1;
3889    while i < j {
3890        let p = sa[i as usize];
3891        cache[i as usize].index = p;
3892        let p = p & SAINT_MAX;
3893        cache[i as usize].symbol = if p != 0 {
3894            buckets_index4(
3895                t[(p - 1) as usize] as usize,
3896                usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3897            ) as SaSint
3898        } else {
3899            0
3900        };
3901        i += 1;
3902    }
3903}
3904
3905#[allow(dead_code)]
3906fn partial_sorting_scan_left_to_right_32s_4k_block_gather(
3907    t: &[SaSint],
3908    sa: &mut [SaSint],
3909    cache: &mut [ThreadCache],
3910    omp_block_start: SaSint,
3911    omp_block_size: SaSint,
3912) {
3913    let mut i = omp_block_start;
3914    let mut j = omp_block_start + omp_block_size - 64 - 1;
3915
3916    while i < j {
3917        let mut symbol0 = SAINT_MIN;
3918        let mut p0 = sa[i as usize];
3919        if p0 > 0 {
3920            cache[i as usize].index = p0;
3921            p0 &= !SUFFIX_GROUP_MARKER;
3922            symbol0 = buckets_index2(
3923                t[(p0 - 1) as usize] as usize,
3924                usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3925            ) as SaSint;
3926            p0 = 0;
3927        }
3928        cache[i as usize].symbol = symbol0;
3929        sa[i as usize] = p0 & SAINT_MAX;
3930
3931        let mut symbol1 = SAINT_MIN;
3932        let mut p1 = sa[(i + 1) as usize];
3933        if p1 > 0 {
3934            cache[(i + 1) as usize].index = p1;
3935            p1 &= !SUFFIX_GROUP_MARKER;
3936            symbol1 = buckets_index2(
3937                t[(p1 - 1) as usize] as usize,
3938                usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3939            ) as SaSint;
3940            p1 = 0;
3941        }
3942        cache[(i + 1) as usize].symbol = symbol1;
3943        sa[(i + 1) as usize] = p1 & SAINT_MAX;
3944
3945        i += 2;
3946    }
3947
3948    j += 64 + 1;
3949    while i < j {
3950        let mut symbol = SAINT_MIN;
3951        let mut p = sa[i as usize];
3952        if p > 0 {
3953            cache[i as usize].index = p;
3954            p &= !SUFFIX_GROUP_MARKER;
3955            symbol = buckets_index2(
3956                t[(p - 1) as usize] as usize,
3957                usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3958            ) as SaSint;
3959            p = 0;
3960        }
3961        cache[i as usize].symbol = symbol;
3962        sa[i as usize] = p & SAINT_MAX;
3963        i += 1;
3964    }
3965}
3966
3967#[allow(dead_code)]
3968fn partial_sorting_scan_left_to_right_32s_1k_block_gather(
3969    t: &[SaSint],
3970    sa: &mut [SaSint],
3971    cache: &mut [ThreadCache],
3972    omp_block_start: SaSint,
3973    omp_block_size: SaSint,
3974) {
3975    let mut i = omp_block_start;
3976    let mut j = omp_block_start + omp_block_size - 64 - 1;
3977
3978    while i < j {
3979        let mut symbol0 = SAINT_MIN;
3980        let mut p0 = sa[i as usize];
3981        if p0 > 0 {
3982            cache[i as usize].index = (p0 - 1)
3983                | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3984                    << (SAINT_BIT - 1));
3985            symbol0 = t[(p0 - 1) as usize];
3986            p0 = 0;
3987        }
3988        cache[i as usize].symbol = symbol0;
3989        sa[i as usize] = p0 & SAINT_MAX;
3990
3991        let mut symbol1 = SAINT_MIN;
3992        let mut p1 = sa[(i + 1) as usize];
3993        if p1 > 0 {
3994            cache[(i + 1) as usize].index = (p1 - 1)
3995                | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3996                    << (SAINT_BIT - 1));
3997            symbol1 = t[(p1 - 1) as usize];
3998            p1 = 0;
3999        }
4000        cache[(i + 1) as usize].symbol = symbol1;
4001        sa[(i + 1) as usize] = p1 & SAINT_MAX;
4002
4003        i += 2;
4004    }
4005
4006    j += 64 + 1;
4007    while i < j {
4008        let mut symbol = SAINT_MIN;
4009        let mut p = sa[i as usize];
4010        if p > 0 {
4011            cache[i as usize].index = (p - 1)
4012                | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
4013                    << (SAINT_BIT - 1));
4014            symbol = t[(p - 1) as usize];
4015            p = 0;
4016        }
4017        cache[i as usize].symbol = symbol;
4018        sa[i as usize] = p & SAINT_MAX;
4019        i += 1;
4020    }
4021}
4022
4023#[allow(dead_code)]
4024fn partial_sorting_scan_right_to_left_32s_6k_block_gather(
4025    t: &[SaSint],
4026    sa: &mut [SaSint],
4027    cache: &mut [ThreadCache],
4028    omp_block_start: SaSint,
4029    omp_block_size: SaSint,
4030) {
4031    let mut i = omp_block_start;
4032    let mut j = omp_block_start + omp_block_size - 64 - 1;
4033
4034    while i < j {
4035        let p0 = sa[i as usize];
4036        cache[i as usize].index = p0;
4037        let p0 = p0 & SAINT_MAX;
4038        cache[i as usize].symbol = if p0 != 0 {
4039            buckets_index4(
4040                t[(p0 - 1) as usize] as usize,
4041                usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4042            ) as SaSint
4043        } else {
4044            0
4045        };
4046
4047        let p1 = sa[(i + 1) as usize];
4048        cache[(i + 1) as usize].index = p1;
4049        let p1 = p1 & SAINT_MAX;
4050        cache[(i + 1) as usize].symbol = if p1 != 0 {
4051            buckets_index4(
4052                t[(p1 - 1) as usize] as usize,
4053                usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4054            ) as SaSint
4055        } else {
4056            0
4057        };
4058
4059        i += 2;
4060    }
4061
4062    j += 64 + 1;
4063    while i < j {
4064        let p = sa[i as usize];
4065        cache[i as usize].index = p;
4066        let p = p & SAINT_MAX;
4067        cache[i as usize].symbol = if p != 0 {
4068            buckets_index4(
4069                t[(p - 1) as usize] as usize,
4070                usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4071            ) as SaSint
4072        } else {
4073            0
4074        };
4075        i += 1;
4076    }
4077}
4078
4079#[allow(dead_code)]
4080fn partial_sorting_scan_right_to_left_32s_4k_block_gather(
4081    t: &[SaSint],
4082    sa: &mut [SaSint],
4083    cache: &mut [ThreadCache],
4084    omp_block_start: SaSint,
4085    omp_block_size: SaSint,
4086) {
4087    let mut i = omp_block_start;
4088    let mut j = omp_block_start + omp_block_size - 64 - 1;
4089
4090    while i < j {
4091        let mut symbol0 = SAINT_MIN;
4092        let mut p0 = sa[i as usize];
4093        if p0 > 0 {
4094            sa[i as usize] = 0;
4095            cache[i as usize].index = p0;
4096            p0 &= !SUFFIX_GROUP_MARKER;
4097            symbol0 = buckets_index2(
4098                t[(p0 - 1) as usize] as usize,
4099                usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4100            ) as SaSint;
4101        }
4102        cache[i as usize].symbol = symbol0;
4103
4104        let mut symbol1 = SAINT_MIN;
4105        let mut p1 = sa[(i + 1) as usize];
4106        if p1 > 0 {
4107            sa[(i + 1) as usize] = 0;
4108            cache[(i + 1) as usize].index = p1;
4109            p1 &= !SUFFIX_GROUP_MARKER;
4110            symbol1 = buckets_index2(
4111                t[(p1 - 1) as usize] as usize,
4112                usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4113            ) as SaSint;
4114        }
4115        cache[(i + 1) as usize].symbol = symbol1;
4116
4117        i += 2;
4118    }
4119
4120    j += 64 + 1;
4121    while i < j {
4122        let mut symbol = SAINT_MIN;
4123        let mut p = sa[i as usize];
4124        if p > 0 {
4125            sa[i as usize] = 0;
4126            cache[i as usize].index = p;
4127            p &= !SUFFIX_GROUP_MARKER;
4128            symbol = buckets_index2(
4129                t[(p - 1) as usize] as usize,
4130                usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4131            ) as SaSint;
4132        }
4133        cache[i as usize].symbol = symbol;
4134        i += 1;
4135    }
4136}
4137
4138#[allow(dead_code)]
4139fn partial_sorting_scan_right_to_left_32s_1k_block_gather(
4140    t: &[SaSint],
4141    sa: &mut [SaSint],
4142    cache: &mut [ThreadCache],
4143    omp_block_start: SaSint,
4144    omp_block_size: SaSint,
4145) {
4146    let mut i = omp_block_start;
4147    let mut j = omp_block_start + omp_block_size - 64 - 1;
4148
4149    while i < j {
4150        let mut symbol0 = SAINT_MIN;
4151        let p0 = sa[i as usize];
4152        if p0 > 0 {
4153            sa[i as usize] = 0;
4154            cache[i as usize].index = (p0 - 1)
4155                | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
4156                    << (SAINT_BIT - 1));
4157            symbol0 = t[(p0 - 1) as usize];
4158        }
4159        cache[i as usize].symbol = symbol0;
4160
4161        let mut symbol1 = SAINT_MIN;
4162        let p1 = sa[(i + 1) as usize];
4163        if p1 > 0 {
4164            sa[(i + 1) as usize] = 0;
4165            cache[(i + 1) as usize].index = (p1 - 1)
4166                | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
4167                    << (SAINT_BIT - 1));
4168            symbol1 = t[(p1 - 1) as usize];
4169        }
4170        cache[(i + 1) as usize].symbol = symbol1;
4171
4172        i += 2;
4173    }
4174
4175    j += 64 + 1;
4176    while i < j {
4177        let mut symbol = SAINT_MIN;
4178        let p = sa[i as usize];
4179        if p > 0 {
4180            sa[i as usize] = 0;
4181            cache[i as usize].index = (p - 1)
4182                | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
4183                    << (SAINT_BIT - 1));
4184            symbol = t[(p - 1) as usize];
4185        }
4186        cache[i as usize].symbol = symbol;
4187        i += 1;
4188    }
4189}
4190
4191#[allow(dead_code)]
4192fn partial_sorting_scan_left_to_right_32s_6k_block_sort(
4193    t: &[SaSint],
4194    buckets: &mut [SaSint],
4195    mut d: SaSint,
4196    cache: &mut [ThreadCache],
4197    omp_block_start: SaSint,
4198    omp_block_size: SaSint,
4199) -> SaSint {
4200    let mut i = omp_block_start;
4201    let omp_block_end = omp_block_start + omp_block_size;
4202    let mut j = omp_block_end - 64 - 1;
4203
4204    while i < j {
4205        let v0 = cache[i as usize].symbol as usize;
4206        let p0 = cache[i as usize].index;
4207        d += SaSint::from(p0 < 0);
4208        cache[i as usize].symbol = buckets[v0];
4209        buckets[v0] += 1;
4210        cache[i as usize].index =
4211            (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4212        buckets[2 + v0] = d;
4213        if cache[i as usize].symbol < omp_block_end {
4214            let s = cache[i as usize].symbol as usize;
4215            let q = cache[i as usize].index & SAINT_MAX;
4216            cache[s].index = cache[i as usize].index;
4217            cache[s].symbol = buckets_index4(
4218                t[(q - 1) as usize] as usize,
4219                usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4220            ) as SaSint;
4221        }
4222
4223        let v1 = cache[(i + 1) as usize].symbol as usize;
4224        let p1 = cache[(i + 1) as usize].index;
4225        d += SaSint::from(p1 < 0);
4226        cache[(i + 1) as usize].symbol = buckets[v1];
4227        buckets[v1] += 1;
4228        cache[(i + 1) as usize].index =
4229            (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4230        buckets[2 + v1] = d;
4231        if cache[(i + 1) as usize].symbol < omp_block_end {
4232            let s = cache[(i + 1) as usize].symbol as usize;
4233            let q = cache[(i + 1) as usize].index & SAINT_MAX;
4234            cache[s].index = cache[(i + 1) as usize].index;
4235            cache[s].symbol = buckets_index4(
4236                t[(q - 1) as usize] as usize,
4237                usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4238            ) as SaSint;
4239        }
4240
4241        i += 2;
4242    }
4243
4244    j += 64 + 1;
4245    while i < j {
4246        let v = cache[i as usize].symbol as usize;
4247        let p = cache[i as usize].index;
4248        d += SaSint::from(p < 0);
4249        cache[i as usize].symbol = buckets[v];
4250        buckets[v] += 1;
4251        cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4252        buckets[2 + v] = d;
4253        if cache[i as usize].symbol < omp_block_end {
4254            let s = cache[i as usize].symbol as usize;
4255            let q = cache[i as usize].index & SAINT_MAX;
4256            cache[s].index = cache[i as usize].index;
4257            cache[s].symbol = buckets_index4(
4258                t[(q - 1) as usize] as usize,
4259                usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4260            ) as SaSint;
4261        }
4262        i += 1;
4263    }
4264
4265    d
4266}
4267
4268#[allow(dead_code)]
4269fn partial_sorting_scan_left_to_right_32s_4k_block_sort(
4270    t: &[SaSint],
4271    k: SaSint,
4272    buckets: &mut [SaSint],
4273    mut d: SaSint,
4274    cache: &mut [ThreadCache],
4275    omp_block_start: SaSint,
4276    omp_block_size: SaSint,
4277) -> SaSint {
4278    let k = k as usize;
4279    let mut i = omp_block_start;
4280    let omp_block_end = omp_block_start + omp_block_size;
4281    let mut j = omp_block_end - 64 - 1;
4282
4283    while i < j {
4284        for current in [i, i + 1] {
4285            let v = cache[current as usize].symbol;
4286            if v >= 0 {
4287                let p = cache[current as usize].index;
4288                d += p >> (SUFFIX_GROUP_BIT - 1);
4289                let bucket_index = (v >> 1) as usize;
4290                let v_usize = v as usize;
4291                cache[current as usize].symbol = buckets[2 * k + bucket_index];
4292                buckets[2 * k + bucket_index] += 1;
4293                cache[current as usize].index = (p - 1)
4294                    | ((v & 1) << (SAINT_BIT - 1))
4295                    | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4296                buckets[v_usize] = d;
4297                if cache[current as usize].symbol < omp_block_end {
4298                    let ni = cache[current as usize].symbol as usize;
4299                    let mut np = cache[current as usize].index;
4300                    if np > 0 {
4301                        cache[ni].index = np;
4302                        np &= !SUFFIX_GROUP_MARKER;
4303                        cache[ni].symbol = buckets_index2(
4304                            t[(np - 1) as usize] as usize,
4305                            usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4306                        ) as SaSint;
4307                        np = 0;
4308                    }
4309                    cache[current as usize].index = np & SAINT_MAX;
4310                }
4311            }
4312        }
4313        i += 2;
4314    }
4315
4316    j += 64 + 1;
4317    while i < j {
4318        let v = cache[i as usize].symbol;
4319        if v >= 0 {
4320            let p = cache[i as usize].index;
4321            d += p >> (SUFFIX_GROUP_BIT - 1);
4322            let bucket_index = (v >> 1) as usize;
4323            let v_usize = v as usize;
4324            cache[i as usize].symbol = buckets[2 * k + bucket_index];
4325            buckets[2 * k + bucket_index] += 1;
4326            cache[i as usize].index = (p - 1)
4327                | ((v & 1) << (SAINT_BIT - 1))
4328                | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4329            buckets[v_usize] = d;
4330            if cache[i as usize].symbol < omp_block_end {
4331                let ni = cache[i as usize].symbol as usize;
4332                let mut np = cache[i as usize].index;
4333                if np > 0 {
4334                    cache[ni].index = np;
4335                    np &= !SUFFIX_GROUP_MARKER;
4336                    cache[ni].symbol = buckets_index2(
4337                        t[(np - 1) as usize] as usize,
4338                        usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4339                    ) as SaSint;
4340                    np = 0;
4341                }
4342                cache[i as usize].index = np & SAINT_MAX;
4343            }
4344        }
4345        i += 1;
4346    }
4347
4348    d
4349}
4350
4351#[allow(dead_code)]
4352fn partial_sorting_scan_left_to_right_32s_1k_block_sort(
4353    t: &[SaSint],
4354    induction_bucket: &mut [SaSint],
4355    cache: &mut [ThreadCache],
4356    omp_block_start: SaSint,
4357    omp_block_size: SaSint,
4358) {
4359    let mut i = omp_block_start;
4360    let omp_block_end = omp_block_start + omp_block_size;
4361    let mut j = omp_block_end - 64 - 1;
4362
4363    while i < j {
4364        for current in [i, i + 1] {
4365            let v = cache[current as usize].symbol;
4366            if v >= 0 {
4367                cache[current as usize].symbol = induction_bucket[v as usize];
4368                induction_bucket[v as usize] += 1;
4369                if cache[current as usize].symbol < omp_block_end {
4370                    let ni = cache[current as usize].symbol as usize;
4371                    let mut np = cache[current as usize].index;
4372                    if np > 0 {
4373                        cache[ni].index = (np - 1)
4374                            | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize])
4375                                as SaSint)
4376                                << (SAINT_BIT - 1));
4377                        cache[ni].symbol = t[(np - 1) as usize];
4378                        np = 0;
4379                    }
4380                    cache[current as usize].index = np & SAINT_MAX;
4381                }
4382            }
4383        }
4384        i += 2;
4385    }
4386
4387    j = omp_block_end;
4388    while i < j {
4389        let v = cache[i as usize].symbol;
4390        if v >= 0 {
4391            cache[i as usize].symbol = induction_bucket[v as usize];
4392            induction_bucket[v as usize] += 1;
4393            if cache[i as usize].symbol < omp_block_end {
4394                let ni = cache[i as usize].symbol as usize;
4395                let mut np = cache[i as usize].index;
4396                if np > 0 {
4397                    cache[ni].index = (np - 1)
4398                        | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]) as SaSint)
4399                            << (SAINT_BIT - 1));
4400                    cache[ni].symbol = t[(np - 1) as usize];
4401                    np = 0;
4402                }
4403                cache[i as usize].index = np & SAINT_MAX;
4404            }
4405        }
4406        i += 1;
4407    }
4408}
4409
4410#[allow(dead_code)]
4411fn partial_sorting_scan_right_to_left_32s_6k_block_sort(
4412    t: &[SaSint],
4413    buckets: &mut [SaSint],
4414    mut d: SaSint,
4415    cache: &mut [ThreadCache],
4416    omp_block_start: SaSint,
4417    omp_block_size: SaSint,
4418) -> SaSint {
4419    let mut i = omp_block_start + omp_block_size - 1;
4420    let mut j = omp_block_start + 64 + 1;
4421
4422    while i >= j {
4423        let v0 = cache[i as usize].symbol as usize;
4424        let p0 = cache[i as usize].index;
4425        d += SaSint::from(p0 < 0);
4426        buckets[v0] -= 1;
4427        cache[i as usize].symbol = buckets[v0];
4428        cache[i as usize].index =
4429            (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4430        buckets[2 + v0] = d;
4431        if cache[i as usize].symbol >= omp_block_start {
4432            let s = cache[i as usize].symbol as usize;
4433            let q = cache[i as usize].index & SAINT_MAX;
4434            cache[s].index = cache[i as usize].index;
4435            cache[s].symbol = buckets_index4(
4436                t[(q - 1) as usize] as usize,
4437                usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4438            ) as SaSint;
4439        }
4440
4441        let v1 = cache[(i - 1) as usize].symbol as usize;
4442        let p1 = cache[(i - 1) as usize].index;
4443        d += SaSint::from(p1 < 0);
4444        buckets[v1] -= 1;
4445        cache[(i - 1) as usize].symbol = buckets[v1];
4446        cache[(i - 1) as usize].index =
4447            (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4448        buckets[2 + v1] = d;
4449        if cache[(i - 1) as usize].symbol >= omp_block_start {
4450            let s = cache[(i - 1) as usize].symbol as usize;
4451            let q = cache[(i - 1) as usize].index & SAINT_MAX;
4452            cache[s].index = cache[(i - 1) as usize].index;
4453            cache[s].symbol = buckets_index4(
4454                t[(q - 1) as usize] as usize,
4455                usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4456            ) as SaSint;
4457        }
4458
4459        i -= 2;
4460    }
4461
4462    j -= 64 + 1;
4463    while i >= j {
4464        let v = cache[i as usize].symbol as usize;
4465        let p = cache[i as usize].index;
4466        d += SaSint::from(p < 0);
4467        buckets[v] -= 1;
4468        cache[i as usize].symbol = buckets[v];
4469        cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4470        buckets[2 + v] = d;
4471        if cache[i as usize].symbol >= omp_block_start {
4472            let s = cache[i as usize].symbol as usize;
4473            let q = cache[i as usize].index & SAINT_MAX;
4474            cache[s].index = cache[i as usize].index;
4475            cache[s].symbol = buckets_index4(
4476                t[(q - 1) as usize] as usize,
4477                usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4478            ) as SaSint;
4479        }
4480        i -= 1;
4481    }
4482
4483    d
4484}
4485
4486#[allow(dead_code)]
4487fn partial_sorting_scan_right_to_left_32s_4k_block_sort(
4488    t: &[SaSint],
4489    k: SaSint,
4490    buckets: &mut [SaSint],
4491    mut d: SaSint,
4492    cache: &mut [ThreadCache],
4493    omp_block_start: SaSint,
4494    omp_block_size: SaSint,
4495) -> SaSint {
4496    let k = k as usize;
4497    let mut i = omp_block_start + omp_block_size - 1;
4498    let mut j = omp_block_start + 64 + 1;
4499
4500    while i >= j {
4501        for current in [i, i - 1] {
4502            let v = cache[current as usize].symbol;
4503            if v >= 0 {
4504                let p = cache[current as usize].index;
4505                d += p >> (SUFFIX_GROUP_BIT - 1);
4506                let bucket_index = (v >> 1) as usize;
4507                let v_usize = v as usize;
4508                buckets[3 * k + bucket_index] -= 1;
4509                cache[current as usize].symbol = buckets[3 * k + bucket_index];
4510                cache[current as usize].index = (p - 1)
4511                    | ((v & 1) << (SAINT_BIT - 1))
4512                    | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4513                buckets[v_usize] = d;
4514                if cache[current as usize].symbol >= omp_block_start {
4515                    let ni = cache[current as usize].symbol as usize;
4516                    let mut np = cache[current as usize].index;
4517                    if np > 0 {
4518                        cache[current as usize].index = 0;
4519                        cache[ni].index = np;
4520                        np &= !SUFFIX_GROUP_MARKER;
4521                        cache[ni].symbol = buckets_index2(
4522                            t[(np - 1) as usize] as usize,
4523                            usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4524                        ) as SaSint;
4525                    }
4526                }
4527            }
4528        }
4529        i -= 2;
4530    }
4531
4532    j -= 64 + 1;
4533    while i >= j {
4534        let v = cache[i as usize].symbol;
4535        if v >= 0 {
4536            let p = cache[i as usize].index;
4537            d += p >> (SUFFIX_GROUP_BIT - 1);
4538            let bucket_index = (v >> 1) as usize;
4539            let v_usize = v as usize;
4540            buckets[3 * k + bucket_index] -= 1;
4541            cache[i as usize].symbol = buckets[3 * k + bucket_index];
4542            cache[i as usize].index = (p - 1)
4543                | ((v & 1) << (SAINT_BIT - 1))
4544                | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4545            buckets[v_usize] = d;
4546            if cache[i as usize].symbol >= omp_block_start {
4547                let ni = cache[i as usize].symbol as usize;
4548                let mut np = cache[i as usize].index;
4549                if np > 0 {
4550                    cache[i as usize].index = 0;
4551                    cache[ni].index = np;
4552                    np &= !SUFFIX_GROUP_MARKER;
4553                    cache[ni].symbol = buckets_index2(
4554                        t[(np - 1) as usize] as usize,
4555                        usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4556                    ) as SaSint;
4557                }
4558            }
4559        }
4560        i -= 1;
4561    }
4562
4563    d
4564}
4565
4566#[allow(dead_code)]
4567fn partial_sorting_scan_right_to_left_32s_1k_block_sort(
4568    t: &[SaSint],
4569    induction_bucket: &mut [SaSint],
4570    cache: &mut [ThreadCache],
4571    omp_block_start: SaSint,
4572    omp_block_size: SaSint,
4573) {
4574    let mut i = omp_block_start + omp_block_size - 1;
4575    let mut j = omp_block_start + 64 + 1;
4576
4577    while i >= j {
4578        for current in [i, i - 1] {
4579            let v = cache[current as usize].symbol;
4580            if v >= 0 {
4581                induction_bucket[v as usize] -= 1;
4582                cache[current as usize].symbol = induction_bucket[v as usize];
4583                if cache[current as usize].symbol >= omp_block_start {
4584                    let ni = cache[current as usize].symbol as usize;
4585                    let np = cache[current as usize].index;
4586                    if np > 0 {
4587                        cache[current as usize].index = 0;
4588                        cache[ni].index = (np - 1)
4589                            | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize])
4590                                as SaSint)
4591                                << (SAINT_BIT - 1));
4592                        cache[ni].symbol = t[(np - 1) as usize];
4593                    }
4594                }
4595            }
4596        }
4597        i -= 2;
4598    }
4599
4600    j -= 64 + 1;
4601    while i >= j {
4602        let v = cache[i as usize].symbol;
4603        if v >= 0 {
4604            induction_bucket[v as usize] -= 1;
4605            cache[i as usize].symbol = induction_bucket[v as usize];
4606            if cache[i as usize].symbol >= omp_block_start {
4607                let ni = cache[i as usize].symbol as usize;
4608                let np = cache[i as usize].index;
4609                if np > 0 {
4610                    cache[i as usize].index = 0;
4611                    cache[ni].index = (np - 1)
4612                        | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]) as SaSint)
4613                            << (SAINT_BIT - 1));
4614                    cache[ni].symbol = t[(np - 1) as usize];
4615                }
4616            }
4617        }
4618        i -= 1;
4619    }
4620}
4621
4622#[allow(dead_code)]
4623fn partial_sorting_scan_left_to_right_32s_6k_block_omp(
4624    t: &[SaSint],
4625    sa: &mut [SaSint],
4626    buckets: &mut [SaSint],
4627    d: SaSint,
4628    cache: &mut [ThreadCache],
4629    block_start: SaSint,
4630    block_size: SaSint,
4631    threads: SaSint,
4632) -> SaSint {
4633    if block_size <= 0 {
4634        return d;
4635    }
4636    if threads == 1 || block_size < 16_384 {
4637        return partial_sorting_scan_left_to_right_32s_6k(
4638            t,
4639            sa,
4640            buckets,
4641            d,
4642            block_start,
4643            block_size,
4644        );
4645    }
4646
4647    let threads_usize = usize::try_from(threads)
4648        .expect("threads must be non-negative")
4649        .max(1);
4650    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4651    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4652    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4653    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4654
4655    for omp_thread_num in 0..omp_num_threads {
4656        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4657            omp_block_stride
4658        } else {
4659            block_size_usize - omp_thread_num * omp_block_stride
4660        };
4661        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4662        if omp_block_size == 0 {
4663            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4664        }
4665        partial_sorting_scan_left_to_right_32s_6k_block_gather(
4666            t,
4667            sa,
4668            &mut cache[omp_thread_num * omp_block_stride
4669                ..omp_thread_num * omp_block_stride + omp_block_size],
4670            omp_block_start as SaSint,
4671            omp_block_size as SaSint,
4672        );
4673    }
4674
4675    let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
4676        t,
4677        buckets,
4678        d,
4679        &mut cache[..block_size_usize],
4680        block_start,
4681        block_size,
4682    );
4683    place_cached_suffixes(sa, &cache[..block_size_usize], 0, block_size);
4684    d
4685}
4686
4687#[allow(dead_code)]
4688fn partial_sorting_scan_left_to_right_32s_4k_block_omp(
4689    t: &[SaSint],
4690    sa: &mut [SaSint],
4691    k: SaSint,
4692    buckets: &mut [SaSint],
4693    d: SaSint,
4694    cache: &mut [ThreadCache],
4695    block_start: SaSint,
4696    block_size: SaSint,
4697    threads: SaSint,
4698) -> SaSint {
4699    if block_size <= 0 {
4700        return d;
4701    }
4702    if threads == 1 || block_size < 16_384 {
4703        return partial_sorting_scan_left_to_right_32s_4k(
4704            t,
4705            sa,
4706            k,
4707            buckets,
4708            d,
4709            block_start,
4710            block_size,
4711        );
4712    }
4713
4714    let threads_usize = usize::try_from(threads)
4715        .expect("threads must be non-negative")
4716        .max(1);
4717    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4718    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4719    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4720    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4721
4722    for omp_thread_num in 0..omp_num_threads {
4723        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4724            omp_block_stride
4725        } else {
4726            block_size_usize - omp_thread_num * omp_block_stride
4727        };
4728        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4729        if omp_block_size == 0 {
4730            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4731        }
4732        partial_sorting_scan_left_to_right_32s_4k_block_gather(
4733            t,
4734            sa,
4735            &mut cache[omp_thread_num * omp_block_stride
4736                ..omp_thread_num * omp_block_stride + omp_block_size],
4737            omp_block_start as SaSint,
4738            omp_block_size as SaSint,
4739        );
4740    }
4741
4742    let cache = &mut cache[..block_size_usize];
4743    let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
4744        t,
4745        k,
4746        buckets,
4747        d,
4748        cache,
4749        block_start,
4750        block_size,
4751    );
4752    for entry in cache.iter() {
4753        if entry.symbol >= 0 {
4754            sa[entry.symbol as usize] = entry.index;
4755        }
4756    }
4757    d
4758}
4759
4760#[allow(dead_code)]
4761fn partial_sorting_scan_left_to_right_32s_1k_block_omp(
4762    t: &[SaSint],
4763    sa: &mut [SaSint],
4764    buckets: &mut [SaSint],
4765    cache: &mut [ThreadCache],
4766    block_start: SaSint,
4767    block_size: SaSint,
4768    threads: SaSint,
4769) {
4770    if block_size <= 0 {
4771        return;
4772    }
4773    if threads == 1 || block_size < 16_384 {
4774        partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, block_start, block_size);
4775        return;
4776    }
4777
4778    let threads_usize = usize::try_from(threads)
4779        .expect("threads must be non-negative")
4780        .max(1);
4781    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4782    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4783    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4784    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4785
4786    for omp_thread_num in 0..omp_num_threads {
4787        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4788            omp_block_stride
4789        } else {
4790            block_size_usize - omp_thread_num * omp_block_stride
4791        };
4792        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4793        if omp_block_size == 0 {
4794            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4795        }
4796        partial_sorting_scan_left_to_right_32s_1k_block_gather(
4797            t,
4798            sa,
4799            &mut cache[omp_thread_num * omp_block_stride
4800                ..omp_thread_num * omp_block_stride + omp_block_size],
4801            omp_block_start as SaSint,
4802            omp_block_size as SaSint,
4803        );
4804    }
4805
4806    let cache = &mut cache[..block_size_usize];
4807    partial_sorting_scan_left_to_right_32s_1k_block_sort(
4808        t,
4809        buckets,
4810        cache,
4811        block_start,
4812        block_size,
4813    );
4814    compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
4815}
4816
4817#[allow(dead_code)]
4818fn partial_sorting_scan_right_to_left_32s_6k_block_omp(
4819    t: &[SaSint],
4820    sa: &mut [SaSint],
4821    buckets: &mut [SaSint],
4822    mut d: SaSint,
4823    cache: &mut [ThreadCache],
4824    block_start: SaSint,
4825    block_size: SaSint,
4826    threads: SaSint,
4827) -> SaSint {
4828    if block_size <= 0 {
4829        return d;
4830    }
4831    if threads == 1 || block_size < 16_384 {
4832        return partial_sorting_scan_right_to_left_32s_6k(
4833            t,
4834            sa,
4835            buckets,
4836            d,
4837            block_start,
4838            block_size,
4839        );
4840    }
4841
4842    let threads_usize = usize::try_from(threads)
4843        .expect("threads must be non-negative")
4844        .max(1);
4845    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4846    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4847    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4848    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4849
4850    for omp_thread_num in 0..omp_num_threads {
4851        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4852            omp_block_stride
4853        } else {
4854            block_size_usize - omp_thread_num * omp_block_stride
4855        };
4856        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4857        if omp_block_size == 0 {
4858            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4859        }
4860        partial_sorting_scan_right_to_left_32s_6k_block_gather(
4861            t,
4862            sa,
4863            &mut cache[omp_thread_num * omp_block_stride
4864                ..omp_thread_num * omp_block_stride + omp_block_size],
4865            omp_block_start as SaSint,
4866            omp_block_size as SaSint,
4867        );
4868    }
4869
4870    d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
4871        t,
4872        buckets,
4873        d,
4874        &mut cache[..block_size_usize],
4875        block_start,
4876        block_size,
4877    );
4878    for omp_thread_num in 0..omp_num_threads {
4879        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4880            omp_block_stride
4881        } else {
4882            block_size_usize - omp_thread_num * omp_block_stride
4883        };
4884        let cache_start = omp_thread_num * omp_block_stride;
4885        if omp_block_size == 0 {
4886            omp_block_size = block_size_usize - cache_start;
4887        }
4888        for entry in &cache[cache_start..cache_start + omp_block_size] {
4889            sa[entry.symbol as usize] = entry.index;
4890        }
4891    }
4892    d
4893}
4894
4895#[allow(dead_code)]
4896fn partial_sorting_scan_right_to_left_32s_4k_block_omp(
4897    t: &[SaSint],
4898    sa: &mut [SaSint],
4899    k: SaSint,
4900    buckets: &mut [SaSint],
4901    mut d: SaSint,
4902    cache: &mut [ThreadCache],
4903    block_start: SaSint,
4904    block_size: SaSint,
4905    threads: SaSint,
4906) -> SaSint {
4907    if block_size <= 0 {
4908        return d;
4909    }
4910    if threads == 1 || block_size < 16_384 {
4911        return partial_sorting_scan_right_to_left_32s_4k(
4912            t,
4913            sa,
4914            k,
4915            buckets,
4916            d,
4917            block_start,
4918            block_size,
4919        );
4920    }
4921
4922    let threads_usize = usize::try_from(threads)
4923        .expect("threads must be non-negative")
4924        .max(1);
4925    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4926    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4927    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4928    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4929
4930    for omp_thread_num in 0..omp_num_threads {
4931        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4932            omp_block_stride
4933        } else {
4934            block_size_usize - omp_thread_num * omp_block_stride
4935        };
4936        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4937        if omp_block_size == 0 {
4938            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4939        }
4940        partial_sorting_scan_right_to_left_32s_4k_block_gather(
4941            t,
4942            sa,
4943            &mut cache[omp_thread_num * omp_block_stride
4944                ..omp_thread_num * omp_block_stride + omp_block_size],
4945            omp_block_start as SaSint,
4946            omp_block_size as SaSint,
4947        );
4948    }
4949
4950    d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
4951        t,
4952        k,
4953        buckets,
4954        d,
4955        &mut cache[..block_size_usize],
4956        block_start,
4957        block_size,
4958    );
4959    let mut write = 0usize;
4960    for read in 0..block_size_usize {
4961        let entry = cache[read];
4962        if entry.symbol >= 0 {
4963            cache[write] = entry;
4964            write += 1;
4965        }
4966    }
4967    for entry in &cache[..write] {
4968        sa[entry.symbol as usize] = entry.index;
4969    }
4970    d
4971}
4972
4973#[allow(dead_code)]
4974fn partial_sorting_scan_right_to_left_32s_1k_block_omp(
4975    t: &[SaSint],
4976    sa: &mut [SaSint],
4977    buckets: &mut [SaSint],
4978    cache: &mut [ThreadCache],
4979    block_start: SaSint,
4980    block_size: SaSint,
4981    threads: SaSint,
4982) {
4983    if block_size <= 0 {
4984        return;
4985    }
4986    if threads == 1 || block_size < 16_384 {
4987        partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, block_start, block_size);
4988        return;
4989    }
4990
4991    let threads_usize = usize::try_from(threads)
4992        .expect("threads must be non-negative")
4993        .max(1);
4994    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4995    let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4996    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4997    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4998
4999    for omp_thread_num in 0..omp_num_threads {
5000        let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5001            omp_block_stride
5002        } else {
5003            block_size_usize - omp_thread_num * omp_block_stride
5004        };
5005        let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5006        if omp_block_size == 0 {
5007            omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
5008        }
5009        partial_sorting_scan_right_to_left_32s_1k_block_gather(
5010            t,
5011            sa,
5012            &mut cache[omp_thread_num * omp_block_stride
5013                ..omp_thread_num * omp_block_stride + omp_block_size],
5014            omp_block_start as SaSint,
5015            omp_block_size as SaSint,
5016        );
5017    }
5018
5019    let cache = &mut cache[..block_size_usize];
5020    partial_sorting_scan_right_to_left_32s_1k_block_sort(
5021        t,
5022        buckets,
5023        cache,
5024        block_start,
5025        block_size,
5026    );
5027    compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
5028}
5029
5030#[allow(dead_code)]
5031fn partial_sorting_gather_lms_suffixes_32s_4k(
5032    sa: &mut [SaSint],
5033    omp_block_start: SaSint,
5034    omp_block_size: SaSint,
5035) -> SaSint {
5036    let mut i = omp_block_start;
5037    let mut j = omp_block_start + omp_block_size - 3;
5038    let mut l = omp_block_start;
5039
5040    while i < j {
5041        let s0 = sa[i as usize] as SaUint;
5042        sa[l as usize] = (s0.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5043            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5044        l += SaSint::from((s0 as SaSint) < 0);
5045
5046        let s1 = sa[(i + 1) as usize] as SaUint;
5047        sa[l as usize] = (s1.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5048            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5049        l += SaSint::from((s1 as SaSint) < 0);
5050
5051        let s2 = sa[(i + 2) as usize] as SaUint;
5052        sa[l as usize] = (s2.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5053            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5054        l += SaSint::from((s2 as SaSint) < 0);
5055
5056        let s3 = sa[(i + 3) as usize] as SaUint;
5057        sa[l as usize] = (s3.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5058            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5059        l += SaSint::from((s3 as SaSint) < 0);
5060
5061        i += 4;
5062    }
5063
5064    j += 3;
5065    while i < j {
5066        let s = sa[i as usize] as SaUint;
5067        sa[l as usize] = (s.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5068            & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5069        l += SaSint::from((s as SaSint) < 0);
5070        i += 1;
5071    }
5072
5073    l
5074}
5075
5076#[allow(dead_code)]
5077fn partial_sorting_gather_lms_suffixes_32s_1k(
5078    sa: &mut [SaSint],
5079    omp_block_start: SaSint,
5080    omp_block_size: SaSint,
5081) -> SaSint {
5082    let mut i = omp_block_start;
5083    let mut j = omp_block_start + omp_block_size - 3;
5084    let mut l = omp_block_start;
5085
5086    while i < j {
5087        let s0 = sa[i as usize];
5088        sa[l as usize] = s0 & SAINT_MAX;
5089        l += SaSint::from(s0 < 0);
5090
5091        let s1 = sa[(i + 1) as usize];
5092        sa[l as usize] = s1 & SAINT_MAX;
5093        l += SaSint::from(s1 < 0);
5094
5095        let s2 = sa[(i + 2) as usize];
5096        sa[l as usize] = s2 & SAINT_MAX;
5097        l += SaSint::from(s2 < 0);
5098
5099        let s3 = sa[(i + 3) as usize];
5100        sa[l as usize] = s3 & SAINT_MAX;
5101        l += SaSint::from(s3 < 0);
5102
5103        i += 4;
5104    }
5105
5106    j += 3;
5107    while i < j {
5108        let s = sa[i as usize];
5109        sa[l as usize] = s & SAINT_MAX;
5110        l += SaSint::from(s < 0);
5111        i += 1;
5112    }
5113
5114    l
5115}
5116
5117#[allow(dead_code)]
5118fn partial_sorting_gather_lms_suffixes_32s_4k_omp(
5119    sa: &mut [SaSint],
5120    n: SaSint,
5121    threads: SaSint,
5122    thread_state: &mut [ThreadState],
5123) {
5124    let n_usize = usize::try_from(n).expect("n must be non-negative");
5125    let thread_count = if threads > 1 && n >= 65_536 {
5126        usize::try_from(threads)
5127            .expect("threads must be non-negative")
5128            .min(thread_state.len())
5129            .max(1)
5130    } else {
5131        1
5132    };
5133
5134    if thread_count == 1 {
5135        let _ = partial_sorting_gather_lms_suffixes_32s_4k(sa, 0, n);
5136        return;
5137    }
5138
5139    let block_stride = (n_usize / thread_count) & !15usize;
5140    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5141        let block_start = thread * block_stride;
5142        let block_size = if thread + 1 < thread_count {
5143            block_stride
5144        } else {
5145            n_usize - block_start
5146        };
5147        state.position = block_start as SaSint;
5148        state.count = partial_sorting_gather_lms_suffixes_32s_4k(
5149            sa,
5150            block_start as SaSint,
5151            block_size as SaSint,
5152        ) - block_start as SaSint;
5153    }
5154
5155    let mut position = 0usize;
5156    for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5157        let count = usize::try_from(state.count).expect("count must be non-negative");
5158        let src = usize::try_from(state.position).expect("position must be non-negative");
5159        if thread > 0 && count > 0 {
5160            sa.copy_within(src..src + count, position);
5161        }
5162        position += count;
5163    }
5164}
5165
5166#[allow(dead_code)]
5167fn partial_sorting_gather_lms_suffixes_32s_1k_omp(
5168    sa: &mut [SaSint],
5169    n: SaSint,
5170    threads: SaSint,
5171    thread_state: &mut [ThreadState],
5172) {
5173    let n_usize = usize::try_from(n).expect("n must be non-negative");
5174    let thread_count = if threads > 1 && n >= 65_536 {
5175        usize::try_from(threads)
5176            .expect("threads must be non-negative")
5177            .min(thread_state.len())
5178            .max(1)
5179    } else {
5180        1
5181    };
5182
5183    if thread_count == 1 {
5184        let _ = partial_sorting_gather_lms_suffixes_32s_1k(sa, 0, n);
5185        return;
5186    }
5187
5188    let block_stride = (n_usize / thread_count) & !15usize;
5189    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5190        let block_start = thread * block_stride;
5191        let block_size = if thread + 1 < thread_count {
5192            block_stride
5193        } else {
5194            n_usize - block_start
5195        };
5196        state.position = block_start as SaSint;
5197        state.count = partial_sorting_gather_lms_suffixes_32s_1k(
5198            sa,
5199            block_start as SaSint,
5200            block_size as SaSint,
5201        ) - block_start as SaSint;
5202    }
5203
5204    let mut position = 0usize;
5205    for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5206        let count = usize::try_from(state.count).expect("count must be non-negative");
5207        let src = usize::try_from(state.position).expect("position must be non-negative");
5208        if thread > 0 && count > 0 {
5209            sa.copy_within(src..src + count, position);
5210        }
5211        position += count;
5212    }
5213}
5214
5215#[allow(dead_code)]
5216fn partial_gsa_scan_right_to_left_16u(
5217    t: &[u16],
5218    sa: &mut [SaSint],
5219    buckets: &mut [SaSint],
5220    mut d: SaSint,
5221    omp_block_start: SaSint,
5222    omp_block_size: SaSint,
5223) -> SaSint {
5224    let mut i = (omp_block_start + omp_block_size - 1) as isize;
5225    let mut j = (omp_block_start + 64 + 1) as isize;
5226    while i >= j {
5227        let mut p0 = sa[i as usize];
5228        d += SaSint::from(p0 < 0);
5229        p0 &= SAINT_MAX;
5230        let v0 = buckets_index2(
5231            t[(p0 - 1) as usize] as usize,
5232            usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
5233        );
5234        if v0 != 1 {
5235            let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
5236                SAINT_MIN
5237            } else {
5238                0
5239            };
5240            buckets[v0] -= 1;
5241            sa[buckets[v0] as usize] = (p0 - 1) | mark0;
5242            buckets[2 * ALPHABET_SIZE + v0] = d;
5243        }
5244
5245        let mut p1 = sa[(i - 1) as usize];
5246        d += SaSint::from(p1 < 0);
5247        p1 &= SAINT_MAX;
5248        let v1 = buckets_index2(
5249            t[(p1 - 1) as usize] as usize,
5250            usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
5251        );
5252        if v1 != 1 {
5253            let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
5254                SAINT_MIN
5255            } else {
5256                0
5257            };
5258            buckets[v1] -= 1;
5259            sa[buckets[v1] as usize] = (p1 - 1) | mark1;
5260            buckets[2 * ALPHABET_SIZE + v1] = d;
5261        }
5262
5263        i -= 2;
5264    }
5265
5266    j -= 64 + 1;
5267    while i >= j {
5268        let mut p = sa[i as usize];
5269        d += SaSint::from(p < 0);
5270        p &= SAINT_MAX;
5271        let v = buckets_index2(
5272            t[(p - 1) as usize] as usize,
5273            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5274        );
5275        if v != 1 {
5276            let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5277                SAINT_MIN
5278            } else {
5279                0
5280            };
5281            buckets[v] -= 1;
5282            sa[buckets[v] as usize] = (p - 1) | mark;
5283            buckets[2 * ALPHABET_SIZE + v] = d;
5284        }
5285        i -= 1;
5286    }
5287
5288    d
5289}
5290
5291#[allow(dead_code)]
5292fn partial_gsa_scan_right_to_left_16u_block_omp(
5293    t: &[u16],
5294    sa: &mut [SaSint],
5295    k: SaSint,
5296    buckets: &mut [SaSint],
5297    d: SaSint,
5298    block_start: SaSint,
5299    block_size: SaSint,
5300    threads: SaSint,
5301    thread_state: &mut [ThreadState],
5302) -> SaSint {
5303    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
5304        usize::try_from(threads)
5305            .expect("threads must be non-negative")
5306            .min(thread_state.len())
5307    } else {
5308        1
5309    };
5310    if thread_count <= 1 {
5311        return partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
5312    }
5313
5314    let width = 2 * k as usize;
5315    let distinct_offset = 2 * ALPHABET_SIZE;
5316    let block_stride = (block_size / thread_count as SaSint) & !15;
5317
5318    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5319        let local_start = thread as SaSint * block_stride;
5320        let local_size = if thread + 1 < thread_count {
5321            block_stride
5322        } else {
5323            block_size - local_start
5324        };
5325        let mut local_state = ThreadState::default();
5326        state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
5327            t,
5328            sa,
5329            k,
5330            &mut state.buckets,
5331            &mut state.cache,
5332            block_start + local_start,
5333            local_size,
5334            &mut local_state,
5335        );
5336        state.count = local_state.cache_entries as SaSint;
5337    }
5338
5339    let mut next_d = d;
5340    for state in thread_state.iter_mut().take(thread_count).rev() {
5341        for c in 0..width {
5342            let a = buckets[c];
5343            let b = state.buckets[c];
5344            buckets[c] = a - b;
5345            state.buckets[c] = a;
5346        }
5347
5348        next_d -= 1;
5349        for c in 0..width {
5350            let offset = distinct_offset + c;
5351            let a = buckets[offset];
5352            let b = state.buckets[offset];
5353            let shifted = b + next_d;
5354            buckets[offset] = if b > 0 { shifted } else { a };
5355            state.buckets[offset] = a;
5356        }
5357        next_d += 1 + state.position;
5358        state.position = next_d - state.position;
5359    }
5360
5361    for state in thread_state.iter_mut().take(thread_count) {
5362        partial_gsa_scan_right_to_left_16u_block_place(
5363            sa,
5364            &mut state.buckets,
5365            &state.cache,
5366            state.count,
5367            state.position,
5368        );
5369    }
5370
5371    next_d
5372}
5373
5374#[allow(dead_code)]
5375fn partial_gsa_scan_right_to_left_16u_omp(
5376    t: &[u16],
5377    sa: &mut [SaSint],
5378    n: SaSint,
5379    k: SaSint,
5380    buckets: &mut [SaSint],
5381    first_lms_suffix: SaSint,
5382    left_suffixes_count: SaSint,
5383    d: SaSint,
5384    threads: SaSint,
5385) {
5386    let scan_start = left_suffixes_count + 1;
5387    let scan_end = n - first_lms_suffix;
5388
5389    if threads == 1 || scan_end - scan_start < 65536 {
5390        partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, scan_start, scan_end - scan_start);
5391    } else {
5392        let mut d = d;
5393        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
5394        let mut block_start = scan_end - 1;
5395        while block_start >= scan_start {
5396            if sa[block_start as usize] == 0 {
5397                block_start -= 1;
5398            } else {
5399                let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
5400                let mut block_max_end = block_start - block_limit;
5401                if block_max_end < scan_start {
5402                    block_max_end = scan_start - 1;
5403                }
5404                let mut block_end = block_start - 1;
5405                while block_end > block_max_end && sa[block_end as usize] != 0 {
5406                    block_end -= 1;
5407                }
5408                let block_size = block_start - block_end;
5409
5410                if block_size < 32 {
5411                    while block_start > block_end {
5412                        let mut p = sa[block_start as usize];
5413                        d += SaSint::from(p < 0);
5414                        p &= SAINT_MAX;
5415                        let v = buckets_index2(
5416                            t[(p - 1) as usize] as usize,
5417                            usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5418                        );
5419                        if v != 1 {
5420                            let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5421                                SAINT_MIN
5422                            } else {
5423                                0
5424                            };
5425                            buckets[v] -= 1;
5426                            sa[buckets[v] as usize] = (p - 1) | mark;
5427                            buckets[2 * ALPHABET_SIZE + v] = d;
5428                        }
5429                        block_start -= 1;
5430                    }
5431                } else {
5432                    d = partial_gsa_scan_right_to_left_16u_block_omp(
5433                        t,
5434                        sa,
5435                        k,
5436                        buckets,
5437                        d,
5438                        block_end + 1,
5439                        block_size,
5440                        threads,
5441                        &mut thread_state,
5442                    );
5443                    block_start = block_end;
5444                }
5445            }
5446        }
5447    }
5448}
5449
5450#[allow(dead_code)]
5451fn partial_sorting_shift_markers_16u_omp(
5452    sa: &mut [SaSint],
5453    n: SaSint,
5454    buckets: &[SaSint],
5455    threads: SaSint,
5456) {
5457    let thread_count = if threads > 1 && n >= 65536 {
5458        usize::try_from(threads).expect("threads must be positive")
5459    } else {
5460        1
5461    };
5462    let c_step = buckets_index2(1, 0) as isize;
5463    let c_min = buckets_index2(1, 0) as isize;
5464    let c_max = buckets_index2(ALPHABET_SIZE - 1, 0) as isize;
5465    for t in 0..thread_count {
5466        let mut c = c_max - (t as isize * c_step);
5467        while c >= c_min {
5468            let c_usize = c as usize;
5469            let mut s = SAINT_MIN;
5470            let mut i = buckets[4 * ALPHABET_SIZE + c_usize] as isize - 1;
5471            let mut j = buckets[c_usize - buckets_index2(1, 0)] as isize + 3;
5472            while i >= j {
5473                let p0 = sa[i as usize];
5474                let q0 = (p0 & SAINT_MIN) ^ s;
5475                s ^= q0;
5476                sa[i as usize] = p0 ^ q0;
5477
5478                let p1 = sa[(i - 1) as usize];
5479                let q1 = (p1 & SAINT_MIN) ^ s;
5480                s ^= q1;
5481                sa[(i - 1) as usize] = p1 ^ q1;
5482
5483                let p2 = sa[(i - 2) as usize];
5484                let q2 = (p2 & SAINT_MIN) ^ s;
5485                s ^= q2;
5486                sa[(i - 2) as usize] = p2 ^ q2;
5487
5488                let p3 = sa[(i - 3) as usize];
5489                let q3 = (p3 & SAINT_MIN) ^ s;
5490                s ^= q3;
5491                sa[(i - 3) as usize] = p3 ^ q3;
5492
5493                i -= 4;
5494            }
5495
5496            j -= 3;
5497            while i >= j {
5498                let p = sa[i as usize];
5499                let q = (p & SAINT_MIN) ^ s;
5500                s ^= q;
5501                sa[i as usize] = p ^ q;
5502                i -= 1;
5503            }
5504
5505            c -= c_step * thread_count as isize;
5506        }
5507    }
5508}
5509
5510#[allow(dead_code)]
5511fn induce_partial_order_16u_omp(
5512    t: &[u16],
5513    sa: &mut [SaSint],
5514    n: SaSint,
5515    k: SaSint,
5516    flags: SaSint,
5517    buckets: &mut [SaSint],
5518    first_lms_suffix: SaSint,
5519    left_suffixes_count: SaSint,
5520    threads: SaSint,
5521) {
5522    buckets[2 * ALPHABET_SIZE..4 * ALPHABET_SIZE].fill(0);
5523
5524    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5525        let marker = 4 * ALPHABET_SIZE + buckets_index2(0, 1);
5526        buckets[marker] = buckets[4 * ALPHABET_SIZE + buckets_index2(1, 1)] - 1;
5527        flip_suffix_markers_omp(sa, buckets[marker], threads);
5528    }
5529
5530    let d = partial_sorting_scan_left_to_right_16u_omp(
5531        t,
5532        sa,
5533        n,
5534        k,
5535        buckets,
5536        left_suffixes_count,
5537        0,
5538        threads,
5539    );
5540    partial_sorting_shift_markers_16u_omp(sa, n, buckets, threads);
5541
5542    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5543        partial_gsa_scan_right_to_left_16u_omp(
5544            t,
5545            sa,
5546            n,
5547            k,
5548            buckets,
5549            first_lms_suffix,
5550            left_suffixes_count,
5551            d,
5552            threads,
5553        );
5554
5555        if t[first_lms_suffix as usize] == 0 {
5556            let count = (buckets[buckets_index2(1, 1)] - 1) as usize;
5557            sa.copy_within(0..count, 1);
5558            sa[0] = first_lms_suffix | SAINT_MIN;
5559        }
5560
5561        buckets[buckets_index2(0, 1)] = 0;
5562    } else {
5563        partial_sorting_scan_right_to_left_16u_omp(
5564            t,
5565            sa,
5566            n,
5567            k,
5568            buckets,
5569            first_lms_suffix,
5570            left_suffixes_count,
5571            d,
5572            threads,
5573        );
5574    }
5575}
5576
5577#[allow(dead_code)]
5578fn induce_partial_order_32s_6k_omp(
5579    t: &[SaSint],
5580    sa: &mut [SaSint],
5581    n: SaSint,
5582    k: SaSint,
5583    buckets: &mut [SaSint],
5584    first_lms_suffix: SaSint,
5585    left_suffixes_count: SaSint,
5586    threads: SaSint,
5587    thread_state: &mut [ThreadState],
5588) {
5589    let d = partial_sorting_scan_left_to_right_32s_6k_omp(
5590        t,
5591        sa,
5592        n,
5593        buckets,
5594        left_suffixes_count,
5595        0,
5596        threads,
5597        thread_state,
5598    );
5599    partial_sorting_shift_markers_32s_6k_omp(sa, k, buckets, threads);
5600    partial_sorting_shift_buckets_32s_6k(k, buckets);
5601    partial_sorting_scan_right_to_left_32s_6k_omp(
5602        t,
5603        sa,
5604        n,
5605        buckets,
5606        first_lms_suffix,
5607        left_suffixes_count,
5608        d,
5609        threads,
5610        thread_state,
5611    );
5612}
5613
5614#[allow(dead_code)]
5615fn induce_partial_order_32s_4k_omp(
5616    t: &[SaSint],
5617    sa: &mut [SaSint],
5618    n: SaSint,
5619    k: SaSint,
5620    buckets: &mut [SaSint],
5621    threads: SaSint,
5622    thread_state: &mut [ThreadState],
5623) {
5624    buckets[..2 * k as usize].fill(0);
5625    let d = partial_sorting_scan_left_to_right_32s_4k_omp(
5626        t,
5627        sa,
5628        n,
5629        k,
5630        buckets,
5631        0,
5632        threads,
5633        thread_state,
5634    );
5635    partial_sorting_shift_markers_32s_4k(sa, n);
5636    partial_sorting_scan_right_to_left_32s_4k_omp(t, sa, n, k, buckets, d, threads, thread_state);
5637    partial_sorting_gather_lms_suffixes_32s_4k_omp(sa, n, threads, thread_state);
5638}
5639
5640#[allow(dead_code)]
5641fn induce_partial_order_32s_2k_omp(
5642    t: &[SaSint],
5643    sa: &mut [SaSint],
5644    n: SaSint,
5645    k: SaSint,
5646    buckets: &mut [SaSint],
5647    threads: SaSint,
5648    thread_state: &mut [ThreadState],
5649) {
5650    let k = k as usize;
5651    let (left, right) = buckets.split_at_mut(k);
5652    partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, right, threads, thread_state);
5653    partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, left, threads, thread_state);
5654    partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5655}
5656
5657#[allow(dead_code)]
5658fn induce_partial_order_32s_1k_omp(
5659    t: &[SaSint],
5660    sa: &mut [SaSint],
5661    n: SaSint,
5662    k: SaSint,
5663    buckets: &mut [SaSint],
5664    threads: SaSint,
5665    thread_state: &mut [ThreadState],
5666) {
5667    count_suffixes_32s(t, n, k, buckets);
5668    initialize_buckets_start_32s_1k(k, buckets);
5669    partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5670
5671    count_suffixes_32s(t, n, k, buckets);
5672    initialize_buckets_end_32s_1k(k, buckets);
5673    partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5674
5675    partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5676}
5677
5678#[allow(dead_code)]
5679fn final_sorting_scan_left_to_right_16u(
5680    t: &[u16],
5681    sa: &mut [SaSint],
5682    induction_bucket: &mut [SaSint],
5683    omp_block_start: SaSint,
5684    omp_block_size: SaSint,
5685) {
5686    let mut i = omp_block_start as isize;
5687    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
5688    while i < j {
5689        final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5690        final_sorting_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
5691        i += 2;
5692    }
5693    j += 64 + 1;
5694    while i < j {
5695        final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5696        i += 1;
5697    }
5698}
5699
5700#[allow(dead_code)]
5701fn final_sorting_scan_right_to_left_16u(
5702    t: &[u16],
5703    sa: &mut [SaSint],
5704    induction_bucket: &mut [SaSint],
5705    omp_block_start: SaSint,
5706    omp_block_size: SaSint,
5707) {
5708    let mut i = (omp_block_start + omp_block_size - 1) as isize;
5709    let mut j = (omp_block_start + 64 + 1) as isize;
5710    while i >= j {
5711        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5712        final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, false);
5713        i -= 2;
5714    }
5715    j -= 64 + 1;
5716    while i >= j {
5717        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5718        i -= 1;
5719    }
5720}
5721
5722#[allow(dead_code)]
5723fn final_sorting_scan_left_to_right_32s(
5724    t: &[SaSint],
5725    sa: &mut [SaSint],
5726    induction_bucket: &mut [SaSint],
5727    omp_block_start: SaSint,
5728    omp_block_size: SaSint,
5729) {
5730    let mut i = omp_block_start as isize;
5731    let mut j = (omp_block_start + omp_block_size - 2 * 64 - 1) as isize;
5732    while i < j {
5733        for current in [i, i + 1] {
5734            let current = current as usize;
5735            let mut p = sa[current];
5736            sa[current] = p ^ SAINT_MIN;
5737            if p > 0 {
5738                p -= 1;
5739                let p_usize = p as usize;
5740                let bucket = t[p_usize] as usize;
5741                let slot = induction_bucket[bucket] as usize;
5742                sa[slot] = p
5743                    | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5744                        << (SAINT_BIT - 1));
5745                induction_bucket[bucket] += 1;
5746            }
5747        }
5748        i += 2;
5749    }
5750
5751    j += 2 * 64 + 1;
5752    while i < j {
5753        let current = i as usize;
5754        let mut p = sa[current];
5755        sa[current] = p ^ SAINT_MIN;
5756        if p > 0 {
5757            p -= 1;
5758            let p_usize = p as usize;
5759            let bucket = t[p_usize] as usize;
5760            let slot = induction_bucket[bucket] as usize;
5761            sa[slot] = p
5762                | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5763                    << (SAINT_BIT - 1));
5764            induction_bucket[bucket] += 1;
5765        }
5766        i += 1;
5767    }
5768}
5769
5770#[allow(dead_code)]
5771fn final_sorting_scan_left_to_right_32s_block_gather(
5772    t: &[SaSint],
5773    sa: &mut [SaSint],
5774    cache: &mut [ThreadCache],
5775    omp_block_start: SaSint,
5776    omp_block_size: SaSint,
5777) {
5778    if omp_block_size <= 0 {
5779        return;
5780    }
5781
5782    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5783    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5784    for offset in 0..size {
5785        let current = start + offset;
5786        let mut symbol = SAINT_MIN;
5787        let mut p = sa[current];
5788        sa[current] = p ^ SAINT_MIN;
5789        if p > 0 {
5790            p -= 1;
5791            let p_usize = p as usize;
5792            cache[offset].index = p
5793                | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5794                    << (SAINT_BIT - 1));
5795            symbol = t[p_usize];
5796        }
5797        cache[offset].symbol = symbol;
5798    }
5799}
5800
5801#[allow(dead_code)]
5802fn final_sorting_scan_left_to_right_32s_block_sort(
5803    t: &[SaSint],
5804    induction_bucket: &mut [SaSint],
5805    cache: &mut [ThreadCache],
5806    omp_block_start: SaSint,
5807    omp_block_size: SaSint,
5808) {
5809    if omp_block_size <= 0 {
5810        return;
5811    }
5812
5813    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5814    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5815    let block_end = start + size;
5816
5817    for offset in 0..size {
5818        let v = cache[offset].symbol;
5819        if v >= 0 {
5820            let bucket_index = v as usize;
5821            let target = induction_bucket[bucket_index];
5822            cache[offset].symbol = target;
5823            induction_bucket[bucket_index] += 1;
5824            if target >= omp_block_start && target < block_end as SaSint {
5825                let ni = usize::try_from(target - omp_block_start)
5826                    .expect("cache slot must be non-negative");
5827                let mut np = cache[offset].index;
5828                cache[offset].index = np ^ SAINT_MIN;
5829                if np > 0 {
5830                    np -= 1;
5831                    let np_usize = np as usize;
5832                    cache[ni].index = np
5833                        | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
5834                            as SaSint)
5835                            << (SAINT_BIT - 1));
5836                    cache[ni].symbol = t[np_usize];
5837                }
5838            }
5839        }
5840    }
5841}
5842
5843#[allow(dead_code)]
5844fn final_sorting_scan_left_to_right_32s_block_omp(
5845    t: &[SaSint],
5846    sa: &mut [SaSint],
5847    buckets: &mut [SaSint],
5848    cache: &mut [ThreadCache],
5849    block_start: SaSint,
5850    block_size: SaSint,
5851    threads: SaSint,
5852) {
5853    if threads <= 1 || block_size < 16_384 {
5854        final_sorting_scan_left_to_right_32s(t, sa, buckets, block_start, block_size);
5855        return;
5856    }
5857
5858    final_sorting_scan_left_to_right_32s_block_gather(t, sa, cache, block_start, block_size);
5859    final_sorting_scan_left_to_right_32s_block_sort(t, buckets, cache, block_start, block_size);
5860
5861    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5862    let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
5863    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5864    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5865    for omp_thread_num in 0..omp_num_threads {
5866        let omp_block_start = omp_thread_num * omp_block_stride;
5867        let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5868            omp_block_stride
5869        } else {
5870            block_size_usize - omp_block_start
5871        };
5872        compact_and_place_cached_suffixes(
5873            sa,
5874            cache,
5875            omp_block_start as SaSint,
5876            omp_block_size as SaSint,
5877        );
5878    }
5879}
5880
5881#[allow(dead_code)]
5882fn final_sorting_scan_left_to_right_32s_omp(
5883    t: &[SaSint],
5884    sa: &mut [SaSint],
5885    n: SaSint,
5886    induction_bucket: &mut [SaSint],
5887    threads: SaSint,
5888    thread_state: &mut [ThreadState],
5889) {
5890    let last = (n - 1) as usize;
5891    let bucket = t[last] as usize;
5892    let slot = induction_bucket[bucket] as usize;
5893    sa[slot] = (n - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
5894    induction_bucket[bucket] += 1;
5895
5896    if threads == 1 || n < 65536 || thread_state.is_empty() {
5897        final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n);
5898        return;
5899    }
5900
5901    let threads_usize = usize::try_from(threads)
5902        .expect("threads must be non-negative")
5903        .max(1);
5904    let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
5905    let mut cache = vec![ThreadCache::default(); block_span];
5906    let mut block_start = 0;
5907    while block_start < n {
5908        let block_end = (block_start + block_span as SaSint).min(n);
5909        final_sorting_scan_left_to_right_32s_block_omp(
5910            t,
5911            sa,
5912            induction_bucket,
5913            &mut cache,
5914            block_start,
5915            block_end - block_start,
5916            threads,
5917        );
5918        block_start = block_end;
5919    }
5920}
5921
5922#[allow(dead_code)]
5923fn final_sorting_scan_right_to_left_32s(
5924    t: &[SaSint],
5925    sa: &mut [SaSint],
5926    induction_bucket: &mut [SaSint],
5927    omp_block_start: SaSint,
5928    omp_block_size: SaSint,
5929) {
5930    let mut i = (omp_block_start + omp_block_size - 1) as isize;
5931    let mut j = (omp_block_start + 2 * 64 + 1) as isize;
5932    while i >= j {
5933        for current in [i, i - 1] {
5934            let current = current as usize;
5935            let mut p = sa[current];
5936            sa[current] = p & SAINT_MAX;
5937            if p > 0 {
5938                p -= 1;
5939                let p_usize = p as usize;
5940                let bucket = t[p_usize] as usize;
5941                induction_bucket[bucket] -= 1;
5942                let slot = induction_bucket[bucket] as usize;
5943                sa[slot] = p
5944                    | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5945                        << (SAINT_BIT - 1));
5946            }
5947        }
5948        i -= 2;
5949    }
5950
5951    j -= 2 * 64 + 1;
5952    while i >= j {
5953        let current = i as usize;
5954        let mut p = sa[current];
5955        sa[current] = p & SAINT_MAX;
5956        if p > 0 {
5957            p -= 1;
5958            let p_usize = p as usize;
5959            let bucket = t[p_usize] as usize;
5960            induction_bucket[bucket] -= 1;
5961            let slot = induction_bucket[bucket] as usize;
5962            sa[slot] = p
5963                | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5964                    << (SAINT_BIT - 1));
5965        }
5966        i -= 1;
5967    }
5968}
5969
5970#[allow(dead_code)]
5971fn final_sorting_scan_right_to_left_32s_block_gather(
5972    t: &[SaSint],
5973    sa: &mut [SaSint],
5974    cache: &mut [ThreadCache],
5975    omp_block_start: SaSint,
5976    omp_block_size: SaSint,
5977) {
5978    if omp_block_size <= 0 {
5979        return;
5980    }
5981
5982    let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5983    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5984    for offset in 0..size {
5985        let current = start + offset;
5986        let mut symbol = SAINT_MIN;
5987        let mut p = sa[current];
5988        sa[current] = p & SAINT_MAX;
5989        if p > 0 {
5990            p -= 1;
5991            let p_usize = p as usize;
5992            cache[offset].index = p
5993                | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5994                    << (SAINT_BIT - 1));
5995            symbol = t[p_usize];
5996        }
5997        cache[offset].symbol = symbol;
5998    }
5999}
6000
6001#[allow(dead_code)]
6002fn final_sorting_scan_right_to_left_32s_block_sort(
6003    t: &[SaSint],
6004    induction_bucket: &mut [SaSint],
6005    cache: &mut [ThreadCache],
6006    omp_block_start: SaSint,
6007    omp_block_size: SaSint,
6008) {
6009    if omp_block_size <= 0 {
6010        return;
6011    }
6012
6013    let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6014    let block_end = omp_block_start + omp_block_size;
6015    let mut offset = size;
6016
6017    while offset > 0 {
6018        offset -= 1;
6019        let v = cache[offset].symbol;
6020        if v >= 0 {
6021            let bucket_index = v as usize;
6022            induction_bucket[bucket_index] -= 1;
6023            let target = induction_bucket[bucket_index];
6024            cache[offset].symbol = target;
6025            if target >= omp_block_start && target < block_end {
6026                let ni = usize::try_from(target - omp_block_start)
6027                    .expect("cache slot must be non-negative");
6028                let mut np = cache[offset].index;
6029                cache[offset].index = np & SAINT_MAX;
6030                if np > 0 {
6031                    np -= 1;
6032                    let np_usize = np as usize;
6033                    cache[ni].index = np
6034                        | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
6035                            as SaSint)
6036                            << (SAINT_BIT - 1));
6037                    cache[ni].symbol = t[np_usize];
6038                }
6039            }
6040        }
6041    }
6042}
6043
6044#[allow(dead_code)]
6045fn final_sorting_scan_right_to_left_32s_block_omp(
6046    t: &[SaSint],
6047    sa: &mut [SaSint],
6048    buckets: &mut [SaSint],
6049    cache: &mut [ThreadCache],
6050    block_start: SaSint,
6051    block_size: SaSint,
6052    threads: SaSint,
6053) {
6054    if threads <= 1 || block_size < 16_384 {
6055        final_sorting_scan_right_to_left_32s(t, sa, buckets, block_start, block_size);
6056        return;
6057    }
6058
6059    final_sorting_scan_right_to_left_32s_block_gather(t, sa, cache, block_start, block_size);
6060    final_sorting_scan_right_to_left_32s_block_sort(t, buckets, cache, block_start, block_size);
6061
6062    let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
6063    let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
6064    let omp_num_threads = threads_usize.min(block_size_usize.max(1));
6065    let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
6066    for omp_thread_num in 0..omp_num_threads {
6067        let omp_block_start = omp_thread_num * omp_block_stride;
6068        let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6069            omp_block_stride
6070        } else {
6071            block_size_usize - omp_block_start
6072        };
6073        compact_and_place_cached_suffixes(
6074            sa,
6075            cache,
6076            omp_block_start as SaSint,
6077            omp_block_size as SaSint,
6078        );
6079    }
6080}
6081
6082#[allow(dead_code)]
6083fn final_sorting_scan_right_to_left_32s_omp(
6084    t: &[SaSint],
6085    sa: &mut [SaSint],
6086    n: SaSint,
6087    induction_bucket: &mut [SaSint],
6088    threads: SaSint,
6089    thread_state: &mut [ThreadState],
6090) {
6091    if threads == 1 || n < 65536 || thread_state.is_empty() {
6092        final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n);
6093        return;
6094    }
6095
6096    let threads_usize = usize::try_from(threads)
6097        .expect("threads must be non-negative")
6098        .max(1);
6099    let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
6100    let mut cache = vec![ThreadCache::default(); block_span];
6101    let mut block_start = n - 1;
6102    while block_start >= 0 {
6103        let block_end = (block_start - block_span as SaSint).max(-1);
6104        final_sorting_scan_right_to_left_32s_block_omp(
6105            t,
6106            sa,
6107            induction_bucket,
6108            &mut cache,
6109            block_end + 1,
6110            block_start - block_end,
6111            threads,
6112        );
6113        block_start = block_end;
6114    }
6115}
6116
6117#[allow(dead_code)]
6118fn induce_final_order_32s_6k(
6119    t: &[SaSint],
6120    sa: &mut [SaSint],
6121    n: SaSint,
6122    k: SaSint,
6123    buckets: &mut [SaSint],
6124    threads: SaSint,
6125    thread_state: &mut [ThreadState],
6126) {
6127    let k = k as usize;
6128    final_sorting_scan_left_to_right_32s_omp(
6129        t,
6130        sa,
6131        n,
6132        &mut buckets[4 * k..5 * k],
6133        threads,
6134        thread_state,
6135    );
6136    final_sorting_scan_right_to_left_32s_omp(
6137        t,
6138        sa,
6139        n,
6140        &mut buckets[5 * k..6 * k],
6141        threads,
6142        thread_state,
6143    );
6144}
6145
6146#[allow(dead_code)]
6147fn induce_final_order_32s_4k(
6148    t: &[SaSint],
6149    sa: &mut [SaSint],
6150    n: SaSint,
6151    k: SaSint,
6152    buckets: &mut [SaSint],
6153    threads: SaSint,
6154    thread_state: &mut [ThreadState],
6155) {
6156    let k = k as usize;
6157    final_sorting_scan_left_to_right_32s_omp(
6158        t,
6159        sa,
6160        n,
6161        &mut buckets[2 * k..3 * k],
6162        threads,
6163        thread_state,
6164    );
6165    final_sorting_scan_right_to_left_32s_omp(
6166        t,
6167        sa,
6168        n,
6169        &mut buckets[3 * k..4 * k],
6170        threads,
6171        thread_state,
6172    );
6173}
6174
6175#[allow(dead_code)]
6176fn induce_final_order_32s_2k(
6177    t: &[SaSint],
6178    sa: &mut [SaSint],
6179    n: SaSint,
6180    k: SaSint,
6181    buckets: &mut [SaSint],
6182    threads: SaSint,
6183    thread_state: &mut [ThreadState],
6184) {
6185    let k = k as usize;
6186    final_sorting_scan_left_to_right_32s_omp(
6187        t,
6188        sa,
6189        n,
6190        &mut buckets[k..2 * k],
6191        threads,
6192        thread_state,
6193    );
6194    final_sorting_scan_right_to_left_32s_omp(t, sa, n, &mut buckets[..k], threads, thread_state);
6195}
6196
6197#[allow(dead_code)]
6198fn induce_final_order_32s_1k(
6199    t: &[SaSint],
6200    sa: &mut [SaSint],
6201    n: SaSint,
6202    k: SaSint,
6203    buckets: &mut [SaSint],
6204    threads: SaSint,
6205    thread_state: &mut [ThreadState],
6206) {
6207    count_suffixes_32s(t, n, k, buckets);
6208    initialize_buckets_start_32s_1k(k, buckets);
6209    final_sorting_scan_left_to_right_32s_omp(t, sa, n, buckets, threads, thread_state);
6210
6211    count_suffixes_32s(t, n, k, buckets);
6212    initialize_buckets_end_32s_1k(k, buckets);
6213    final_sorting_scan_right_to_left_32s_omp(t, sa, n, buckets, threads, thread_state);
6214}
6215
6216#[allow(dead_code)]
6217fn clear_lms_suffixes_omp(
6218    sa: &mut [SaSint],
6219    n: SaSint,
6220    k: SaSint,
6221    bucket_start: &[SaSint],
6222    bucket_end: &[SaSint],
6223    threads: SaSint,
6224) {
6225    let k_usize = usize::try_from(k).expect("k must be non-negative");
6226    let thread_count = if threads > 1 && n >= 65536 {
6227        usize::try_from(threads).expect("threads must be positive")
6228    } else {
6229        1
6230    };
6231    for t in 0..thread_count {
6232        let mut c = t;
6233        while c < k_usize {
6234            if bucket_end[c] > bucket_start[c] {
6235                let start = bucket_start[c] as usize;
6236                let end = bucket_end[c] as usize;
6237                sa[start..end].fill(0);
6238            }
6239            c += thread_count;
6240        }
6241    }
6242}
6243
6244#[allow(dead_code)]
6245fn final_gsa_scan_right_to_left_16u(
6246    t: &[u16],
6247    sa: &mut [SaSint],
6248    induction_bucket: &mut [SaSint],
6249    omp_block_start: SaSint,
6250    omp_block_size: SaSint,
6251) {
6252    let mut i = (omp_block_start + omp_block_size - 1) as isize;
6253    let mut j = (omp_block_start + 64 + 1) as isize;
6254    while i >= j {
6255        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6256        final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, true);
6257        i -= 2;
6258    }
6259    j -= 64 + 1;
6260    while i >= j {
6261        final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6262        i -= 1;
6263    }
6264}
6265
6266#[allow(dead_code)]
6267fn final_sorting_ltr_step(
6268    t: &[u16],
6269    sa: &mut [SaSint],
6270    induction_bucket: &mut [SaSint],
6271    index: usize,
6272) {
6273    let mut p = sa[index];
6274    sa[index] = p ^ SAINT_MIN;
6275    if p > 0 {
6276        p -= 1;
6277        let c = t[p as usize] as usize;
6278        let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
6279            SAINT_MIN
6280        } else {
6281            0
6282        };
6283        let dst = induction_bucket[c] as usize;
6284        sa[dst] = p | mark;
6285        induction_bucket[c] += 1;
6286    }
6287}
6288
6289#[allow(dead_code)]
6290fn final_sorting_rtl_step(
6291    t: &[u16],
6292    sa: &mut [SaSint],
6293    induction_bucket: &mut [SaSint],
6294    index: usize,
6295    gsa: bool,
6296) {
6297    let mut p = sa[index];
6298    sa[index] = p & SAINT_MAX;
6299    if p > 0 && (!gsa || t[(p - 1) as usize] > 0) {
6300        p -= 1;
6301        let c = t[p as usize] as usize;
6302        let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
6303            SAINT_MIN
6304        } else {
6305            0
6306        };
6307        induction_bucket[c] -= 1;
6308        sa[induction_bucket[c] as usize] = p | mark;
6309    }
6310}
6311
6312#[allow(dead_code)]
6313fn final_bwt_scan_left_to_right_16u(
6314    t: &[u16],
6315    sa: &mut [SaSint],
6316    induction_bucket: &mut [SaSint],
6317    omp_block_start: SaSint,
6318    omp_block_size: SaSint,
6319) {
6320    let mut i = omp_block_start as isize;
6321    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6322    while i < j {
6323        final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6324        final_bwt_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
6325        i += 2;
6326    }
6327    j += 64 + 1;
6328    while i < j {
6329        final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6330        i += 1;
6331    }
6332}
6333
6334#[allow(dead_code)]
6335fn final_bwt_scan_right_to_left_16u(
6336    t: &[u16],
6337    sa: &mut [SaSint],
6338    induction_bucket: &mut [SaSint],
6339    omp_block_start: SaSint,
6340    omp_block_size: SaSint,
6341) -> SaSint {
6342    let mut index = -1;
6343    let mut i = (omp_block_start + omp_block_size - 1) as isize;
6344    let mut j = (omp_block_start + 64 + 1) as isize;
6345    while i >= j {
6346        final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6347        final_bwt_rtl_step(t, sa, induction_bucket, (i - 1) as usize, &mut index);
6348        i -= 2;
6349    }
6350    j -= 64 + 1;
6351    while i >= j {
6352        final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6353        i -= 1;
6354    }
6355    index
6356}
6357
6358#[allow(dead_code)]
6359fn final_bwt_aux_scan_left_to_right_16u(
6360    t: &[u16],
6361    sa: &mut [SaSint],
6362    rm: SaSint,
6363    i_sample: &mut [SaSint],
6364    induction_bucket: &mut [SaSint],
6365    omp_block_start: SaSint,
6366    omp_block_size: SaSint,
6367) {
6368    let mut i = omp_block_start as isize;
6369    let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6370    while i < j {
6371        final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6372        final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, (i + 1) as usize);
6373        i += 2;
6374    }
6375    j += 64 + 1;
6376    while i < j {
6377        final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6378        i += 1;
6379    }
6380}
6381
6382#[allow(dead_code)]
6383fn final_bwt_aux_scan_right_to_left_16u(
6384    t: &[u16],
6385    sa: &mut [SaSint],
6386    rm: SaSint,
6387    i_sample: &mut [SaSint],
6388    induction_bucket: &mut [SaSint],
6389    omp_block_start: SaSint,
6390    omp_block_size: SaSint,
6391) {
6392    let mut i = (omp_block_start + omp_block_size - 1) as isize;
6393    let mut j = (omp_block_start + 64 + 1) as isize;
6394    while i >= j {
6395        final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6396        final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, (i - 1) as usize);
6397        i -= 2;
6398    }
6399    j -= 64 + 1;
6400    while i >= j {
6401        final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6402        i -= 1;
6403    }
6404}
6405
6406#[allow(dead_code)]
6407fn renumber_lms_suffixes_16u(
6408    sa: &mut [SaSint],
6409    m: SaSint,
6410    mut name: SaSint,
6411    omp_block_start: SaSint,
6412    omp_block_size: SaSint,
6413) -> SaSint {
6414    let mut i = omp_block_start as isize;
6415    let mut j = (omp_block_start + omp_block_size - 64 - 3) as isize;
6416    while i < j {
6417        let p0 = sa[i as usize];
6418        sa[m as usize + ((p0 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6419        name += SaSint::from(p0 < 0);
6420
6421        let p1 = sa[(i + 1) as usize];
6422        sa[m as usize + ((p1 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6423        name += SaSint::from(p1 < 0);
6424
6425        let p2 = sa[(i + 2) as usize];
6426        sa[m as usize + ((p2 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6427        name += SaSint::from(p2 < 0);
6428
6429        let p3 = sa[(i + 3) as usize];
6430        sa[m as usize + ((p3 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6431        name += SaSint::from(p3 < 0);
6432
6433        i += 4;
6434    }
6435
6436    j += 64 + 3;
6437    while i < j {
6438        let p = sa[i as usize];
6439        sa[m as usize + ((p & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6440        name += SaSint::from(p < 0);
6441        i += 1;
6442    }
6443
6444    name
6445}
6446
6447#[allow(dead_code)]
6448fn renumber_lms_suffixes_16u_omp(
6449    sa: &mut [SaSint],
6450    m: SaSint,
6451    threads: SaSint,
6452    thread_state: &mut [ThreadState],
6453) -> SaSint {
6454    if threads == 1 || m < 65_536 || thread_state.is_empty() {
6455        return renumber_lms_suffixes_16u(sa, m, 0, 0, m);
6456    }
6457
6458    let thread_count = usize::try_from(threads)
6459        .expect("threads must be non-negative")
6460        .min(thread_state.len());
6461    let block_stride = (m / thread_count as SaSint) & !15;
6462
6463    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6464        let block_start = thread as SaSint * block_stride;
6465        let block_size = if thread + 1 < thread_count {
6466            block_stride
6467        } else {
6468            m - block_start
6469        };
6470        state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6471    }
6472
6473    let mut name = 0;
6474    for thread in 0..thread_count {
6475        let block_start = thread as SaSint * block_stride;
6476        let block_size = if thread + 1 < thread_count {
6477            block_stride
6478        } else {
6479            m - block_start
6480        };
6481        renumber_lms_suffixes_16u(sa, m, name, block_start, block_size);
6482        name += thread_state[thread].count;
6483    }
6484
6485    name
6486}
6487
6488#[allow(dead_code)]
6489fn gather_marked_lms_suffixes(
6490    sa: &mut [SaSint],
6491    m: SaSint,
6492    mut l: isize,
6493    omp_block_start: isize,
6494    omp_block_size: isize,
6495) -> isize {
6496    if omp_block_size <= 0 {
6497        return l;
6498    }
6499
6500    l -= 1;
6501    let mut i = m as isize + omp_block_start + omp_block_size - 1;
6502    let mut j = m as isize + omp_block_start + 3;
6503    while i >= j {
6504        let s0 = sa[i as usize];
6505        sa[l as usize] = s0 & SAINT_MAX;
6506        l -= isize::from(s0 < 0);
6507
6508        let s1 = sa[(i - 1) as usize];
6509        sa[l as usize] = s1 & SAINT_MAX;
6510        l -= isize::from(s1 < 0);
6511
6512        let s2 = sa[(i - 2) as usize];
6513        sa[l as usize] = s2 & SAINT_MAX;
6514        l -= isize::from(s2 < 0);
6515
6516        let s3 = sa[(i - 3) as usize];
6517        sa[l as usize] = s3 & SAINT_MAX;
6518        l -= isize::from(s3 < 0);
6519
6520        i -= 4;
6521    }
6522
6523    j -= 3;
6524    while i >= j {
6525        let s = sa[i as usize];
6526        sa[l as usize] = s & SAINT_MAX;
6527        l -= isize::from(s < 0);
6528        i -= 1;
6529    }
6530
6531    l + 1
6532}
6533
6534#[allow(dead_code)]
6535fn gather_marked_lms_suffixes_omp(
6536    sa: &mut [SaSint],
6537    n: SaSint,
6538    m: SaSint,
6539    fs: SaSint,
6540    threads: SaSint,
6541    thread_state: &mut [ThreadState],
6542) {
6543    let half_n = n >> 1;
6544    if threads == 1 || n < 131_072 || thread_state.is_empty() {
6545        let _ = gather_marked_lms_suffixes(sa, m, (n + fs) as isize, 0, half_n as isize);
6546        return;
6547    }
6548
6549    let thread_count = usize::try_from(threads)
6550        .expect("threads must be non-negative")
6551        .min(thread_state.len());
6552    let block_stride = (half_n / thread_count as SaSint) & !15;
6553
6554    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6555        let block_start = thread as SaSint * block_stride;
6556        let block_size = if thread + 1 < thread_count {
6557            block_stride
6558        } else {
6559            half_n - block_start
6560        };
6561        let local_end = if thread + 1 < thread_count {
6562            m + block_start + block_size
6563        } else {
6564            n + fs
6565        } as isize;
6566        let gathered_position =
6567            gather_marked_lms_suffixes(sa, m, local_end, block_start as isize, block_size as isize);
6568        state.position = gathered_position as SaSint;
6569        state.count = (local_end - gathered_position) as SaSint;
6570    }
6571
6572    let mut position = (n + fs) as isize;
6573    for thread in (0..thread_count).rev() {
6574        let count =
6575            usize::try_from(thread_state[thread].count).expect("count must be non-negative");
6576        position -= thread_state[thread].count as isize;
6577        if thread + 1 != thread_count && count > 0 {
6578            let src = usize::try_from(thread_state[thread].position)
6579                .expect("position must be non-negative");
6580            let dst = position as usize;
6581            sa.copy_within(src..src + count, dst);
6582        }
6583    }
6584}
6585
6586#[allow(dead_code)]
6587fn renumber_and_gather_lms_suffixes_omp(
6588    sa: &mut [SaSint],
6589    n: SaSint,
6590    m: SaSint,
6591    fs: SaSint,
6592    threads: SaSint,
6593    thread_state: &mut [ThreadState],
6594) -> SaSint {
6595    let m_usize = m as usize;
6596    let half_n = (n >> 1) as usize;
6597    sa[m_usize..m_usize + half_n].fill(0);
6598
6599    let name = renumber_lms_suffixes_16u_omp(sa, m, threads, thread_state);
6600    if name < m {
6601        gather_marked_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
6602    } else {
6603        for item in &mut sa[..m_usize] {
6604            *item &= SAINT_MAX;
6605        }
6606    }
6607
6608    name
6609}
6610
6611#[allow(dead_code)]
6612fn reconstruct_lms_suffixes(
6613    sa: &mut [SaSint],
6614    n: SaSint,
6615    m: SaSint,
6616    omp_block_start: isize,
6617    omp_block_size: isize,
6618) {
6619    if omp_block_size <= 0 {
6620        return;
6621    }
6622
6623    let base = (n - m) as usize;
6624    let mut i = omp_block_start;
6625    let mut j = omp_block_start + omp_block_size - 64 - 3;
6626    while i < j {
6627        let iu = i as usize;
6628        let s0 = sa[iu] as usize;
6629        let s1 = sa[iu + 1] as usize;
6630        let s2 = sa[iu + 2] as usize;
6631        let s3 = sa[iu + 3] as usize;
6632        sa[iu] = sa[base + s0];
6633        sa[iu + 1] = sa[base + s1];
6634        sa[iu + 2] = sa[base + s2];
6635        sa[iu + 3] = sa[base + s3];
6636        i += 4;
6637    }
6638
6639    j += 64 + 3;
6640    while i < j {
6641        let iu = i as usize;
6642        let s = sa[iu] as usize;
6643        sa[iu] = sa[base + s];
6644        i += 1;
6645    }
6646}
6647
6648#[allow(dead_code)]
6649fn reconstruct_lms_suffixes_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6650    if threads == 1 || m < 65_536 {
6651        reconstruct_lms_suffixes(sa, n, m, 0, m as isize);
6652        return;
6653    }
6654
6655    let thread_count = threads as usize;
6656    let block_stride = (m / threads) & !15;
6657    for thread in 0..thread_count {
6658        let block_start = thread as SaSint * block_stride;
6659        let block_size = if thread + 1 < thread_count {
6660            block_stride
6661        } else {
6662            m - block_start
6663        };
6664        reconstruct_lms_suffixes(sa, n, m, block_start as isize, block_size as isize);
6665    }
6666}
6667
6668#[allow(dead_code)]
6669fn renumber_distinct_lms_suffixes_32s_4k(
6670    sa: &mut [SaSint],
6671    m: SaSint,
6672    mut name: SaSint,
6673    omp_block_start: isize,
6674    omp_block_size: isize,
6675) -> SaSint {
6676    if omp_block_size <= 0 {
6677        return name;
6678    }
6679
6680    let m_usize = m as usize;
6681    let start = omp_block_start as usize;
6682    let size = omp_block_size as usize;
6683    let (sa_head, sam) = sa.split_at_mut(m_usize);
6684    let mut i = start;
6685    let mut j = start + size.saturating_sub(64 + 3);
6686    let mut p3 = 0;
6687
6688    while i < j {
6689        let p0 = sa_head[i];
6690        sa_head[i] = p0 & SAINT_MAX;
6691        sam[(sa_head[i] >> 1) as usize] = name | (p0 & p3 & SAINT_MIN);
6692        name += SaSint::from(p0 < 0);
6693
6694        let p1 = sa_head[i + 1];
6695        sa_head[i + 1] = p1 & SAINT_MAX;
6696        sam[(sa_head[i + 1] >> 1) as usize] = name | (p1 & p0 & SAINT_MIN);
6697        name += SaSint::from(p1 < 0);
6698
6699        let p2 = sa_head[i + 2];
6700        sa_head[i + 2] = p2 & SAINT_MAX;
6701        sam[(sa_head[i + 2] >> 1) as usize] = name | (p2 & p1 & SAINT_MIN);
6702        name += SaSint::from(p2 < 0);
6703
6704        p3 = sa_head[i + 3];
6705        sa_head[i + 3] = p3 & SAINT_MAX;
6706        sam[(sa_head[i + 3] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6707        name += SaSint::from(p3 < 0);
6708
6709        i += 4;
6710    }
6711
6712    j = start + size;
6713    while i < j {
6714        let p2 = p3;
6715        p3 = sa_head[i];
6716        sa_head[i] = p3 & SAINT_MAX;
6717        sam[(sa_head[i] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6718        name += SaSint::from(p3 < 0);
6719        i += 1;
6720    }
6721
6722    name
6723}
6724
6725#[allow(dead_code)]
6726fn mark_distinct_lms_suffixes_32s(
6727    sa: &mut [SaSint],
6728    m: SaSint,
6729    omp_block_start: isize,
6730    omp_block_size: isize,
6731) {
6732    if omp_block_size <= 0 {
6733        return;
6734    }
6735
6736    let mut i = m as usize + omp_block_start as usize;
6737    let mut j = i + (omp_block_size as usize).saturating_sub(3);
6738    let mut p3 = 0;
6739    while i < j {
6740        let mut p0 = sa[i];
6741        sa[i] = p0 & (p3 | SAINT_MAX);
6742        p0 = if p0 == 0 { p3 } else { p0 };
6743
6744        let mut p1 = sa[i + 1];
6745        sa[i + 1] = p1 & (p0 | SAINT_MAX);
6746        p1 = if p1 == 0 { p0 } else { p1 };
6747
6748        let mut p2 = sa[i + 2];
6749        sa[i + 2] = p2 & (p1 | SAINT_MAX);
6750        p2 = if p2 == 0 { p1 } else { p2 };
6751
6752        p3 = sa[i + 3];
6753        sa[i + 3] = p3 & (p2 | SAINT_MAX);
6754        p3 = if p3 == 0 { p2 } else { p3 };
6755        i += 4;
6756    }
6757
6758    j = m as usize + omp_block_start as usize + omp_block_size as usize;
6759    while i < j {
6760        let p2 = p3;
6761        p3 = sa[i];
6762        sa[i] = p3 & (p2 | SAINT_MAX);
6763        p3 = if p3 == 0 { p2 } else { p3 };
6764        i += 1;
6765    }
6766}
6767
6768#[allow(dead_code)]
6769fn clamp_lms_suffixes_length_32s(
6770    sa: &mut [SaSint],
6771    m: SaSint,
6772    omp_block_start: isize,
6773    omp_block_size: isize,
6774) {
6775    if omp_block_size <= 0 {
6776        return;
6777    }
6778
6779    let mut i = m as usize + omp_block_start as usize;
6780    let mut j = i + (omp_block_size as usize).saturating_sub(3);
6781    while i < j {
6782        let s0 = sa[i];
6783        sa[i] = if s0 < 0 { s0 } else { 0 } & SAINT_MAX;
6784
6785        let s1 = sa[i + 1];
6786        sa[i + 1] = if s1 < 0 { s1 } else { 0 } & SAINT_MAX;
6787
6788        let s2 = sa[i + 2];
6789        sa[i + 2] = if s2 < 0 { s2 } else { 0 } & SAINT_MAX;
6790
6791        let s3 = sa[i + 3];
6792        sa[i + 3] = if s3 < 0 { s3 } else { 0 } & SAINT_MAX;
6793
6794        i += 4;
6795    }
6796
6797    j = m as usize + omp_block_start as usize + omp_block_size as usize;
6798    while i < j {
6799        let s = sa[i];
6800        sa[i] = if s < 0 { s } else { 0 } & SAINT_MAX;
6801        i += 1;
6802    }
6803}
6804
6805#[allow(dead_code)]
6806fn renumber_distinct_lms_suffixes_32s_4k_omp(
6807    sa: &mut [SaSint],
6808    m: SaSint,
6809    threads: SaSint,
6810    thread_state: &mut [ThreadState],
6811) -> SaSint {
6812    if threads == 1 || m < 65_536 || thread_state.is_empty() {
6813        return renumber_distinct_lms_suffixes_32s_4k(sa, m, 1, 0, m as isize) - 1;
6814    }
6815
6816    let thread_count = usize::try_from(threads)
6817        .expect("threads must be non-negative")
6818        .min(thread_state.len());
6819    let block_stride = (m / thread_count as SaSint) & !15;
6820
6821    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6822        let block_start = thread as SaSint * block_stride;
6823        let block_size = if thread + 1 < thread_count {
6824            block_stride
6825        } else {
6826            m - block_start
6827        };
6828        state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6829    }
6830
6831    let mut count = 1;
6832    for thread in 0..thread_count {
6833        let block_start = thread as SaSint * block_stride;
6834        let block_size = if thread + 1 < thread_count {
6835            block_stride
6836        } else {
6837            m - block_start
6838        };
6839        renumber_distinct_lms_suffixes_32s_4k(
6840            sa,
6841            m,
6842            count,
6843            block_start as isize,
6844            block_size as isize,
6845        );
6846        count += thread_state[thread].count;
6847    }
6848
6849    count - 1
6850}
6851
6852#[allow(dead_code)]
6853fn mark_distinct_lms_suffixes_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6854    let half_n = n >> 1;
6855    if threads == 1 || n < 131_072 {
6856        mark_distinct_lms_suffixes_32s(sa, m, 0, half_n as isize);
6857        return;
6858    }
6859
6860    let thread_count = threads as usize;
6861    let block_stride = (half_n / threads) & !15;
6862    for thread in 0..thread_count {
6863        let block_start = thread as SaSint * block_stride;
6864        let block_size = if thread + 1 < thread_count {
6865            block_stride
6866        } else {
6867            half_n - block_start
6868        };
6869        mark_distinct_lms_suffixes_32s(sa, m, block_start as isize, block_size as isize);
6870    }
6871}
6872
6873#[allow(dead_code)]
6874fn clamp_lms_suffixes_length_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6875    let half_n = n >> 1;
6876    if threads == 1 || n < 131_072 {
6877        clamp_lms_suffixes_length_32s(sa, m, 0, half_n as isize);
6878        return;
6879    }
6880
6881    let thread_count = threads as usize;
6882    let block_stride = (half_n / threads) & !15;
6883    for thread in 0..thread_count {
6884        let block_start = thread as SaSint * block_stride;
6885        let block_size = if thread + 1 < thread_count {
6886            block_stride
6887        } else {
6888            half_n - block_start
6889        };
6890        clamp_lms_suffixes_length_32s(sa, m, block_start as isize, block_size as isize);
6891    }
6892}
6893
6894#[allow(dead_code)]
6895fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
6896    sa: &mut [SaSint],
6897    n: SaSint,
6898    m: SaSint,
6899    threads: SaSint,
6900    thread_state: &mut [ThreadState],
6901) -> SaSint {
6902    let m_usize = m as usize;
6903    let half_n = (n >> 1) as usize;
6904    sa[m_usize..m_usize + half_n].fill(0);
6905
6906    let name = renumber_distinct_lms_suffixes_32s_4k_omp(sa, m, threads, thread_state);
6907    if name < m {
6908        mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6909    }
6910
6911    name
6912}
6913
/// Names the entries of `sa[..m]` by comparing neighbouring substrings of `t`, stores the
/// names in the table at `sa[m..]`, and returns the number of names handed out.
///
/// Steps, as performed below:
/// 1. `gather_lms_suffixes_32s` is called and `sa[m..n - m]` is zeroed; the loops that follow
///    read positions from `sa[n - m..n]`.
/// 2. For each gathered position `p`, table slot `p >> 1` receives the distance to the next
///    gathered position plus one, tagged with the sign bit; the last position gets length 1.
/// 3. `clamp_lms_suffixes_length_32s_omp` strips the tags and zeroes the untouched slots.
/// 4. Neighbouring entries of `sa[..m]` are compared (same stored length and identical
///    symbols in `t` over that length). Each entry's table slot is overwritten with its name;
///    the sign bit is set when the entry differs from both of its neighbours.
/// 5. If fewer than `m` names were used, `mark_distinct_lms_suffixes_32s_omp` is run.
///
/// NOTE(review): `threads` is only forwarded to the two `_omp` helpers; the loops in this
/// function itself always run on the calling thread.
#[allow(dead_code)]
fn renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
    t: &[SaSint],
    sa: &mut [SaSint],
    n: SaSint,
    m: SaSint,
    threads: SaSint,
) -> SaSint {
    let m_usize = m as usize;
    let n_usize = n as usize;

    gather_lms_suffixes_32s(t, sa, n);
    sa[m_usize..n_usize - m_usize].fill(0);

    // Store "distance to the next gathered position + 1" (sign-tagged) for every gathered
    // position except the last. Unrolled by four; the 64 + 3 margin keeps `i + 4` in range.
    let mut i = n - m;
    let mut j = n - 1 - 64 - 3;
    while i < j {
        let s0 = (sa[i as usize] as SaUint >> 1) as usize;
        let s1 = (sa[(i + 1) as usize] as SaUint >> 1) as usize;
        let s2 = (sa[(i + 2) as usize] as SaUint >> 1) as usize;
        let s3 = (sa[(i + 3) as usize] as SaUint >> 1) as usize;
        sa[m_usize + s0] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
        sa[m_usize + s1] = sa[(i + 2) as usize] - sa[(i + 1) as usize] + 1 + SAINT_MIN;
        sa[m_usize + s2] = sa[(i + 3) as usize] - sa[(i + 2) as usize] + 1 + SAINT_MIN;
        sa[m_usize + s3] = sa[(i + 4) as usize] - sa[(i + 3) as usize] + 1 + SAINT_MIN;
        i += 4;
    }

    // Scalar tail of the loop above, up to (but excluding) index `n - 1`.
    j += 64 + 3;
    while i < j {
        let s = (sa[i as usize] as SaUint >> 1) as usize;
        sa[m_usize + s] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
        i += 1;
    }

    // The last gathered position has no successor; it gets the tagged length 1.
    let tail = (sa[n_usize - 1] as SaUint >> 1) as usize;
    sa[m_usize + tail] = 1 + SAINT_MIN;

    clamp_lms_suffixes_length_32s_omp(sa, n, m, threads);

    let mut name = 1;
    if m_usize > 0 {
        // `p`/`plen`/`pdiff` describe the previous entry, `q`/`qlen`/`qdiff` the current one.
        // A `*diff` value is `SAINT_MIN` when the entry differs from its predecessor, else 0.
        let mut i = 1usize;
        let mut j = m_usize.saturating_sub(64 + 1);
        let mut p = sa[0] as usize;
        let mut plen = sa[m_usize + (p >> 1)];
        let mut pdiff = SAINT_MIN;

        // Two entries per iteration, with the roles of `p` and `q` swapped in the second half.
        while i < j {
            let q = sa[i] as usize;
            let qlen = sa[m_usize + (q >> 1)];
            let mut qdiff = SAINT_MIN;
            if plen == qlen {
                let mut l = 0;
                while l < qlen as usize {
                    if t[p + l] != t[q + l] {
                        break;
                    }
                    l += 1;
                }
                // `l - qlen` is negative exactly when the comparison stopped early.
                qdiff = ((l as SaSint) - qlen) & SAINT_MIN;
            }
            // Name of `p`; the sign bit is set only if it differs from both neighbours.
            sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
            name += SaSint::from(qdiff < 0);

            p = sa[i + 1] as usize;
            plen = sa[m_usize + (p >> 1)];
            pdiff = SAINT_MIN;
            if qlen == plen {
                let mut l = 0;
                while l < plen as usize {
                    if t[q + l] != t[p + l] {
                        break;
                    }
                    l += 1;
                }
                pdiff = ((l as SaSint) - plen) & SAINT_MIN;
            }
            sa[m_usize + (q >> 1)] = name | (qdiff & pdiff);
            name += SaSint::from(pdiff < 0);
            i += 2;
        }

        // Scalar tail: one entry per iteration, carrying `q` over into `p`.
        j = m_usize;
        while i < j {
            let q = sa[i] as usize;
            let qlen = sa[m_usize + (q >> 1)];
            let mut qdiff = SAINT_MIN;
            if plen == qlen {
                let mut l = 0;
                while l < plen as usize {
                    if t[p + l] != t[q + l] {
                        break;
                    }
                    l += 1;
                }
                qdiff = ((l as SaSint) - plen) & SAINT_MIN;
            }
            sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
            name += SaSint::from(qdiff < 0);
            p = q;
            plen = qlen;
            pdiff = qdiff;
            i += 1;
        }

        // The final entry has no successor, so only its predecessor comparison counts.
        sa[m_usize + (p >> 1)] = name | pdiff;
        name += 1;
    }

    // `name` is one past the last name used, so `name <= m` means some names repeat.
    if name <= m {
        mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
    }

    name - 1
}
7030
7031#[allow(dead_code)]
7032fn renumber_unique_and_nonunique_lms_suffixes_32s(
7033    t: &mut [SaSint],
7034    sa: &mut [SaSint],
7035    m: SaSint,
7036    mut f: SaSint,
7037    omp_block_start: isize,
7038    omp_block_size: isize,
7039) -> SaSint {
7040    if omp_block_size <= 0 {
7041        return f;
7042    }
7043
7044    let m_usize = m as usize;
7045    let (sa_head, sam) = sa.split_at_mut(m_usize);
7046    let mut i = omp_block_start;
7047    let mut j = omp_block_start + omp_block_size - 128 - 3;
7048    while i < j {
7049        for offset in 0..4 {
7050            let idx = (i + offset) as usize;
7051            let p = sa_head[idx] as SaUint;
7052            let mut s = sam[(p >> 1) as usize];
7053            if s < 0 {
7054                t[p as usize] |= SAINT_MIN;
7055                f += 1;
7056                s = i as SaSint + offset as SaSint + SAINT_MIN + f;
7057            }
7058            sam[(p >> 1) as usize] = s - f;
7059        }
7060        i += 4;
7061    }
7062
7063    j += 128 + 3;
7064    while i < j {
7065        let p = sa_head[i as usize] as SaUint;
7066        let mut s = sam[(p >> 1) as usize];
7067        if s < 0 {
7068            t[p as usize] |= SAINT_MIN;
7069            f += 1;
7070            s = i as SaSint + SAINT_MIN + f;
7071        }
7072        sam[(p >> 1) as usize] = s - f;
7073        i += 1;
7074    }
7075
7076    f
7077}
7078
7079#[allow(dead_code)]
7080fn compact_unique_and_nonunique_lms_suffixes_32s(
7081    sa: &mut [SaSint],
7082    m: SaSint,
7083    pl: &mut isize,
7084    pr: &mut isize,
7085    omp_block_start: isize,
7086    omp_block_size: isize,
7087) {
7088    if omp_block_size <= 0 {
7089        return;
7090    }
7091
7092    let m_usize = m as usize;
7093    let source: Vec<SaSint> = sa
7094        [m_usize + omp_block_start as usize..m_usize + (omp_block_start + omp_block_size) as usize]
7095        .to_vec();
7096    let mut l = *pl - 1;
7097    let mut r = *pr - 1;
7098
7099    for &p in source.iter().rev() {
7100        sa[l as usize] = p & SAINT_MAX;
7101        l -= isize::from(p < 0);
7102
7103        sa[r as usize] = p.wrapping_sub(1);
7104        r -= isize::from(p > 0);
7105    }
7106
7107    *pl = l + 1;
7108    *pr = r + 1;
7109}
7110
7111#[allow(dead_code)]
7112fn count_unique_suffixes(
7113    sa: &[SaSint],
7114    m: SaSint,
7115    omp_block_start: isize,
7116    omp_block_size: isize,
7117) -> SaSint {
7118    let base = m as usize;
7119    let start = omp_block_start as usize;
7120    let end = start + omp_block_size as usize;
7121    let mut count = 0;
7122    for i in start..end {
7123        count += SaSint::from(sa[base + ((sa[i] as SaUint) >> 1) as usize] < 0);
7124    }
7125    count
7126}
7127
7128#[allow(dead_code)]
7129fn renumber_unique_and_nonunique_lms_suffixes_32s_omp(
7130    t: &mut [SaSint],
7131    sa: &mut [SaSint],
7132    m: SaSint,
7133    threads: SaSint,
7134) -> SaSint {
7135    if threads == 1 || m < 65_536 {
7136        return renumber_unique_and_nonunique_lms_suffixes_32s(t, sa, m, 0, 0, m as isize);
7137    }
7138
7139    let thread_count = threads as usize;
7140    let block_stride = (m / threads) & !15;
7141    let mut counts = vec![0; thread_count];
7142
7143    for thread in 0..thread_count {
7144        let block_start = thread as SaSint * block_stride;
7145        let block_size = if thread + 1 < thread_count {
7146            block_stride
7147        } else {
7148            m - block_start
7149        };
7150        counts[thread] = count_unique_suffixes(sa, m, block_start as isize, block_size as isize);
7151    }
7152
7153    let mut f = 0;
7154    for thread in 0..thread_count {
7155        let block_start = thread as SaSint * block_stride;
7156        let block_size = if thread + 1 < thread_count {
7157            block_stride
7158        } else {
7159            m - block_start
7160        };
7161        renumber_unique_and_nonunique_lms_suffixes_32s(
7162            t,
7163            sa,
7164            m,
7165            f,
7166            block_start as isize,
7167            block_size as isize,
7168        );
7169        f += counts[thread];
7170    }
7171
7172    f
7173}
7174
7175#[allow(dead_code)]
7176fn compact_unique_and_nonunique_lms_suffixes_32s_omp(
7177    sa: &mut [SaSint],
7178    n: SaSint,
7179    m: SaSint,
7180    fs: SaSint,
7181    f: SaSint,
7182    threads: SaSint,
7183) {
7184    let half_n = n >> 1;
7185    if threads == 1 || n < 131_072 || m >= fs {
7186        let mut l = m as isize;
7187        let mut r = (n + fs) as isize;
7188        compact_unique_and_nonunique_lms_suffixes_32s(sa, m, &mut l, &mut r, 0, half_n as isize);
7189    } else {
7190        let thread_count = threads as usize;
7191        let block_stride = (half_n / threads) & !15;
7192        let mut positions = vec![0isize; thread_count];
7193        let mut counts = vec![0isize; thread_count];
7194
7195        for thread in 0..thread_count {
7196            let block_start = thread as SaSint * block_stride;
7197            let block_size = if thread + 1 < thread_count {
7198                block_stride
7199            } else {
7200                half_n - block_start
7201            };
7202            let mut position = (m + half_n + block_start + block_size) as isize;
7203            let mut count = (m + block_start + block_size) as isize;
7204            compact_unique_and_nonunique_lms_suffixes_32s(
7205                sa,
7206                m,
7207                &mut position,
7208                &mut count,
7209                block_start as isize,
7210                block_size as isize,
7211            );
7212            positions[thread] = position;
7213            counts[thread] = count;
7214        }
7215
7216        let mut position = m as isize;
7217        for thread in (0..thread_count).rev() {
7218            let block_end = if thread + 1 < thread_count {
7219                block_stride * (thread as SaSint + 1)
7220            } else {
7221                half_n
7222            };
7223            let count = (m + half_n + block_end) as isize - positions[thread];
7224            if count > 0 {
7225                position -= count;
7226                let src = positions[thread] as usize;
7227                let dst = position as usize;
7228                sa.copy_within(src..src + count as usize, dst);
7229            }
7230        }
7231
7232        let mut position = (n + fs) as isize;
7233        for thread in (0..thread_count).rev() {
7234            let block_end = if thread + 1 < thread_count {
7235                block_stride * (thread as SaSint + 1)
7236            } else {
7237                half_n
7238            };
7239            let count = (m + block_end) as isize - counts[thread];
7240            if count > 0 {
7241                position -= count;
7242                let src = counts[thread] as usize;
7243                let dst = position as usize;
7244                sa.copy_within(src..src + count as usize, dst);
7245            }
7246        }
7247    }
7248
7249    let dst = (n + fs - m) as usize;
7250    let src = (m - f) as usize;
7251    sa.copy_within(src..src + f as usize, dst);
7252}
7253
/// Runs the renumbering pass followed by the compaction pass and returns the counter `f`
/// produced by the renumbering.
///
/// `f` is the value returned by `renumber_unique_and_nonunique_lms_suffixes_32s_omp`; it is
/// passed on unchanged to `compact_unique_and_nonunique_lms_suffixes_32s_omp`.
#[allow(dead_code)]
fn compact_lms_suffixes_32s_omp(
    t: &mut [SaSint],
    sa: &mut [SaSint],
    n: SaSint,
    m: SaSint,
    fs: SaSint,
    threads: SaSint,
) -> SaSint {
    let f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(t, sa, m, threads);
    compact_unique_and_nonunique_lms_suffixes_32s_omp(sa, n, m, fs, f, threads);
    f
}
7267
7268#[allow(dead_code)]
7269fn merge_unique_lms_suffixes_32s(
7270    t: &mut [SaSint],
7271    sa: &mut [SaSint],
7272    n: SaSint,
7273    m: SaSint,
7274    l: isize,
7275    omp_block_start: isize,
7276    omp_block_size: isize,
7277) {
7278    let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7279    let mut tmp = sa[src_index] as isize;
7280    src_index += 1;
7281
7282    let mut i = omp_block_start;
7283    let mut j = omp_block_start + omp_block_size - 6;
7284    while i < j {
7285        let iu = i as usize;
7286
7287        let c0 = t[iu];
7288        if c0 < 0 {
7289            t[iu] = c0 & SAINT_MAX;
7290            sa[tmp as usize] = i as SaSint;
7291            i += 1;
7292            tmp = sa[src_index] as isize;
7293            src_index += 1;
7294        }
7295
7296        let c1 = t[(i + 1) as usize];
7297        if c1 < 0 {
7298            t[(i + 1) as usize] = c1 & SAINT_MAX;
7299            sa[tmp as usize] = i as SaSint + 1;
7300            i += 1;
7301            tmp = sa[src_index] as isize;
7302            src_index += 1;
7303        }
7304
7305        let c2 = t[(i + 2) as usize];
7306        if c2 < 0 {
7307            t[(i + 2) as usize] = c2 & SAINT_MAX;
7308            sa[tmp as usize] = i as SaSint + 2;
7309            i += 1;
7310            tmp = sa[src_index] as isize;
7311            src_index += 1;
7312        }
7313
7314        let c3 = t[(i + 3) as usize];
7315        if c3 < 0 {
7316            t[(i + 3) as usize] = c3 & SAINT_MAX;
7317            sa[tmp as usize] = i as SaSint + 3;
7318            i += 1;
7319            tmp = sa[src_index] as isize;
7320            src_index += 1;
7321        }
7322
7323        i += 4;
7324    }
7325
7326    j += 6;
7327    while i < j {
7328        let c = t[i as usize];
7329        if c < 0 {
7330            t[i as usize] = c & SAINT_MAX;
7331            sa[tmp as usize] = i as SaSint;
7332            i += 1;
7333            tmp = sa[src_index] as isize;
7334            src_index += 1;
7335        }
7336        i += 1;
7337    }
7338}
7339
7340#[allow(dead_code)]
7341fn merge_nonunique_lms_suffixes_32s(
7342    sa: &mut [SaSint],
7343    n: SaSint,
7344    m: SaSint,
7345    l: isize,
7346    omp_block_start: isize,
7347    omp_block_size: isize,
7348) {
7349    let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7350    let mut tmp = sa[src_index];
7351    src_index += 1;
7352
7353    let mut i = omp_block_start;
7354    let mut j = omp_block_start + omp_block_size - 3;
7355    while i < j {
7356        if sa[i as usize] == 0 {
7357            sa[i as usize] = tmp;
7358            tmp = sa[src_index];
7359            src_index += 1;
7360        }
7361        if sa[(i + 1) as usize] == 0 {
7362            sa[(i + 1) as usize] = tmp;
7363            tmp = sa[src_index];
7364            src_index += 1;
7365        }
7366        if sa[(i + 2) as usize] == 0 {
7367            sa[(i + 2) as usize] = tmp;
7368            tmp = sa[src_index];
7369            src_index += 1;
7370        }
7371        if sa[(i + 3) as usize] == 0 {
7372            sa[(i + 3) as usize] = tmp;
7373            tmp = sa[src_index];
7374            src_index += 1;
7375        }
7376        i += 4;
7377    }
7378
7379    j += 3;
7380    while i < j {
7381        if sa[i as usize] == 0 {
7382            sa[i as usize] = tmp;
7383            tmp = sa[src_index];
7384            src_index += 1;
7385        }
7386        i += 1;
7387    }
7388}
7389
7390#[allow(dead_code)]
7391fn merge_unique_lms_suffixes_32s_omp(
7392    t: &mut [SaSint],
7393    sa: &mut [SaSint],
7394    n: SaSint,
7395    m: SaSint,
7396    threads: SaSint,
7397) {
7398    if threads == 1 || n < 65_536 {
7399        merge_unique_lms_suffixes_32s(t, sa, n, m, 0, 0, n as isize);
7400        return;
7401    }
7402
7403    let thread_count = threads as usize;
7404    let block_stride = (n / threads) & !15;
7405    let mut counts = vec![0; thread_count];
7406
7407    for thread in 0..thread_count {
7408        let block_start = thread as SaSint * block_stride;
7409        let block_size = if thread + 1 < thread_count {
7410            block_stride
7411        } else {
7412            n - block_start
7413        };
7414        counts[thread] = count_negative_marked_suffixes(t, block_start, block_size);
7415    }
7416
7417    let mut count = 0;
7418    for thread in 0..thread_count {
7419        let block_start = thread as SaSint * block_stride;
7420        let block_size = if thread + 1 < thread_count {
7421            block_stride
7422        } else {
7423            n - block_start
7424        };
7425        merge_unique_lms_suffixes_32s(
7426            t,
7427            sa,
7428            n,
7429            m,
7430            count as isize,
7431            block_start as isize,
7432            block_size as isize,
7433        );
7434        count += counts[thread];
7435    }
7436}
7437
7438#[allow(dead_code)]
7439fn merge_nonunique_lms_suffixes_32s_omp(
7440    sa: &mut [SaSint],
7441    n: SaSint,
7442    m: SaSint,
7443    f: SaSint,
7444    threads: SaSint,
7445) {
7446    if threads == 1 || m < 65_536 {
7447        merge_nonunique_lms_suffixes_32s(sa, n, m, f as isize, 0, m as isize);
7448        return;
7449    }
7450
7451    let thread_count = threads as usize;
7452    let block_stride = (m / threads) & !15;
7453    let mut counts = vec![0; thread_count];
7454
7455    for thread in 0..thread_count {
7456        let block_start = thread as SaSint * block_stride;
7457        let block_size = if thread + 1 < thread_count {
7458            block_stride
7459        } else {
7460            m - block_start
7461        };
7462        counts[thread] = count_zero_marked_suffixes(sa, block_start, block_size);
7463    }
7464
7465    let mut count = f;
7466    for thread in 0..thread_count {
7467        let block_start = thread as SaSint * block_stride;
7468        let block_size = if thread + 1 < thread_count {
7469            block_stride
7470        } else {
7471            m - block_start
7472        };
7473        merge_nonunique_lms_suffixes_32s(
7474            sa,
7475            n,
7476            m,
7477            count as isize,
7478            block_start as isize,
7479            block_size as isize,
7480        );
7481        count += counts[thread];
7482    }
7483}
7484
/// Runs the two merge passes back to back: first `merge_unique_lms_suffixes_32s_omp`
/// (which also updates `t`), then `merge_nonunique_lms_suffixes_32s_omp` with offset `f`.
#[allow(dead_code)]
fn merge_compacted_lms_suffixes_32s_omp(
    t: &mut [SaSint],
    sa: &mut [SaSint],
    n: SaSint,
    m: SaSint,
    f: SaSint,
    threads: SaSint,
) {
    merge_unique_lms_suffixes_32s_omp(t, sa, n, m, threads);
    merge_nonunique_lms_suffixes_32s_omp(sa, n, m, f, threads);
}
7497
7498#[allow(dead_code)]
7499fn reconstruct_compacted_lms_suffixes_32s_2k_omp(
7500    t: &mut [SaSint],
7501    sa: &mut [SaSint],
7502    n: SaSint,
7503    k: SaSint,
7504    m: SaSint,
7505    fs: SaSint,
7506    f: SaSint,
7507    buckets: &mut [SaSint],
7508    local_buckets: SaSint,
7509    threads: SaSint,
7510    thread_state: &mut [ThreadState],
7511) {
7512    if f > 0 {
7513        let dst = (n - m - 1) as usize;
7514        let src = (n + fs - m) as usize;
7515        sa.copy_within(src..src + f as usize, dst);
7516
7517        count_and_gather_compacted_lms_suffixes_32s_2k_omp(
7518            t,
7519            sa,
7520            n,
7521            k,
7522            buckets,
7523            local_buckets,
7524            threads,
7525            thread_state,
7526        );
7527        reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7528
7529        let dst = (n - m - 1 + f) as usize;
7530        sa.copy_within(0..(m - f) as usize, dst);
7531        sa[..m as usize].fill(0);
7532
7533        merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7534    } else {
7535        count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
7536        reconstruct_lms_suffixes_omp(sa, n, m, threads);
7537    }
7538}
7539
7540#[allow(dead_code)]
7541fn reconstruct_compacted_lms_suffixes_32s_1k_omp(
7542    t: &mut [SaSint],
7543    sa: &mut [SaSint],
7544    n: SaSint,
7545    m: SaSint,
7546    fs: SaSint,
7547    f: SaSint,
7548    threads: SaSint,
7549) {
7550    if f > 0 {
7551        let dst = (n - m - 1) as usize;
7552        let src = (n + fs - m) as usize;
7553        sa.copy_within(src..src + f as usize, dst);
7554
7555        gather_compacted_lms_suffixes_32s(t, sa, n);
7556        reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7557
7558        let dst = (n - m - 1 + f) as usize;
7559        sa.copy_within(0..(m - f) as usize, dst);
7560        sa[..m as usize].fill(0);
7561
7562        merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7563    } else {
7564        gather_lms_suffixes_32s(t, sa, n);
7565        reconstruct_lms_suffixes_omp(sa, n, m, threads);
7566    }
7567}
7568
7569#[allow(dead_code)]
7570fn place_lms_suffixes_interval_16u(
7571    sa: &mut [SaSint],
7572    n: SaSint,
7573    mut m: SaSint,
7574    flags: SaSint,
7575    buckets: &mut [SaSint],
7576) {
7577    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7578        buckets[7 * ALPHABET_SIZE] -= 1;
7579    }
7580
7581    let mut j = n as isize;
7582    let mut c = ALPHABET_SIZE as isize - 2;
7583    while c >= 0 {
7584        let ci = c as usize;
7585        let l =
7586            buckets[buckets_index2(ci, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(ci, 1)];
7587        if l > 0 {
7588            let i = buckets[7 * ALPHABET_SIZE + ci] as isize;
7589            if j - i > 0 {
7590                sa[i as usize..j as usize].fill(0);
7591            }
7592
7593            m -= l;
7594            j = i - l as isize;
7595            let src = m as usize;
7596            let dst = j as usize;
7597            sa.copy_within(src..src + l as usize, dst);
7598        }
7599        c -= 1;
7600    }
7601
7602    sa[..j as usize].fill(0);
7603
7604    if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7605        buckets[7 * ALPHABET_SIZE] += 1;
7606    }
7607}
7608
7609#[allow(dead_code)]
7610fn place_lms_suffixes_interval_32s_4k(
7611    sa: &mut [SaSint],
7612    n: SaSint,
7613    k: SaSint,
7614    mut m: SaSint,
7615    buckets: &[SaSint],
7616) {
7617    let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7618    let mut j = n as usize;
7619    let mut c = k - 2;
7620    while c >= 0 {
7621        let cu = c as usize;
7622        let l =
7623            buckets[buckets_index2(cu, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(cu, 1)];
7624        if l > 0 {
7625            let i = bucket_end[cu] as usize;
7626            if j > i {
7627                sa[i..j].fill(0);
7628            }
7629
7630            m -= l;
7631            let dst = i - l as usize;
7632            sa.copy_within(m as usize..m as usize + l as usize, dst);
7633            j = dst;
7634        }
7635        c -= 1;
7636    }
7637
7638    sa[..j].fill(0);
7639}
7640
7641#[allow(dead_code)]
7642fn place_lms_suffixes_interval_32s_2k(
7643    sa: &mut [SaSint],
7644    n: SaSint,
7645    k: SaSint,
7646    mut m: SaSint,
7647    buckets: &[SaSint],
7648) {
7649    let mut j = n as usize;
7650    if k > 1 {
7651        let mut c = buckets_index2(k as usize - 2, 0) as isize;
7652        while c >= buckets_index2(0, 0) as isize {
7653            let cu = c as usize;
7654            let l = buckets[cu + buckets_index2(1, 1)] - buckets[cu + buckets_index2(0, 1)];
7655            if l > 0 {
7656                let i = buckets[cu] as usize;
7657                if j > i {
7658                    sa[i..j].fill(0);
7659                }
7660
7661                m -= l;
7662                let dst = i - l as usize;
7663                sa.copy_within(m as usize..m as usize + l as usize, dst);
7664                j = dst;
7665            }
7666            c -= buckets_index2(1, 0) as isize;
7667        }
7668    }
7669
7670    sa[..j].fill(0);
7671}
7672
7673#[allow(dead_code)]
7674fn place_lms_suffixes_interval_32s_1k(
7675    t: &[SaSint],
7676    sa: &mut [SaSint],
7677    k: SaSint,
7678    m: SaSint,
7679    buckets: &[SaSint],
7680) {
7681    let mut c = k - 1;
7682    let mut l = buckets[c as usize] as usize;
7683
7684    let mut i = m - 1;
7685    while i >= 0 {
7686        let p = sa[i as usize] as usize;
7687        if t[p] != c {
7688            c = t[p];
7689            let bucket_pos = buckets[c as usize] as usize;
7690            if l > bucket_pos {
7691                sa[bucket_pos..l].fill(0);
7692            }
7693            l = bucket_pos;
7694        }
7695        l -= 1;
7696        sa[l] = p as SaSint;
7697        i -= 1;
7698    }
7699
7700    sa[..l].fill(0);
7701}
7702
7703#[allow(dead_code)]
7704fn place_lms_suffixes_histogram_32s_6k(
7705    sa: &mut [SaSint],
7706    n: SaSint,
7707    k: SaSint,
7708    mut m: SaSint,
7709    buckets: &[SaSint],
7710) {
7711    let bucket_end = &buckets[5 * k as usize..6 * k as usize];
7712    let mut j = n as usize;
7713    let mut c = k - 2;
7714    while c >= 0 {
7715        let l = buckets[buckets_index4(c as usize, 1)] as usize;
7716        if l > 0 {
7717            let i = bucket_end[c as usize] as usize;
7718            if j > i {
7719                sa[i..j].fill(0);
7720            }
7721            let dst = i - l;
7722            m -= l as SaSint;
7723            sa.copy_within(m as usize..m as usize + l, dst);
7724            j = dst;
7725        }
7726        c -= 1;
7727    }
7728    sa[..j].fill(0);
7729}
7730
7731#[allow(dead_code)]
7732fn place_lms_suffixes_histogram_32s_4k(
7733    sa: &mut [SaSint],
7734    n: SaSint,
7735    k: SaSint,
7736    mut m: SaSint,
7737    buckets: &[SaSint],
7738) {
7739    let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7740    let mut j = n as usize;
7741    let mut c = k - 2;
7742    while c >= 0 {
7743        let l = buckets[buckets_index2(c as usize, 1)] as usize;
7744        if l > 0 {
7745            let i = bucket_end[c as usize] as usize;
7746            if j > i {
7747                sa[i..j].fill(0);
7748            }
7749            let dst = i - l;
7750            m -= l as SaSint;
7751            sa.copy_within(m as usize..m as usize + l, dst);
7752            j = dst;
7753        }
7754        c -= 1;
7755    }
7756    sa[..j].fill(0);
7757}
7758
7759#[allow(dead_code)]
7760fn place_lms_suffixes_histogram_32s_2k(
7761    sa: &mut [SaSint],
7762    n: SaSint,
7763    k: SaSint,
7764    mut m: SaSint,
7765    buckets: &[SaSint],
7766) {
7767    let mut j = n as usize;
7768    if k > 1 {
7769        let mut c = buckets_index2(k as usize - 2, 0) as isize;
7770        while c >= buckets_index2(0, 0) as isize {
7771            let cu = c as usize;
7772            let l = buckets[cu + buckets_index2(0, 1)] as usize;
7773            if l > 0 {
7774                let i = buckets[cu] as usize;
7775                if j > i {
7776                    sa[i..j].fill(0);
7777                }
7778                let dst = i - l;
7779                m -= l as SaSint;
7780                sa.copy_within(m as usize..m as usize + l, dst);
7781                j = dst;
7782            }
7783            c -= buckets_index2(1, 0) as isize;
7784        }
7785    }
7786    sa[..j].fill(0);
7787}
7788
7789#[allow(dead_code)]
7790fn final_bwt_scan_left_to_right_16u_block_prepare(
7791    t: &[u16],
7792    sa: &mut [SaSint],
7793    k: SaSint,
7794    buckets: &mut [SaSint],
7795    cache: &mut [ThreadCache],
7796    omp_block_start: SaSint,
7797    omp_block_size: SaSint,
7798) -> SaSint {
7799    buckets[..k as usize].fill(0);
7800    let mut count = 0usize;
7801    for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7802        let mut p = sa[i];
7803        sa[i] = p & SAINT_MAX;
7804        if p > 0 {
7805            p -= 1;
7806            let c = t[p as usize] as usize;
7807            sa[i] = c as SaSint | SAINT_MIN;
7808            buckets[c] += 1;
7809            cache[count].symbol = c as SaSint;
7810            cache[count].index = p
7811                | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7812                    << (SAINT_BIT - 1));
7813            count += 1;
7814        }
7815    }
7816    count as SaSint
7817}
7818
7819#[allow(dead_code)]
7820fn final_sorting_scan_left_to_right_16u_block_prepare(
7821    t: &[u16],
7822    sa: &mut [SaSint],
7823    k: SaSint,
7824    buckets: &mut [SaSint],
7825    cache: &mut [ThreadCache],
7826    omp_block_start: SaSint,
7827    omp_block_size: SaSint,
7828) -> SaSint {
7829    buckets[..k as usize].fill(0);
7830    let mut count = 0usize;
7831    for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7832        let mut p = sa[i];
7833        sa[i] = p ^ SAINT_MIN;
7834        if p > 0 {
7835            p -= 1;
7836            let c = t[p as usize] as usize;
7837            buckets[c] += 1;
7838            cache[count].symbol = c as SaSint;
7839            cache[count].index = p
7840                | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7841                    << (SAINT_BIT - 1));
7842            count += 1;
7843        }
7844    }
7845    count as SaSint
7846}
7847
7848#[allow(dead_code)]
7849fn final_order_scan_left_to_right_16u_block_place(
7850    sa: &mut [SaSint],
7851    buckets: &mut [SaSint],
7852    cache: &[ThreadCache],
7853    count: SaSint,
7854) {
7855    for entry in cache.iter().take(count as usize) {
7856        let c = entry.symbol as usize;
7857        let dst = buckets[c] as usize;
7858        sa[dst] = entry.index;
7859        buckets[c] += 1;
7860    }
7861}
7862
7863#[allow(dead_code)]
7864fn final_bwt_aux_scan_left_to_right_16u_block_place(
7865    sa: &mut [SaSint],
7866    rm: SaSint,
7867    i_sample: &mut [SaSint],
7868    buckets: &mut [SaSint],
7869    cache: &[ThreadCache],
7870    count: SaSint,
7871) {
7872    for entry in cache.iter().take(count as usize) {
7873        let c = entry.symbol as usize;
7874        let dst = buckets[c] as usize;
7875        sa[dst] = entry.index;
7876        buckets[c] += 1;
7877        let p = entry.index & SAINT_MAX;
7878        if (p & rm) == 0 {
7879            i_sample[(p / (rm + 1)) as usize] = buckets[c];
7880        }
7881    }
7882}
7883
7884#[allow(dead_code)]
7885fn final_bwt_scan_left_to_right_16u_block_omp(
7886    t: &[u16],
7887    sa: &mut [SaSint],
7888    k: SaSint,
7889    induction_bucket: &mut [SaSint],
7890    block_start: SaSint,
7891    block_size: SaSint,
7892    threads: SaSint,
7893    thread_state: &mut [ThreadState],
7894) {
7895    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7896        usize::try_from(threads)
7897            .expect("threads must be non-negative")
7898            .min(thread_state.len())
7899    } else {
7900        1
7901    };
7902    if thread_count <= 1 {
7903        final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
7904        return;
7905    }
7906
7907    let k_usize = usize::try_from(k).expect("k must be non-negative");
7908    let block_stride = (block_size / thread_count as SaSint) & !15;
7909
7910    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7911        let local_start = thread as SaSint * block_stride;
7912        let local_size = if thread + 1 < thread_count {
7913            block_stride
7914        } else {
7915            block_size - local_start
7916        };
7917        state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7918            t,
7919            sa,
7920            k,
7921            &mut state.buckets[..k_usize],
7922            &mut state.cache,
7923            block_start + local_start,
7924            local_size,
7925        );
7926    }
7927
7928    for state in thread_state.iter_mut().take(thread_count) {
7929        for c in 0..k_usize {
7930            let a = induction_bucket[c];
7931            let b = state.buckets[c];
7932            induction_bucket[c] = a + b;
7933            state.buckets[c] = a;
7934        }
7935    }
7936
7937    for state in thread_state.iter_mut().take(thread_count) {
7938        final_order_scan_left_to_right_16u_block_place(
7939            sa,
7940            &mut state.buckets[..k_usize],
7941            &state.cache,
7942            state.count,
7943        );
7944    }
7945}
7946
7947#[allow(dead_code)]
7948fn final_bwt_aux_scan_left_to_right_16u_block_omp(
7949    t: &[u16],
7950    sa: &mut [SaSint],
7951    k: SaSint,
7952    rm: SaSint,
7953    i_sample: &mut [SaSint],
7954    induction_bucket: &mut [SaSint],
7955    block_start: SaSint,
7956    block_size: SaSint,
7957    threads: SaSint,
7958    thread_state: &mut [ThreadState],
7959) {
7960    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7961        usize::try_from(threads)
7962            .expect("threads must be non-negative")
7963            .min(thread_state.len())
7964    } else {
7965        1
7966    };
7967    if thread_count <= 1 {
7968        final_bwt_aux_scan_left_to_right_16u(
7969            t,
7970            sa,
7971            rm,
7972            i_sample,
7973            induction_bucket,
7974            block_start,
7975            block_size,
7976        );
7977        return;
7978    }
7979
7980    let k_usize = usize::try_from(k).expect("k must be non-negative");
7981    let block_stride = (block_size / thread_count as SaSint) & !15;
7982
7983    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7984        let local_start = thread as SaSint * block_stride;
7985        let local_size = if thread + 1 < thread_count {
7986            block_stride
7987        } else {
7988            block_size - local_start
7989        };
7990        state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7991            t,
7992            sa,
7993            k,
7994            &mut state.buckets[..k_usize],
7995            &mut state.cache,
7996            block_start + local_start,
7997            local_size,
7998        );
7999    }
8000
8001    for state in thread_state.iter_mut().take(thread_count) {
8002        for c in 0..k_usize {
8003            let a = induction_bucket[c];
8004            let b = state.buckets[c];
8005            induction_bucket[c] = a + b;
8006            state.buckets[c] = a;
8007        }
8008    }
8009
8010    for state in thread_state.iter_mut().take(thread_count) {
8011        final_bwt_aux_scan_left_to_right_16u_block_place(
8012            sa,
8013            rm,
8014            i_sample,
8015            &mut state.buckets[..k_usize],
8016            &state.cache,
8017            state.count,
8018        );
8019    }
8020}
8021
8022#[allow(dead_code)]
8023fn final_sorting_scan_left_to_right_16u_block_omp(
8024    t: &[u16],
8025    sa: &mut [SaSint],
8026    k: SaSint,
8027    induction_bucket: &mut [SaSint],
8028    block_start: SaSint,
8029    block_size: SaSint,
8030    threads: SaSint,
8031    thread_state: &mut [ThreadState],
8032) {
8033    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8034        usize::try_from(threads)
8035            .expect("threads must be non-negative")
8036            .min(thread_state.len())
8037    } else {
8038        1
8039    };
8040    if thread_count <= 1 {
8041        final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
8042        return;
8043    }
8044
8045    let k_usize = usize::try_from(k).expect("k must be non-negative");
8046    let block_stride = (block_size / thread_count as SaSint) & !15;
8047
8048    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8049        let local_start = thread as SaSint * block_stride;
8050        let local_size = if thread + 1 < thread_count {
8051            block_stride
8052        } else {
8053            block_size - local_start
8054        };
8055        state.count = final_sorting_scan_left_to_right_16u_block_prepare(
8056            t,
8057            sa,
8058            k,
8059            &mut state.buckets[..k_usize],
8060            &mut state.cache,
8061            block_start + local_start,
8062            local_size,
8063        );
8064    }
8065
8066    for state in thread_state.iter_mut().take(thread_count) {
8067        for c in 0..k_usize {
8068            let a = induction_bucket[c];
8069            let b = state.buckets[c];
8070            induction_bucket[c] = a + b;
8071            state.buckets[c] = a;
8072        }
8073    }
8074
8075    for state in thread_state.iter_mut().take(thread_count) {
8076        final_order_scan_left_to_right_16u_block_place(
8077            sa,
8078            &mut state.buckets[..k_usize],
8079            &state.cache,
8080            state.count,
8081        );
8082    }
8083}
8084
8085#[allow(dead_code)]
8086fn final_bwt_scan_left_to_right_16u_omp(
8087    t: &[u16],
8088    sa: &mut [SaSint],
8089    n: SaSint,
8090    k: SaSint,
8091    induction_bucket: &mut [SaSint],
8092    threads: SaSint,
8093) {
8094    let c = t[(n - 1) as usize] as usize;
8095    let dst = induction_bucket[c] as usize;
8096    induction_bucket[c] += 1;
8097    let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8098        SAINT_MIN
8099    } else {
8100        0
8101    };
8102    sa[dst] = (n - 1) | mark;
8103
8104    if threads == 1 || n < 65536 {
8105        final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8106    } else {
8107        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8108        let mut block_start = 0;
8109        while block_start < n {
8110            if sa[block_start as usize] == 0 {
8111                block_start += 1;
8112            } else {
8113                let mut block_end =
8114                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8115                if block_end > n {
8116                    block_end = n;
8117                }
8118                let mut block_scan_end = block_start + 1;
8119                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8120                    block_scan_end += 1;
8121                }
8122                let block_size = block_scan_end - block_start;
8123                if block_size < 32 {
8124                    while block_start < block_scan_end {
8125                        let mut p = sa[block_start as usize];
8126                        sa[block_start as usize] = p & SAINT_MAX;
8127                        if p > 0 {
8128                            p -= 1;
8129                            let c = t[p as usize] as usize;
8130                            sa[block_start as usize] = c as SaSint | SAINT_MIN;
8131                            let dst = induction_bucket[c] as usize;
8132                            induction_bucket[c] += 1;
8133                            let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8134                                SAINT_MIN
8135                            } else {
8136                                0
8137                            };
8138                            sa[dst] = p | mark;
8139                        }
8140                        block_start += 1;
8141                    }
8142                } else {
8143                    final_bwt_scan_left_to_right_16u_block_omp(
8144                        t,
8145                        sa,
8146                        k,
8147                        induction_bucket,
8148                        block_start,
8149                        block_size,
8150                        threads,
8151                        &mut thread_state,
8152                    );
8153                    block_start = block_scan_end;
8154                }
8155            }
8156        }
8157    }
8158}
8159
8160#[allow(dead_code)]
8161fn final_bwt_aux_scan_left_to_right_16u_omp(
8162    t: &[u16],
8163    sa: &mut [SaSint],
8164    n: SaSint,
8165    k: SaSint,
8166    rm: SaSint,
8167    i_sample: &mut [SaSint],
8168    induction_bucket: &mut [SaSint],
8169    threads: SaSint,
8170) {
8171    let c = t[(n - 1) as usize] as usize;
8172    let dst = induction_bucket[c] as usize;
8173    induction_bucket[c] += 1;
8174    let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8175        SAINT_MIN
8176    } else {
8177        0
8178    };
8179    sa[dst] = (n - 1) | mark;
8180
8181    if ((n - 1) & rm) == 0 {
8182        i_sample[((n - 1) / (rm + 1)) as usize] = induction_bucket[c];
8183    }
8184
8185    if threads == 1 || n < 65536 {
8186        final_bwt_aux_scan_left_to_right_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8187    } else {
8188        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8189        let mut block_start = 0;
8190        while block_start < n {
8191            if sa[block_start as usize] == 0 {
8192                block_start += 1;
8193            } else {
8194                let mut block_end =
8195                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8196                if block_end > n {
8197                    block_end = n;
8198                }
8199                let mut block_scan_end = block_start + 1;
8200                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8201                    block_scan_end += 1;
8202                }
8203                let block_size = block_scan_end - block_start;
8204                if block_size < 32 {
8205                    while block_start < block_scan_end {
8206                        let mut p = sa[block_start as usize];
8207                        sa[block_start as usize] = p & SAINT_MAX;
8208                        if p > 0 {
8209                            p -= 1;
8210                            let c = t[p as usize] as usize;
8211                            sa[block_start as usize] = c as SaSint | SAINT_MIN;
8212                            let dst = induction_bucket[c] as usize;
8213                            induction_bucket[c] += 1;
8214                            let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8215                                SAINT_MIN
8216                            } else {
8217                                0
8218                            };
8219                            sa[dst] = p | mark;
8220                            if (p & rm) == 0 {
8221                                i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
8222                            }
8223                        }
8224                        block_start += 1;
8225                    }
8226                } else {
8227                    final_bwt_aux_scan_left_to_right_16u_block_omp(
8228                        t,
8229                        sa,
8230                        k,
8231                        rm,
8232                        i_sample,
8233                        induction_bucket,
8234                        block_start,
8235                        block_size,
8236                        threads,
8237                        &mut thread_state,
8238                    );
8239                    block_start = block_scan_end;
8240                }
8241            }
8242        }
8243    }
8244}
8245
8246#[allow(dead_code)]
8247fn final_sorting_scan_left_to_right_16u_omp(
8248    t: &[u16],
8249    sa: &mut [SaSint],
8250    n: SaSint,
8251    k: SaSint,
8252    induction_bucket: &mut [SaSint],
8253    threads: SaSint,
8254) {
8255    let c = t[(n - 1) as usize] as usize;
8256    let dst = induction_bucket[c] as usize;
8257    induction_bucket[c] += 1;
8258    let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8259        SAINT_MIN
8260    } else {
8261        0
8262    };
8263    sa[dst] = (n - 1) | mark;
8264
8265    if threads == 1 || n < 65536 {
8266        final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8267    } else {
8268        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8269        let mut block_start = 0;
8270        while block_start < n {
8271            if sa[block_start as usize] == 0 {
8272                block_start += 1;
8273            } else {
8274                let mut block_end =
8275                    block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8276                if block_end > n {
8277                    block_end = n;
8278                }
8279                let mut block_scan_end = block_start + 1;
8280                while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8281                    block_scan_end += 1;
8282                }
8283                let block_size = block_scan_end - block_start;
8284                if block_size < 32 {
8285                    while block_start < block_scan_end {
8286                        let mut p = sa[block_start as usize];
8287                        sa[block_start as usize] = p ^ SAINT_MIN;
8288                        if p > 0 {
8289                            p -= 1;
8290                            let c = t[p as usize] as usize;
8291                            let dst = induction_bucket[c] as usize;
8292                            induction_bucket[c] += 1;
8293                            let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8294                                SAINT_MIN
8295                            } else {
8296                                0
8297                            };
8298                            sa[dst] = p | mark;
8299                        }
8300                        block_start += 1;
8301                    }
8302                } else {
8303                    final_sorting_scan_left_to_right_16u_block_omp(
8304                        t,
8305                        sa,
8306                        k,
8307                        induction_bucket,
8308                        block_start,
8309                        block_size,
8310                        threads,
8311                        &mut thread_state,
8312                    );
8313                    block_start = block_scan_end;
8314                }
8315            }
8316        }
8317    }
8318}
8319
8320#[allow(dead_code)]
8321fn final_bwt_scan_right_to_left_16u_block_prepare(
8322    t: &[u16],
8323    sa: &mut [SaSint],
8324    k: SaSint,
8325    buckets: &mut [SaSint],
8326    cache: &mut [ThreadCache],
8327    omp_block_start: SaSint,
8328    omp_block_size: SaSint,
8329) -> SaSint {
8330    buckets[..k as usize].fill(0);
8331    let mut count = 0usize;
8332    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8333        let mut p = sa[i];
8334        sa[i] = p & SAINT_MAX;
8335        if p > 0 {
8336            p -= 1;
8337            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8338            let c1 = t[p as usize];
8339            sa[i] = c1 as SaSint;
8340            buckets[c1 as usize] += 1;
8341            cache[count].symbol = c1 as SaSint;
8342            cache[count].index = if c0 <= c1 {
8343                p
8344            } else {
8345                c0 as SaSint | SAINT_MIN
8346            };
8347            count += 1;
8348        }
8349    }
8350    count as SaSint
8351}
8352
8353#[allow(dead_code)]
8354fn final_bwt_aux_scan_right_to_left_16u_block_prepare(
8355    t: &[u16],
8356    sa: &mut [SaSint],
8357    k: SaSint,
8358    buckets: &mut [SaSint],
8359    cache: &mut [ThreadCache],
8360    omp_block_start: SaSint,
8361    omp_block_size: SaSint,
8362) -> SaSint {
8363    buckets[..k as usize].fill(0);
8364    let mut count = 0usize;
8365    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8366        let mut p = sa[i];
8367        sa[i] = p & SAINT_MAX;
8368        if p > 0 {
8369            p -= 1;
8370            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8371            let c1 = t[p as usize];
8372            sa[i] = c1 as SaSint;
8373            buckets[c1 as usize] += 1;
8374            cache[count].symbol = c1 as SaSint;
8375            cache[count].index = if c0 <= c1 {
8376                p
8377            } else {
8378                c0 as SaSint | SAINT_MIN
8379            };
8380            cache[count + 1].index = p;
8381            count += 2;
8382        }
8383    }
8384    count as SaSint
8385}
8386
8387#[allow(dead_code)]
8388fn final_sorting_scan_right_to_left_16u_block_prepare(
8389    t: &[u16],
8390    sa: &mut [SaSint],
8391    k: SaSint,
8392    buckets: &mut [SaSint],
8393    cache: &mut [ThreadCache],
8394    omp_block_start: SaSint,
8395    omp_block_size: SaSint,
8396) -> SaSint {
8397    buckets[..k as usize].fill(0);
8398    let mut count = 0usize;
8399    for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8400        let mut p = sa[i];
8401        sa[i] = p & SAINT_MAX;
8402        if p > 0 {
8403            p -= 1;
8404            let c = t[p as usize] as usize;
8405            buckets[c] += 1;
8406            cache[count].symbol = c as SaSint;
8407            cache[count].index = p
8408                | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] > t[p as usize]) as SaSint)
8409                    << (SAINT_BIT - 1));
8410            count += 1;
8411        }
8412    }
8413    count as SaSint
8414}
8415
8416#[allow(dead_code)]
8417fn final_order_scan_right_to_left_16u_block_place(
8418    sa: &mut [SaSint],
8419    buckets: &mut [SaSint],
8420    cache: &[ThreadCache],
8421    count: SaSint,
8422) {
8423    for entry in cache.iter().take(count as usize) {
8424        let c = entry.symbol as usize;
8425        buckets[c] -= 1;
8426        sa[buckets[c] as usize] = entry.index;
8427    }
8428}
8429
8430#[allow(dead_code)]
8431fn final_gsa_scan_right_to_left_16u_block_place(
8432    sa: &mut [SaSint],
8433    buckets: &mut [SaSint],
8434    cache: &[ThreadCache],
8435    count: SaSint,
8436) {
8437    for entry in cache.iter().take(count as usize) {
8438        let c = entry.symbol as usize;
8439        if c > 0 {
8440            buckets[c] -= 1;
8441            sa[buckets[c] as usize] = entry.index;
8442        }
8443    }
8444}
8445
8446#[allow(dead_code)]
8447fn final_bwt_aux_scan_right_to_left_16u_block_place(
8448    sa: &mut [SaSint],
8449    rm: SaSint,
8450    i_sample: &mut [SaSint],
8451    buckets: &mut [SaSint],
8452    cache: &[ThreadCache],
8453    count: SaSint,
8454) {
8455    let mut i = 0usize;
8456    while i < count as usize {
8457        let c = cache[i].symbol as usize;
8458        buckets[c] -= 1;
8459        sa[buckets[c] as usize] = cache[i].index;
8460        let p = cache[i + 1].index;
8461        if (p & rm) == 0 {
8462            i_sample[(p / (rm + 1)) as usize] = buckets[c] + 1;
8463        }
8464        i += 2;
8465    }
8466}
8467
8468#[allow(dead_code)]
8469fn final_bwt_scan_right_to_left_16u_block_omp(
8470    t: &[u16],
8471    sa: &mut [SaSint],
8472    k: SaSint,
8473    induction_bucket: &mut [SaSint],
8474    block_start: SaSint,
8475    block_size: SaSint,
8476    threads: SaSint,
8477    thread_state: &mut [ThreadState],
8478) -> SaSint {
8479    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8480        usize::try_from(threads)
8481            .expect("threads must be non-negative")
8482            .min(thread_state.len())
8483    } else {
8484        1
8485    };
8486    if thread_count <= 1 {
8487        return final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8488    }
8489
8490    let k_usize = usize::try_from(k).expect("k must be non-negative");
8491    let block_stride = (block_size / thread_count as SaSint) & !15;
8492
8493    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8494        let local_start = thread as SaSint * block_stride;
8495        let local_size = if thread + 1 < thread_count {
8496            block_stride
8497        } else {
8498            block_size - local_start
8499        };
8500        state.count = final_bwt_scan_right_to_left_16u_block_prepare(
8501            t,
8502            sa,
8503            k,
8504            &mut state.buckets[..k_usize],
8505            &mut state.cache,
8506            block_start + local_start,
8507            local_size,
8508        );
8509    }
8510
8511    for state in thread_state.iter_mut().take(thread_count).rev() {
8512        for c in 0..k_usize {
8513            let a = induction_bucket[c];
8514            let b = state.buckets[c];
8515            induction_bucket[c] = a - b;
8516            state.buckets[c] = a;
8517        }
8518    }
8519
8520    for state in thread_state.iter_mut().take(thread_count) {
8521        final_order_scan_right_to_left_16u_block_place(
8522            sa,
8523            &mut state.buckets[..k_usize],
8524            &state.cache,
8525            state.count,
8526        );
8527    }
8528
8529    -1
8530}
8531
8532#[allow(dead_code)]
8533fn final_bwt_aux_scan_right_to_left_16u_block_omp(
8534    t: &[u16],
8535    sa: &mut [SaSint],
8536    k: SaSint,
8537    rm: SaSint,
8538    i_sample: &mut [SaSint],
8539    induction_bucket: &mut [SaSint],
8540    block_start: SaSint,
8541    block_size: SaSint,
8542    threads: SaSint,
8543    thread_state: &mut [ThreadState],
8544) {
8545    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8546        usize::try_from(threads)
8547            .expect("threads must be non-negative")
8548            .min(thread_state.len())
8549    } else {
8550        1
8551    };
8552    if thread_count <= 1 {
8553        final_bwt_aux_scan_right_to_left_16u(
8554            t,
8555            sa,
8556            rm,
8557            i_sample,
8558            induction_bucket,
8559            block_start,
8560            block_size,
8561        );
8562        return;
8563    }
8564
8565    let k_usize = usize::try_from(k).expect("k must be non-negative");
8566    let block_stride = (block_size / thread_count as SaSint) & !15;
8567
8568    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8569        let local_start = thread as SaSint * block_stride;
8570        let local_size = if thread + 1 < thread_count {
8571            block_stride
8572        } else {
8573            block_size - local_start
8574        };
8575        state.count = final_bwt_aux_scan_right_to_left_16u_block_prepare(
8576            t,
8577            sa,
8578            k,
8579            &mut state.buckets[..k_usize],
8580            &mut state.cache,
8581            block_start + local_start,
8582            local_size,
8583        );
8584    }
8585
8586    for state in thread_state.iter_mut().take(thread_count).rev() {
8587        for c in 0..k_usize {
8588            let a = induction_bucket[c];
8589            let b = state.buckets[c];
8590            induction_bucket[c] = a - b;
8591            state.buckets[c] = a;
8592        }
8593    }
8594
8595    for state in thread_state.iter_mut().take(thread_count) {
8596        final_bwt_aux_scan_right_to_left_16u_block_place(
8597            sa,
8598            rm,
8599            i_sample,
8600            &mut state.buckets[..k_usize],
8601            &state.cache,
8602            state.count,
8603        );
8604    }
8605}
8606
8607#[allow(dead_code)]
8608fn final_sorting_scan_right_to_left_16u_block_omp(
8609    t: &[u16],
8610    sa: &mut [SaSint],
8611    k: SaSint,
8612    induction_bucket: &mut [SaSint],
8613    block_start: SaSint,
8614    block_size: SaSint,
8615    threads: SaSint,
8616    thread_state: &mut [ThreadState],
8617) {
8618    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8619        usize::try_from(threads)
8620            .expect("threads must be non-negative")
8621            .min(thread_state.len())
8622    } else {
8623        1
8624    };
8625    if thread_count <= 1 {
8626        final_sorting_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8627        return;
8628    }
8629
8630    let k_usize = usize::try_from(k).expect("k must be non-negative");
8631    let block_stride = (block_size / thread_count as SaSint) & !15;
8632
8633    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8634        let local_start = thread as SaSint * block_stride;
8635        let local_size = if thread + 1 < thread_count {
8636            block_stride
8637        } else {
8638            block_size - local_start
8639        };
8640        state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8641            t,
8642            sa,
8643            k,
8644            &mut state.buckets[..k_usize],
8645            &mut state.cache,
8646            block_start + local_start,
8647            local_size,
8648        );
8649    }
8650
8651    for state in thread_state.iter_mut().take(thread_count).rev() {
8652        for c in 0..k_usize {
8653            let a = induction_bucket[c];
8654            let b = state.buckets[c];
8655            induction_bucket[c] = a - b;
8656            state.buckets[c] = a;
8657        }
8658    }
8659
8660    for state in thread_state.iter_mut().take(thread_count) {
8661        final_order_scan_right_to_left_16u_block_place(
8662            sa,
8663            &mut state.buckets[..k_usize],
8664            &state.cache,
8665            state.count,
8666        );
8667    }
8668}
8669
8670#[allow(dead_code)]
8671fn final_gsa_scan_right_to_left_16u_block_omp(
8672    t: &[u16],
8673    sa: &mut [SaSint],
8674    k: SaSint,
8675    induction_bucket: &mut [SaSint],
8676    block_start: SaSint,
8677    block_size: SaSint,
8678    threads: SaSint,
8679    thread_state: &mut [ThreadState],
8680) {
8681    let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8682        usize::try_from(threads)
8683            .expect("threads must be non-negative")
8684            .min(thread_state.len())
8685    } else {
8686        1
8687    };
8688    if thread_count <= 1 {
8689        final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8690        return;
8691    }
8692
8693    let k_usize = usize::try_from(k).expect("k must be non-negative");
8694    let block_stride = (block_size / thread_count as SaSint) & !15;
8695
8696    for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8697        let local_start = thread as SaSint * block_stride;
8698        let local_size = if thread + 1 < thread_count {
8699            block_stride
8700        } else {
8701            block_size - local_start
8702        };
8703        state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8704            t,
8705            sa,
8706            k,
8707            &mut state.buckets[..k_usize],
8708            &mut state.cache,
8709            block_start + local_start,
8710            local_size,
8711        );
8712    }
8713
8714    for state in thread_state.iter_mut().take(thread_count).rev() {
8715        for c in 0..k_usize {
8716            let a = induction_bucket[c];
8717            let b = state.buckets[c];
8718            induction_bucket[c] = a - b;
8719            state.buckets[c] = a;
8720        }
8721    }
8722
8723    for state in thread_state.iter_mut().take(thread_count) {
8724        final_gsa_scan_right_to_left_16u_block_place(
8725            sa,
8726            &mut state.buckets[..k_usize],
8727            &state.cache,
8728            state.count,
8729        );
8730    }
8731}
8732
8733#[allow(dead_code)]
8734fn final_bwt_scan_right_to_left_16u_omp(
8735    t: &[u16],
8736    sa: &mut [SaSint],
8737    n: SaSint,
8738    k: SaSint,
8739    induction_bucket: &mut [SaSint],
8740    threads: SaSint,
8741) -> SaSint {
8742    let mut index = -1;
8743
8744    if threads == 1 || n < 65536 {
8745        index = final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, 0, n);
8746    } else {
8747        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8748        let mut block_start = n - 1;
8749        while block_start >= 0 {
8750            if sa[block_start as usize] == 0 {
8751                index = block_start;
8752                block_start -= 1;
8753            } else {
8754                let mut block_max_end =
8755                    block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8756                if block_max_end < 0 {
8757                    block_max_end = -1;
8758                }
8759                let mut block_end = block_start - 1;
8760                while block_end > block_max_end && sa[block_end as usize] != 0 {
8761                    block_end -= 1;
8762                }
8763                let block_size = block_start - block_end;
8764                if block_size < 32 {
8765                    while block_start > block_end {
8766                        let mut p = sa[block_start as usize];
8767                        sa[block_start as usize] = p & SAINT_MAX;
8768                        if p > 0 {
8769                            p -= 1;
8770                            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8771                            let c1 = t[p as usize] as usize;
8772                            sa[block_start as usize] = c1 as SaSint;
8773                            induction_bucket[c1] -= 1;
8774                            sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8775                                p
8776                            } else {
8777                                c0 as SaSint | SAINT_MIN
8778                            };
8779                        }
8780                        block_start -= 1;
8781                    }
8782                } else {
8783                    final_bwt_scan_right_to_left_16u_block_omp(
8784                        t,
8785                        sa,
8786                        k,
8787                        induction_bucket,
8788                        block_end + 1,
8789                        block_size,
8790                        threads,
8791                        &mut thread_state,
8792                    );
8793                    block_start = block_end;
8794                }
8795            }
8796        }
8797    }
8798    index
8799}
8800
8801#[allow(dead_code)]
8802fn final_bwt_aux_scan_right_to_left_16u_omp(
8803    t: &[u16],
8804    sa: &mut [SaSint],
8805    n: SaSint,
8806    k: SaSint,
8807    rm: SaSint,
8808    i_sample: &mut [SaSint],
8809    induction_bucket: &mut [SaSint],
8810    threads: SaSint,
8811) {
8812    if threads == 1 || n < 65536 {
8813        final_bwt_aux_scan_right_to_left_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8814    } else {
8815        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8816        let mut block_start = n - 1;
8817        while block_start >= 0 {
8818            if sa[block_start as usize] == 0 {
8819                block_start -= 1;
8820            } else {
8821                let mut block_max_end =
8822                    block_start - threads * ((PER_THREAD_CACHE_SIZE as SaSint - 16 * threads) / 2);
8823                if block_max_end < 0 {
8824                    block_max_end = -1;
8825                }
8826                let mut block_end = block_start - 1;
8827                while block_end > block_max_end && sa[block_end as usize] != 0 {
8828                    block_end -= 1;
8829                }
8830                let block_size = block_start - block_end;
8831                if block_size < 32 {
8832                    while block_start > block_end {
8833                        let mut p = sa[block_start as usize];
8834                        sa[block_start as usize] = p & SAINT_MAX;
8835                        if p > 0 {
8836                            p -= 1;
8837                            let c0 = t[(p - SaSint::from(p > 0)) as usize];
8838                            let c1 = t[p as usize] as usize;
8839                            sa[block_start as usize] = c1 as SaSint;
8840                            induction_bucket[c1] -= 1;
8841                            sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8842                                p
8843                            } else {
8844                                c0 as SaSint | SAINT_MIN
8845                            };
8846                            if (p & rm) == 0 {
8847                                i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1] + 1;
8848                            }
8849                        }
8850                        block_start -= 1;
8851                    }
8852                } else {
8853                    final_bwt_aux_scan_right_to_left_16u_block_omp(
8854                        t,
8855                        sa,
8856                        k,
8857                        rm,
8858                        i_sample,
8859                        induction_bucket,
8860                        block_end + 1,
8861                        block_size,
8862                        threads,
8863                        &mut thread_state,
8864                    );
8865                    block_start = block_end;
8866                }
8867            }
8868        }
8869    }
8870}
8871
8872#[allow(dead_code)]
8873fn final_sorting_scan_right_to_left_16u_omp(
8874    t: &[u16],
8875    sa: &mut [SaSint],
8876    omp_block_start: SaSint,
8877    omp_block_size: SaSint,
8878    k: SaSint,
8879    induction_bucket: &mut [SaSint],
8880    threads: SaSint,
8881) {
8882    if threads == 1 || omp_block_size < 65536 {
8883        final_sorting_scan_right_to_left_16u(
8884            t,
8885            sa,
8886            induction_bucket,
8887            omp_block_start,
8888            omp_block_size,
8889        );
8890    } else {
8891        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8892        let mut block_start = omp_block_start + omp_block_size - 1;
8893        while block_start >= omp_block_start {
8894            if sa[block_start as usize] == 0 {
8895                block_start -= 1;
8896            } else {
8897                let mut block_max_end =
8898                    block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8899                if block_max_end < omp_block_start {
8900                    block_max_end = omp_block_start - 1;
8901                }
8902                let mut block_end = block_start - 1;
8903                while block_end > block_max_end && sa[block_end as usize] != 0 {
8904                    block_end -= 1;
8905                }
8906                let block_size = block_start - block_end;
8907                if block_size < 32 {
8908                    while block_start > block_end {
8909                        let mut p = sa[block_start as usize];
8910                        sa[block_start as usize] = p & SAINT_MAX;
8911                        if p > 0 {
8912                            p -= 1;
8913                            let c = t[p as usize] as usize;
8914                            induction_bucket[c] -= 1;
8915                            let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8916                                SAINT_MIN
8917                            } else {
8918                                0
8919                            };
8920                            sa[induction_bucket[c] as usize] = p | mark;
8921                        }
8922                        block_start -= 1;
8923                    }
8924                } else {
8925                    final_sorting_scan_right_to_left_16u_block_omp(
8926                        t,
8927                        sa,
8928                        k,
8929                        induction_bucket,
8930                        block_end + 1,
8931                        block_size,
8932                        threads,
8933                        &mut thread_state,
8934                    );
8935                    block_start = block_end;
8936                }
8937            }
8938        }
8939    }
8940}
8941
8942#[allow(dead_code)]
8943fn final_gsa_scan_right_to_left_16u_omp(
8944    t: &[u16],
8945    sa: &mut [SaSint],
8946    omp_block_start: SaSint,
8947    omp_block_size: SaSint,
8948    k: SaSint,
8949    induction_bucket: &mut [SaSint],
8950    threads: SaSint,
8951) {
8952    if threads == 1 || omp_block_size < 65536 {
8953        final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, omp_block_start, omp_block_size);
8954    } else {
8955        let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8956        let mut block_start = omp_block_start + omp_block_size - 1;
8957        while block_start >= omp_block_start {
8958            if sa[block_start as usize] == 0 {
8959                block_start -= 1;
8960            } else {
8961                let mut block_max_end =
8962                    block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8963                if block_max_end < omp_block_start {
8964                    block_max_end = omp_block_start - 1;
8965                }
8966                let mut block_end = block_start - 1;
8967                while block_end > block_max_end && sa[block_end as usize] != 0 {
8968                    block_end -= 1;
8969                }
8970                let block_size = block_start - block_end;
8971                if block_size < 32 {
8972                    while block_start > block_end {
8973                        let mut p = sa[block_start as usize];
8974                        sa[block_start as usize] = p & SAINT_MAX;
8975                        if p > 0 && t[(p - 1) as usize] > 0 {
8976                            p -= 1;
8977                            let c = t[p as usize] as usize;
8978                            induction_bucket[c] -= 1;
8979                            let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8980                                SAINT_MIN
8981                            } else {
8982                                0
8983                            };
8984                            sa[induction_bucket[c] as usize] = p | mark;
8985                        }
8986                        block_start -= 1;
8987                    }
8988                } else {
8989                    final_gsa_scan_right_to_left_16u_block_omp(
8990                        t,
8991                        sa,
8992                        k,
8993                        induction_bucket,
8994                        block_end + 1,
8995                        block_size,
8996                        threads,
8997                        &mut thread_state,
8998                    );
8999                    block_start = block_end;
9000                }
9001            }
9002        }
9003    }
9004}
9005
9006#[allow(dead_code)]
9007fn induce_final_order_16u_omp(
9008    t: &[u16],
9009    sa: &mut [SaSint],
9010    n: SaSint,
9011    k: SaSint,
9012    flags: SaSint,
9013    r: SaSint,
9014    i_out: Option<&mut [SaSint]>,
9015    buckets: &mut [SaSint],
9016    threads: SaSint,
9017    _thread_state: &mut [ThreadState],
9018) -> SaSint {
9019    if (flags & LIBSAIS_FLAGS_BWT) == 0 {
9020        if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9021            buckets[6 * ALPHABET_SIZE] = buckets[7 * ALPHABET_SIZE] - 1;
9022        }
9023
9024        let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9025        let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9026        let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9027
9028        final_sorting_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9029        if threads > 1 && n >= 65_536 {
9030            clear_lms_suffixes_omp(
9031                sa,
9032                n,
9033                ALPHABET_SIZE as SaSint,
9034                bucket_start,
9035                bucket_end,
9036                threads,
9037            );
9038        }
9039
9040        if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9041            flip_suffix_markers_omp(sa, bucket_end[0], threads);
9042            final_gsa_scan_right_to_left_16u_omp(
9043                t,
9044                sa,
9045                bucket_end[0],
9046                n - bucket_end[0],
9047                k,
9048                bucket_end,
9049                threads,
9050            );
9051        } else {
9052            final_sorting_scan_right_to_left_16u_omp(t, sa, 0, n, k, bucket_end, threads);
9053        }
9054
9055        0
9056    } else if let Some(i_out) = i_out {
9057        let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9058        let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9059        let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9060
9061        final_bwt_aux_scan_left_to_right_16u_omp(t, sa, n, k, r - 1, i_out, bucket_start, threads);
9062        if threads > 1 && n >= 65_536 {
9063            clear_lms_suffixes_omp(
9064                sa,
9065                n,
9066                ALPHABET_SIZE as SaSint,
9067                bucket_start,
9068                bucket_end,
9069                threads,
9070            );
9071        }
9072        final_bwt_aux_scan_right_to_left_16u_omp(t, sa, n, k, r - 1, i_out, bucket_end, threads);
9073        0
9074    } else {
9075        let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9076        let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9077        let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9078
9079        final_bwt_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9080        if threads > 1 && n >= 65_536 {
9081            clear_lms_suffixes_omp(
9082                sa,
9083                n,
9084                ALPHABET_SIZE as SaSint,
9085                bucket_start,
9086                bucket_end,
9087                threads,
9088            );
9089        }
9090        final_bwt_scan_right_to_left_16u_omp(t, sa, n, k, bucket_end, threads)
9091    }
9092}
9093
9094#[allow(dead_code)]
9095fn bwt_copy_16u(u: &mut [u16], a: &[SaSint], n: SaSint) {
9096    let mut i = 0isize;
9097    let mut j = n as isize - 7;
9098    while i < j {
9099        u[i as usize] = a[i as usize] as u16;
9100        u[(i + 1) as usize] = a[(i + 1) as usize] as u16;
9101        u[(i + 2) as usize] = a[(i + 2) as usize] as u16;
9102        u[(i + 3) as usize] = a[(i + 3) as usize] as u16;
9103        u[(i + 4) as usize] = a[(i + 4) as usize] as u16;
9104        u[(i + 5) as usize] = a[(i + 5) as usize] as u16;
9105        u[(i + 6) as usize] = a[(i + 6) as usize] as u16;
9106        u[(i + 7) as usize] = a[(i + 7) as usize] as u16;
9107        i += 8;
9108    }
9109
9110    j += 7;
9111    while i < j {
9112        u[i as usize] = a[i as usize] as u16;
9113        i += 1;
9114    }
9115}
9116
9117#[allow(dead_code)]
9118fn bwt_copy_16u_omp(u: &mut [u16], a: &[SaSint], n: SaSint, threads: SaSint) {
9119    if threads == 1 || n < 65_536 {
9120        bwt_copy_16u(u, a, n);
9121        return;
9122    }
9123
9124    let block_stride = (n / threads) & !15;
9125    for thread in 0..threads {
9126        let block_start = thread * block_stride;
9127        let block_size = if thread < threads - 1 {
9128            block_stride
9129        } else {
9130            n - block_start
9131        };
9132        let start = block_start as usize;
9133        bwt_copy_16u(&mut u[start..], &a[start..], block_size);
9134    }
9135}
9136
9137#[allow(dead_code)]
9138fn final_bwt_ltr_step(t: &[u16], sa: &mut [SaSint], induction_bucket: &mut [SaSint], index: usize) {
9139    let mut p = sa[index];
9140    sa[index] = p & SAINT_MAX;
9141    if p > 0 {
9142        p -= 1;
9143        let c = t[p as usize] as usize;
9144        sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9145        let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9146            SAINT_MIN
9147        } else {
9148            0
9149        };
9150        let dst = induction_bucket[c] as usize;
9151        sa[dst] = p | mark;
9152        induction_bucket[c] += 1;
9153    }
9154}
9155
9156#[allow(dead_code)]
9157fn final_bwt_rtl_step(
9158    t: &[u16],
9159    sa: &mut [SaSint],
9160    induction_bucket: &mut [SaSint],
9161    index: usize,
9162    primary_index: &mut SaSint,
9163) {
9164    let mut p = sa[index];
9165    if p == 0 {
9166        *primary_index = index as SaSint;
9167    }
9168    sa[index] = p & SAINT_MAX;
9169    if p > 0 {
9170        p -= 1;
9171        let c0 = t[(p - SaSint::from(p > 0)) as usize];
9172        let c1 = t[p as usize];
9173        sa[index] = c1 as SaSint;
9174        let induced = if c0 <= c1 {
9175            p
9176        } else {
9177            c0 as SaSint | SAINT_MIN
9178        };
9179        induction_bucket[c1 as usize] -= 1;
9180        sa[induction_bucket[c1 as usize] as usize] = induced;
9181    }
9182}
9183
9184#[allow(dead_code)]
9185fn final_bwt_aux_ltr_step(
9186    t: &[u16],
9187    sa: &mut [SaSint],
9188    rm: SaSint,
9189    i_sample: &mut [SaSint],
9190    induction_bucket: &mut [SaSint],
9191    index: usize,
9192) {
9193    let mut p = sa[index];
9194    sa[index] = p & SAINT_MAX;
9195    if p > 0 {
9196        p -= 1;
9197        let c = t[p as usize] as usize;
9198        sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9199        let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9200            SAINT_MIN
9201        } else {
9202            0
9203        };
9204        let dst = induction_bucket[c] as usize;
9205        sa[dst] = p | mark;
9206        induction_bucket[c] += 1;
9207        if (p & rm) == 0 {
9208            i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
9209        }
9210    }
9211}
9212
9213#[allow(dead_code)]
9214fn final_bwt_aux_rtl_step(
9215    t: &[u16],
9216    sa: &mut [SaSint],
9217    rm: SaSint,
9218    i_sample: &mut [SaSint],
9219    induction_bucket: &mut [SaSint],
9220    index: usize,
9221) {
9222    let mut p = sa[index];
9223    sa[index] = p & SAINT_MAX;
9224    if p > 0 {
9225        p -= 1;
9226        let c0 = t[(p - SaSint::from(p > 0)) as usize];
9227        let c1 = t[p as usize];
9228        sa[index] = c1 as SaSint;
9229        let induced = if c0 <= c1 {
9230            p
9231        } else {
9232            c0 as SaSint | SAINT_MIN
9233        };
9234        induction_bucket[c1 as usize] -= 1;
9235        sa[induction_bucket[c1 as usize] as usize] = induced;
9236        if (p & rm) == 0 {
9237            i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1 as usize] + 1;
9238        }
9239    }
9240}
9241
9242#[allow(dead_code)]
9243fn main_32s_recursion(
9244    t_ptr: *mut SaSint,
9245    sa_ptr: *mut SaSint,
9246    sa_capacity: usize,
9247    n: SaSint,
9248    k: SaSint,
9249    fs: SaSint,
9250    threads: SaSint,
9251    thread_state: &mut [ThreadState],
9252    local_buffer: &mut [SaSint],
9253) -> SaSint {
9254    let fs = fs.min(SAINT_MAX - n);
9255    let local_buffer_size = SaSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("fits");
9256    let n_usize = usize::try_from(n).expect("n must be non-negative");
9257    let fs_usize = usize::try_from(fs).expect("fs must be non-negative");
9258    let total_len = n_usize + fs_usize;
9259    assert!(total_len <= sa_capacity);
9260
9261    if k > 0 && ((fs / k) >= 6 || (local_buffer_size / k) >= 6) {
9262        let k_usize = usize::try_from(k).expect("k must be non-negative");
9263        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 6 {
9264            1024usize
9265        } else {
9266            16usize
9267        };
9268        let need = 6 * k_usize;
9269        let use_local_buffer = local_buffer_size > fs;
9270        let buckets_ptr = if use_local_buffer {
9271            local_buffer.as_mut_ptr()
9272        } else {
9273            unsafe {
9274                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9275                let start =
9276                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 6 {
9277                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
9278                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9279                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9280                    } else {
9281                        total_len - need
9282                    };
9283                sa[start..].as_mut_ptr()
9284            }
9285        };
9286
9287        let m = unsafe {
9288            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9289            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9290            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9291            count_and_gather_lms_suffixes_32s_4k_omp(
9292                t,
9293                sa,
9294                n,
9295                k,
9296                buckets,
9297                SaSint::from(use_local_buffer),
9298                threads,
9299                thread_state,
9300            )
9301        };
9302        if m > 1 {
9303            let m_usize = usize::try_from(m).expect("m must be non-negative");
9304            unsafe {
9305                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9306                sa[..n_usize - m_usize].fill(0);
9307            }
9308
9309            let first_lms_suffix = unsafe {
9310                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9311                sa[n_usize - m_usize]
9312            };
9313            let left_suffixes_count = unsafe {
9314                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9315                initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
9316                    std::slice::from_raw_parts(t_ptr, n_usize),
9317                    k,
9318                    buckets,
9319                    first_lms_suffix,
9320                )
9321            };
9322
9323            unsafe {
9324                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9325                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9326                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9327                let (_, induction_bucket) = buckets.split_at_mut(4 * k_usize);
9328                radix_sort_lms_suffixes_32s_6k_omp(t, sa, n, m, induction_bucket, threads);
9329                if (n / 8192) < k {
9330                    radix_sort_set_markers_32s_6k_omp(sa, k, induction_bucket, threads);
9331                }
9332                if threads > 1 && n >= 65_536 {
9333                    sa[n_usize - m_usize..n_usize].fill(0);
9334                }
9335                initialize_buckets_for_partial_sorting_32s_6k(
9336                    t,
9337                    k,
9338                    buckets,
9339                    first_lms_suffix,
9340                    left_suffixes_count,
9341                );
9342                induce_partial_order_32s_6k_omp(
9343                    t,
9344                    sa,
9345                    n,
9346                    k,
9347                    buckets,
9348                    first_lms_suffix,
9349                    left_suffixes_count,
9350                    threads,
9351                    thread_state,
9352                );
9353            }
9354
9355            let names = unsafe {
9356                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9357                if (n / 8192) < k {
9358                    renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
9359                        sa,
9360                        n,
9361                        m,
9362                        threads,
9363                        thread_state,
9364                    )
9365                } else {
9366                    renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state)
9367                }
9368            };
9369
9370            if names < m {
9371                let f = if (n / 8192) < k {
9372                    unsafe {
9373                        let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9374                        let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9375                        compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9376                    }
9377                } else {
9378                    0
9379                };
9380
9381                let new_t_start =
9382                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9383                if main_32s_recursion(
9384                    unsafe {
9385                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9386                            .as_mut_ptr()
9387                    },
9388                    sa_ptr,
9389                    sa_capacity,
9390                    m - f,
9391                    names - f,
9392                    fs + n - 2 * m + f,
9393                    threads,
9394                    thread_state,
9395                    local_buffer,
9396                ) != 0
9397                {
9398                    return -2;
9399                }
9400
9401                unsafe {
9402                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9403                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9404                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9405                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
9406                        t,
9407                        sa,
9408                        n,
9409                        k,
9410                        m,
9411                        fs,
9412                        f,
9413                        buckets,
9414                        SaSint::from(use_local_buffer),
9415                        threads,
9416                        thread_state,
9417                    );
9418                }
9419            } else {
9420                unsafe {
9421                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
9422                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9423                    count_lms_suffixes_32s_2k(t, n, k, buckets);
9424                }
9425            }
9426
9427            unsafe {
9428                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9429                initialize_buckets_start_and_end_32s_4k(k, buckets);
9430                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9431                place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
9432                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9433                induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
9434            }
9435        } else {
9436            unsafe {
9437                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9438                sa[0] = sa[n_usize - 1];
9439            }
9440
9441            unsafe {
9442                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9443                initialize_buckets_start_and_end_32s_6k(k, buckets);
9444                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9445                place_lms_suffixes_histogram_32s_6k(sa, n, k, m, buckets);
9446                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9447                induce_final_order_32s_6k(t, sa, n, k, buckets, threads, thread_state);
9448            }
9449        }
9450
9451        return 0;
9452    } else if k > 0 && n <= SAINT_MAX / 2 && ((fs / k) >= 4 || (local_buffer_size / k) >= 4) {
9453        let k_usize = usize::try_from(k).expect("k must be non-negative");
9454        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 4 {
9455            1024usize
9456        } else {
9457            16usize
9458        };
9459        let need = 4 * k_usize;
9460        let use_local_buffer = local_buffer_size > fs;
9461        let buckets_ptr = if use_local_buffer {
9462            local_buffer.as_mut_ptr()
9463        } else {
9464            unsafe {
9465                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9466                let start =
9467                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 4 {
9468                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
9469                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9470                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9471                    } else {
9472                        total_len - need
9473                    };
9474                sa[start..].as_mut_ptr()
9475            }
9476        };
9477
9478        let m = unsafe {
9479            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9480            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9481            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9482            count_and_gather_lms_suffixes_32s_2k_omp(
9483                t,
9484                sa,
9485                n,
9486                k,
9487                buckets,
9488                SaSint::from(use_local_buffer),
9489                threads,
9490                thread_state,
9491            )
9492        };
9493        if m > 1 {
9494            let m_usize = usize::try_from(m).expect("m must be non-negative");
9495            unsafe {
9496                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9497                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9498                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9499                initialize_buckets_for_radix_and_partial_sorting_32s_4k(
9500                    t,
9501                    k,
9502                    buckets,
9503                    sa[n_usize - m_usize],
9504                );
9505                let (_, induction_bucket) = buckets.split_at_mut(1);
9506                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
9507                radix_sort_set_markers_32s_4k_omp(sa, k, induction_bucket, threads);
9508                place_lms_suffixes_interval_32s_4k(sa, n, k, m - 1, buckets);
9509                induce_partial_order_32s_4k_omp(t, sa, n, k, buckets, threads, thread_state);
9510            }
9511
9512            let names = unsafe {
9513                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9514                renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa, n, m, threads, thread_state)
9515            };
9516            if names < m {
9517                let f = unsafe {
9518                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9519                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9520                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9521                };
9522
9523                let new_t_start =
9524                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9525                if main_32s_recursion(
9526                    unsafe {
9527                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9528                            .as_mut_ptr()
9529                    },
9530                    sa_ptr,
9531                    sa_capacity,
9532                    m - f,
9533                    names - f,
9534                    fs + n - 2 * m + f,
9535                    threads,
9536                    thread_state,
9537                    local_buffer,
9538                ) != 0
9539                {
9540                    return -2;
9541                }
9542
9543                unsafe {
9544                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9545                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9546                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9547                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
9548                        t,
9549                        sa,
9550                        n,
9551                        k,
9552                        m,
9553                        fs,
9554                        f,
9555                        buckets,
9556                        SaSint::from(use_local_buffer),
9557                        threads,
9558                        thread_state,
9559                    );
9560                }
9561            } else {
9562                unsafe {
9563                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
9564                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9565                    count_lms_suffixes_32s_2k(t, n, k, buckets);
9566                }
9567            }
9568        } else {
9569            unsafe {
9570                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9571                sa[0] = sa[n_usize - 1];
9572            }
9573        }
9574
9575        unsafe {
9576            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9577            initialize_buckets_start_and_end_32s_4k(k, buckets);
9578            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9579            place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
9580            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9581            induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
9582        }
9583
9584        return 0;
9585    } else if k > 0 && ((fs / k) >= 2 || (local_buffer_size / k) >= 2) {
9586        let k_usize = usize::try_from(k).expect("k must be non-negative");
9587        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 2 {
9588            1024usize
9589        } else {
9590            16usize
9591        };
9592        let need = 2 * k_usize;
9593        let use_local_buffer = local_buffer_size > fs;
9594        let buckets_ptr = if use_local_buffer {
9595            local_buffer.as_mut_ptr()
9596        } else {
9597            unsafe {
9598                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9599                let start =
9600                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 2 {
9601                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
9602                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9603                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9604                    } else {
9605                        total_len - need
9606                    };
9607                sa[start..].as_mut_ptr()
9608            }
9609        };
9610
9611        let m = unsafe {
9612            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9613            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9614            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9615            count_and_gather_lms_suffixes_32s_2k_omp(
9616                t,
9617                sa,
9618                n,
9619                k,
9620                buckets,
9621                SaSint::from(use_local_buffer),
9622                threads,
9623                thread_state,
9624            )
9625        };
9626        if m > 1 {
9627            let m_usize = usize::try_from(m).expect("m must be non-negative");
9628            unsafe {
9629                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9630                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9631                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9632                initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
9633                    t,
9634                    k,
9635                    buckets,
9636                    sa[n_usize - m_usize],
9637                );
9638                let (_, induction_bucket) = buckets.split_at_mut(1);
9639                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
9640                place_lms_suffixes_interval_32s_2k(sa, n, k, m - 1, buckets);
9641            }
9642
9643            unsafe {
9644                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9645                initialize_buckets_start_and_end_32s_2k(k, buckets);
9646                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9647                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9648                induce_partial_order_32s_2k_omp(t, sa, n, k, buckets, threads, thread_state);
9649            }
9650
9651            let names = unsafe {
9652                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9653                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9654                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
9655            };
9656            if names < m {
9657                let f = unsafe {
9658                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9659                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9660                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9661                };
9662
9663                let new_t_start =
9664                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9665                if main_32s_recursion(
9666                    unsafe {
9667                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9668                            .as_mut_ptr()
9669                    },
9670                    sa_ptr,
9671                    sa_capacity,
9672                    m - f,
9673                    names - f,
9674                    fs + n - 2 * m + f,
9675                    threads,
9676                    thread_state,
9677                    local_buffer,
9678                ) != 0
9679                {
9680                    return -2;
9681                }
9682
9683                unsafe {
9684                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9685                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9686                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9687                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
9688                        t,
9689                        sa,
9690                        n,
9691                        k,
9692                        m,
9693                        fs,
9694                        f,
9695                        buckets,
9696                        SaSint::from(use_local_buffer),
9697                        threads,
9698                        thread_state,
9699                    );
9700                }
9701            } else {
9702                unsafe {
9703                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
9704                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9705                    count_lms_suffixes_32s_2k(t, n, k, buckets);
9706                }
9707            }
9708        } else {
9709            unsafe {
9710                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9711                sa[0] = sa[n_usize - 1];
9712            }
9713        }
9714
9715        unsafe {
9716            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9717            initialize_buckets_end_32s_2k(k, buckets);
9718            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9719            place_lms_suffixes_histogram_32s_2k(sa, n, k, m, buckets);
9720        }
9721
9722        unsafe {
9723            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
9724            initialize_buckets_start_and_end_32s_2k(k, buckets);
9725            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9726            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9727            induce_final_order_32s_2k(t, sa, n, k, buckets, threads, thread_state);
9728        }
9729
9730        0
9731    } else {
9732        let k_usize = usize::try_from(k).expect("k must be non-negative");
9733        let mut heap_buckets = if fs < k { Some(vec![0; k_usize]) } else { None };
9734        let alignment = if fs >= 1024 && (fs - 1024) >= k {
9735            1024usize
9736        } else {
9737            16usize
9738        };
9739        let mut buckets_ptr = if let Some(ref mut heap) = heap_buckets {
9740            heap.as_mut_ptr()
9741        } else {
9742            unsafe {
9743                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9744                let start = if fs_usize >= k_usize + alignment {
9745                    let byte_ptr = sa[total_len - k_usize - alignment..].as_mut_ptr() as usize;
9746                    let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
9747                    (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
9748                } else {
9749                    total_len - k_usize
9750                };
9751                sa[start..].as_mut_ptr()
9752            }
9753        };
9754
9755        if buckets_ptr.is_null() {
9756            return -2;
9757        }
9758
9759        unsafe {
9760            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9761            sa[..n_usize].fill(0);
9762        }
9763
9764        unsafe {
9765            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9766            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9767            count_suffixes_32s(t, n, k, buckets);
9768            initialize_buckets_end_32s_1k(k, buckets);
9769        }
9770
9771        let m = unsafe {
9772            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9773            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9774            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9775            radix_sort_lms_suffixes_32s_1k(t, sa, n, buckets)
9776        };
9777        if m > 1 {
9778            unsafe {
9779                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9780                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9781                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9782                induce_partial_order_32s_1k_omp(t, sa, n, k, buckets, threads, thread_state);
9783            }
9784
9785            let names = unsafe {
9786                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9787                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9788                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
9789            };
9790            if names < m {
9791                if heap_buckets.is_some() {
9792                    let _ = heap_buckets.take();
9793                    buckets_ptr = std::ptr::null_mut();
9794                }
9795
9796                let f = unsafe {
9797                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9798                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9799                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
9800                };
9801
9802                let new_t_start =
9803                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
9804                if main_32s_recursion(
9805                    unsafe {
9806                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
9807                            .as_mut_ptr()
9808                    },
9809                    sa_ptr,
9810                    sa_capacity,
9811                    m - f,
9812                    names - f,
9813                    fs + n - 2 * m + f,
9814                    threads,
9815                    thread_state,
9816                    local_buffer,
9817                ) != 0
9818                {
9819                    return -2;
9820                }
9821
9822                unsafe {
9823                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
9824                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9825                    reconstruct_compacted_lms_suffixes_32s_1k_omp(t, sa, n, m, fs, f, threads);
9826                }
9827
9828                if buckets_ptr.is_null() {
9829                    heap_buckets = Some(vec![0; k_usize]);
9830                    buckets_ptr = heap_buckets.as_mut().unwrap().as_mut_ptr();
9831                    if buckets_ptr.is_null() {
9832                        return -2;
9833                    }
9834                }
9835            }
9836
9837            unsafe {
9838                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9839                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9840                count_suffixes_32s(t, n, k, buckets);
9841                initialize_buckets_end_32s_1k(k, buckets);
9842            }
9843            unsafe {
9844                let t = std::slice::from_raw_parts(t_ptr, n_usize);
9845                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9846                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9847                place_lms_suffixes_interval_32s_1k(t, sa, k, m, buckets);
9848            }
9849        }
9850
9851        unsafe {
9852            let t = std::slice::from_raw_parts(t_ptr, n_usize);
9853            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
9854            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
9855            induce_final_order_32s_1k(t, sa, n, k, buckets, threads, thread_state);
9856        }
9857
9858        0
9859    }
9860}
9861
9862#[allow(dead_code)]
9863fn main_32s_entry(
9864    t_ptr: *mut SaSint,
9865    sa: &mut [SaSint],
9866    n: SaSint,
9867    k: SaSint,
9868    fs: SaSint,
9869    threads: SaSint,
9870    thread_state: &mut [ThreadState],
9871) -> SaSint {
9872    let mut local_buffer = [0; 2 * LIBSAIS_LOCAL_BUFFER_SIZE];
9873    main_32s_recursion(
9874        t_ptr,
9875        sa.as_mut_ptr(),
9876        sa.len(),
9877        n,
9878        k,
9879        fs,
9880        threads,
9881        thread_state,
9882        &mut local_buffer[LIBSAIS_LOCAL_BUFFER_SIZE..],
9883    )
9884}
9885
9886#[allow(dead_code)]
9887fn main_16u(
9888    t: &[u16],
9889    sa: &mut [SaSint],
9890    n: SaSint,
9891    buckets: &mut [SaSint],
9892    flags: SaSint,
9893    r: SaSint,
9894    i_out: Option<&mut [SaSint]>,
9895    fs: SaSint,
9896    freq: Option<&mut [SaSint]>,
9897    threads: SaSint,
9898    thread_state: &mut [ThreadState],
9899) -> SaSint {
9900    let fs = fs.min(SAINT_MAX - n);
9901
9902    let m = count_and_gather_lms_suffixes_16u_omp(t, sa, n, buckets, threads, thread_state);
9903    let k = initialize_buckets_start_and_end_16u(buckets, freq);
9904
9905    if (flags & LIBSAIS_FLAGS_GSA) != 0 && (buckets[0] != 0 || buckets[2] != 0 || buckets[3] != 1) {
9906        return -1;
9907    }
9908
9909    if m > 0 {
9910        let first_lms_suffix = sa[(n - m) as usize];
9911        let left_suffixes_count =
9912            initialize_buckets_for_lms_suffixes_radix_sort_16u(t, buckets, first_lms_suffix);
9913
9914        if threads > 1 && n >= 65_536 {
9915            sa[..(n - m) as usize].fill(0);
9916        }
9917        radix_sort_lms_suffixes_16u_omp(t, sa, n, m, flags, buckets, threads, thread_state);
9918        if threads > 1 && n >= 65_536 {
9919            sa[(n - m) as usize..n as usize].fill(0);
9920        }
9921
9922        initialize_buckets_for_partial_sorting_16u(
9923            t,
9924            buckets,
9925            first_lms_suffix,
9926            left_suffixes_count,
9927        );
9928        induce_partial_order_16u_omp(
9929            t,
9930            sa,
9931            n,
9932            k,
9933            flags,
9934            buckets,
9935            first_lms_suffix,
9936            left_suffixes_count,
9937            threads,
9938        );
9939
9940        let names = renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
9941        if names < m {
9942            let recursive_t_start = (n + fs - m) as usize;
9943            let recursive_t_ptr = sa[recursive_t_start..].as_mut_ptr();
9944            if main_32s_entry(
9945                recursive_t_ptr,
9946                sa,
9947                m,
9948                names,
9949                fs + n - 2 * m,
9950                threads,
9951                thread_state,
9952            ) != 0
9953            {
9954                return -2;
9955            }
9956
9957            gather_lms_suffixes_16u_omp(t, sa, n, threads, thread_state);
9958            reconstruct_lms_suffixes_omp(sa, n, m, threads);
9959        }
9960
9961        place_lms_suffixes_interval_16u(sa, n, m, flags, buckets);
9962    } else {
9963        sa[..n as usize].fill(0);
9964    }
9965
9966    induce_final_order_16u_omp(t, sa, n, k, flags, r, i_out, buckets, threads, thread_state)
9967}
9968
9969#[allow(dead_code)]
9970fn main_16u_alloc(
9971    t: &[u16],
9972    sa: &mut [SaSint],
9973    flags: SaSint,
9974    r: SaSint,
9975    i_out: Option<&mut [SaSint]>,
9976    fs: SaSint,
9977    freq: Option<&mut [SaSint]>,
9978    threads: SaSint,
9979) -> SaSint {
9980    if fs < 0
9981        || threads < 0
9982        || sa.len()
9983            < t.len()
9984                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
9985        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
9986    {
9987        return -1;
9988    }
9989
9990    fill_freq(t, freq);
9991    if t.len() <= 1 {
9992        if t.len() == 1 {
9993            sa[0] = 0;
9994        }
9995        return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
9996            t.len() as SaSint
9997        } else {
9998            0
9999        };
10000    }
10001
10002    let mut buckets = vec![0; 8 * ALPHABET_SIZE];
10003    let threads = normalize_threads(threads);
10004    let mut thread_state = if threads > 1 {
10005        match alloc_thread_state(threads) {
10006            Some(thread_state) => thread_state,
10007            None => return -2,
10008        }
10009    } else {
10010        Vec::new()
10011    };
10012
10013    main_16u(
10014        t,
10015        sa,
10016        t.len() as SaSint,
10017        &mut buckets,
10018        flags,
10019        r,
10020        i_out,
10021        fs,
10022        None,
10023        threads,
10024        &mut thread_state,
10025    )
10026}
10027
10028fn main_16u_ctx(
10029    ctx: &mut Context,
10030    t: &[u16],
10031    sa: &mut [SaSint],
10032    flags: SaSint,
10033    r: SaSint,
10034    i_out: Option<&mut [SaSint]>,
10035    fs: SaSint,
10036    freq: Option<&mut [SaSint]>,
10037) -> SaSint {
10038    if fs < 0
10039        || sa.len()
10040            < t.len()
10041                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10042        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10043    {
10044        return -1;
10045    }
10046
10047    if ctx.threads <= 0 || ctx.buckets.len() < 8 * ALPHABET_SIZE {
10048        return -2;
10049    }
10050
10051    fill_freq(t, freq);
10052    if t.len() <= 1 {
10053        if t.len() == 1 {
10054            sa[0] = 0;
10055        }
10056        return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
10057            t.len() as SaSint
10058        } else {
10059            0
10060        };
10061    }
10062
10063    let mut empty_thread_state = [];
10064    let thread_state = if ctx.threads > 1 {
10065        match ctx.thread_state.as_deref_mut() {
10066            Some(thread_state) if thread_state.len() >= ctx.threads as usize => thread_state,
10067            None => return -2,
10068            Some(_) => return -2,
10069        }
10070    } else {
10071        &mut empty_thread_state
10072    };
10073
10074    main_16u(
10075        t,
10076        sa,
10077        t.len() as SaSint,
10078        &mut ctx.buckets,
10079        flags,
10080        r,
10081        i_out,
10082        fs,
10083        None,
10084        ctx.threads,
10085        thread_state,
10086    )
10087}
10088
10089fn main_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint, threads: SaSint) -> SaSint {
10090    let threads = normalize_threads(threads);
10091    let mut thread_state = if threads > 1 {
10092        match alloc_thread_state(threads) {
10093            Some(thread_state) => thread_state,
10094            None => return -2,
10095        }
10096    } else {
10097        Vec::new()
10098    };
10099
10100    main_32s_entry(
10101        t.as_mut_ptr(),
10102        sa,
10103        t.len() as SaSint,
10104        k,
10105        fs,
10106        threads,
10107        &mut thread_state,
10108    )
10109}
10110
10111/// Constructs the suffix array of a given 16-bit string.
10112///
10113/// - `t` (`[0..n-1]`): the input 16-bit string.
10114/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10115/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10116/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10117///
10118/// Returns 0 on success, -1 or -2 on error.
10119pub fn libsais16(t: &[u16], sa: &mut [SaSint], fs: SaSint, freq: Option<&mut [SaSint]>) -> SaSint {
10120    main_16u_alloc(t, sa, 0, 0, None, fs, freq, 1)
10121}
10122
10123/// Constructs the generalized suffix array (GSA) of a given 16-bit string set.
10124///
10125/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
10126/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10127/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10128/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10129///
10130/// Returns 0 on success, -1 or -2 on error.
10131pub fn libsais16_gsa(
10132    t: &[u16],
10133    sa: &mut [SaSint],
10134    fs: SaSint,
10135    freq: Option<&mut [SaSint]>,
10136) -> SaSint {
10137    main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, 1)
10138}
10139
10140/// Constructs the suffix array of a given integer array.
10141///
10142/// During construction the input array is modified, but restored at the end if no error occurred.
10143///
10144/// - `t` (`[0..n-1]`): the input integer array.
10145/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10146/// - `k`: the alphabet size of the input integer array.
10147/// - `fs`: extra space available at the end of `sa` (can be 0, but 4k or better 6k is recommended for optimal performance).
10148///
10149/// Returns 0 on success, -1 or -2 on error.
10150pub fn libsais16_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
10151    if fs < 0
10152        || sa.len()
10153            < t.len()
10154                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10155    {
10156        return -1;
10157    }
10158
10159    if t.len() <= 1 {
10160        if t.len() == 1 {
10161            sa[0] = 0;
10162        }
10163        return 0;
10164    }
10165
10166    main_int(t, sa, k, fs, 1)
10167}
10168
10169/// Constructs the suffix array of a given 16-bit string using a libsais16 context.
10170///
10171/// - `ctx`: the libsais16 context.
10172/// - `t` (`[0..n-1]`): the input 16-bit string.
10173/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10174/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10175/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10176///
10177/// Returns 0 on success, -1 or -2 on error.
10178pub fn libsais16_ctx(
10179    ctx: &mut Context,
10180    t: &[u16],
10181    sa: &mut [SaSint],
10182    fs: SaSint,
10183    freq: Option<&mut [SaSint]>,
10184) -> SaSint {
10185    main_16u_ctx(ctx, t, sa, 0, 0, None, fs, freq)
10186}
10187
10188/// Constructs the generalized suffix array (GSA) of a given 16-bit string set using a libsais16 context.
10189///
10190/// - `ctx`: the libsais16 context.
10191/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
10192/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10193/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10194/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10195///
10196/// Returns 0 on success, -1 or -2 on error.
10197pub fn libsais16_gsa_ctx(
10198    ctx: &mut Context,
10199    t: &[u16],
10200    sa: &mut [SaSint],
10201    fs: SaSint,
10202    freq: Option<&mut [SaSint]>,
10203) -> SaSint {
10204    main_16u_ctx(ctx, t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq)
10205}
10206
10207/// Constructs the suffix array of a given 16-bit string in parallel using OpenMP-style threading.
10208///
10209/// - `t` (`[0..n-1]`): the input 16-bit string.
10210/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10211/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10212/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10213/// - `threads`: number of worker threads (can be 0 for the implementation default).
10214///
10215/// Returns 0 on success, -1 or -2 on error.
10216pub fn libsais16_omp(
10217    t: &[u16],
10218    sa: &mut [SaSint],
10219    fs: SaSint,
10220    freq: Option<&mut [SaSint]>,
10221    threads: SaSint,
10222) -> SaSint {
10223    if threads < 0 {
10224        -1
10225    } else {
10226        main_16u_alloc(t, sa, 0, 0, None, fs, freq, threads)
10227    }
10228}
10229
10230/// Constructs the generalized suffix array (GSA) of a given 16-bit string set in parallel using OpenMP-style threading.
10231///
10232/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
10233/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10234/// - `fs`: extra space available at the end of `sa` (0 should be enough for most cases).
10235/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10236/// - `threads`: number of worker threads (can be 0 for the implementation default).
10237///
10238/// Returns 0 on success, -1 or -2 on error.
10239pub fn libsais16_gsa_omp(
10240    t: &[u16],
10241    sa: &mut [SaSint],
10242    fs: SaSint,
10243    freq: Option<&mut [SaSint]>,
10244    threads: SaSint,
10245) -> SaSint {
10246    if threads < 0 {
10247        -1
10248    } else {
10249        main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, threads)
10250    }
10251}
10252
10253/// Constructs the suffix array of a given integer array in parallel using OpenMP-style threading.
10254///
10255/// During construction the input array is modified, but restored at the end if no error occurred.
10256///
10257/// - `t` (`[0..n-1]`): the input integer array.
10258/// - `sa` (`[0..n-1+fs]`): the output array of suffixes.
10259/// - `k`: the alphabet size of the input integer array.
10260/// - `fs`: extra space available at the end of `sa` (can be 0, but 4k or better 6k is recommended for optimal performance).
10261/// - `threads`: number of worker threads (can be 0 for the implementation default).
10262///
10263/// Returns 0 on success, -1 or -2 on error.
10264pub fn libsais16_int_omp(
10265    t: &mut [SaSint],
10266    sa: &mut [SaSint],
10267    k: SaSint,
10268    fs: SaSint,
10269    threads: SaSint,
10270) -> SaSint {
10271    if threads < 0
10272        || fs < 0
10273        || sa.len()
10274            < t.len()
10275                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10276    {
10277        return -1;
10278    }
10279
10280    if t.len() <= 1 {
10281        if t.len() == 1 {
10282            sa[0] = 0;
10283        }
10284        return 0;
10285    }
10286
10287    main_int(t, sa, k, fs, threads)
10288}
10289
10290fn build_bwt(
10291    t: &[u16],
10292    u: &mut [u16],
10293    a: &mut [SaSint],
10294    fs: SaSint,
10295    freq: Option<&mut [SaSint]>,
10296    threads: SaSint,
10297) -> SaSint {
10298    if fs < 0
10299        || threads < 0
10300        || u.len() < t.len()
10301        || a.len()
10302            < t.len()
10303                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10304        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10305    {
10306        return -1;
10307    }
10308    if t.len() <= 1 {
10309        fill_freq(t, freq);
10310        if t.len() == 1 {
10311            u[0] = t[0];
10312        }
10313        return t.len() as SaSint;
10314    }
10315
10316    let n = t.len();
10317    let mut index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, threads);
10318    if index >= 0 {
10319        index += 1;
10320        u[0] = t[n - 1];
10321        bwt_copy_16u(&mut u[1..], a, index - 1);
10322        bwt_copy_16u(
10323            &mut u[index as usize..],
10324            &a[index as usize..],
10325            n as SaSint - index,
10326        );
10327    }
10328    index
10329}
10330
10331/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string.
10332///
10333/// - `t` (`[0..n-1]`): the input 16-bit string.
10334/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10335/// - `a` (`[0..n-1+fs]`): the temporary array.
10336/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10337/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10338///
10339/// Returns the primary index on success, -1 or -2 on error.
10340pub fn libsais16_bwt(
10341    t: &[u16],
10342    u: &mut [u16],
10343    a: &mut [SaSint],
10344    fs: SaSint,
10345    freq: Option<&mut [SaSint]>,
10346) -> SaSint {
10347    build_bwt(t, u, a, fs, freq, 1)
10348}
10349
10350fn build_bwt_aux(
10351    t: &[u16],
10352    u: &mut [u16],
10353    a: &mut [SaSint],
10354    fs: SaSint,
10355    freq: Option<&mut [SaSint]>,
10356    r: SaSint,
10357    i: &mut [SaSint],
10358    threads: SaSint,
10359) -> SaSint {
10360    if threads < 0 || r < 2 || (r & (r - 1)) != 0 {
10361        return -1;
10362    }
10363    let samples = if t.is_empty() {
10364        1
10365    } else {
10366        (t.len() - 1) / r as usize + 1
10367    };
10368    if i.len() < samples {
10369        return -1;
10370    }
10371    let n = t.len();
10372    if n <= 1 {
10373        fill_freq(t, freq);
10374        if n == 1 {
10375            u[0] = t[0];
10376        }
10377        i[0] = n as SaSint;
10378        return 0;
10379    }
10380
10381    let index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, threads);
10382    if index == 0 {
10383        u[0] = t[n - 1];
10384        bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10385        bwt_copy_16u(
10386            &mut u[i[0] as usize..],
10387            &a[i[0] as usize..],
10388            n as SaSint - i[0],
10389        );
10390    }
10391    index
10392}
10393
10394/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string with auxiliary indexes.
10395///
10396/// - `t` (`[0..n-1]`): the input 16-bit string.
10397/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10398/// - `a` (`[0..n-1+fs]`): the temporary array.
10399/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10400/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10401/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
10402/// - `i` (`[0..(n-1)/r]`): output auxiliary indexes.
10403///
10404/// Returns 0 on success, -1 or -2 on error.
10405pub fn libsais16_bwt_aux(
10406    t: &[u16],
10407    u: &mut [u16],
10408    a: &mut [SaSint],
10409    fs: SaSint,
10410    freq: Option<&mut [SaSint]>,
10411    r: SaSint,
10412    i: &mut [SaSint],
10413) -> SaSint {
10414    build_bwt_aux(t, u, a, fs, freq, r, i, 1)
10415}
10416
10417/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string using a libsais16 context.
10418///
10419/// - `ctx`: the libsais16 context.
10420/// - `t` (`[0..n-1]`): the input 16-bit string.
10421/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10422/// - `a` (`[0..n-1+fs]`): the temporary array.
10423/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10424/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10425///
10426/// Returns the primary index on success, -1 or -2 on error.
10427pub fn libsais16_bwt_ctx(
10428    ctx: &mut Context,
10429    t: &[u16],
10430    u: &mut [u16],
10431    a: &mut [SaSint],
10432    fs: SaSint,
10433    freq: Option<&mut [SaSint]>,
10434) -> SaSint {
10435    if fs < 0
10436        || u.len() < t.len()
10437        || a.len()
10438            < t.len()
10439                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10440        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10441    {
10442        return -1;
10443    }
10444    if t.len() <= 1 {
10445        fill_freq(t, freq);
10446        if t.len() == 1 {
10447            u[0] = t[0];
10448        }
10449        return t.len() as SaSint;
10450    }
10451
10452    let n = t.len();
10453    let mut index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq);
10454    if index >= 0 {
10455        index += 1;
10456        u[0] = t[n - 1];
10457        bwt_copy_16u(&mut u[1..], a, index - 1);
10458        bwt_copy_16u(
10459            &mut u[index as usize..],
10460            &a[index as usize..],
10461            n as SaSint - index,
10462        );
10463    }
10464    index
10465}
10466
10467/// Constructs the BWT of a given 16-bit string with auxiliary indexes using a libsais16 context.
10468///
10469/// - `ctx`: the libsais16 context.
10470/// - `t` (`[0..n-1]`): the input 16-bit string.
10471/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10472/// - `a` (`[0..n-1+fs]`): the temporary array.
10473/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10474/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10475/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
10476/// - `i` (`[0..(n-1)/r]`): output auxiliary indexes.
10477///
10478/// Returns 0 on success, -1 or -2 on error.
10479pub fn libsais16_bwt_aux_ctx(
10480    ctx: &mut Context,
10481    t: &[u16],
10482    u: &mut [u16],
10483    a: &mut [SaSint],
10484    fs: SaSint,
10485    freq: Option<&mut [SaSint]>,
10486    r: SaSint,
10487    i: &mut [SaSint],
10488) -> SaSint {
10489    if fs < 0 || r < 2 || (r & (r - 1)) != 0 {
10490        return -1;
10491    }
10492    let samples = if t.is_empty() {
10493        1
10494    } else {
10495        (t.len() - 1) / r as usize + 1
10496    };
10497    if u.len() < t.len()
10498        || a.len()
10499            < t.len()
10500                .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10501        || i.len() < samples
10502        || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10503    {
10504        return -1;
10505    }
10506    if t.len() <= 1 {
10507        fill_freq(t, freq);
10508        if t.len() == 1 {
10509            u[0] = t[0];
10510        }
10511        i[0] = t.len() as SaSint;
10512        return 0;
10513    }
10514
10515    let n = t.len();
10516    let index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq);
10517    if index == 0 {
10518        u[0] = t[n - 1];
10519        bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10520        bwt_copy_16u(
10521            &mut u[i[0] as usize..],
10522            &a[i[0] as usize..],
10523            n as SaSint - i[0],
10524        );
10525    }
10526    index
10527}
10528
10529/// Constructs the Burrows-Wheeler transformed 16-bit string (BWT) of a given 16-bit string in parallel using OpenMP-style threading.
10530///
10531/// - `t` (`[0..n-1]`): the input 16-bit string.
10532/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10533/// - `a` (`[0..n-1+fs]`): the temporary array.
10534/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10535/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10536/// - `threads`: number of worker threads (can be 0 for the implementation default).
10537///
10538/// Returns the primary index on success, -1 or -2 on error.
10539pub fn libsais16_bwt_omp(
10540    t: &[u16],
10541    u: &mut [u16],
10542    a: &mut [SaSint],
10543    fs: SaSint,
10544    freq: Option<&mut [SaSint]>,
10545    threads: SaSint,
10546) -> SaSint {
10547    if threads < 0 {
10548        -1
10549    } else {
10550        build_bwt(t, u, a, fs, freq, threads)
10551    }
10552}
10553
10554/// Constructs the BWT of a given 16-bit string with auxiliary indexes in parallel using OpenMP-style threading.
10555///
10556/// - `t` (`[0..n-1]`): the input 16-bit string.
10557/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
10558/// - `a` (`[0..n-1+fs]`): the temporary array.
10559/// - `fs`: extra space available at the end of `a` (0 should be enough for most cases).
10560/// - `freq` (`[0..65535]`): optional output symbol frequency table.
10561/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
10562/// - `i` (`[0..(n-1)/r]`): output auxiliary indexes.
10563/// - `threads`: number of worker threads (can be 0 for the implementation default).
10564///
10565/// Returns 0 on success, -1 or -2 on error.
10566pub fn libsais16_bwt_aux_omp(
10567    t: &[u16],
10568    u: &mut [u16],
10569    a: &mut [SaSint],
10570    fs: SaSint,
10571    freq: Option<&mut [SaSint]>,
10572    r: SaSint,
10573    i: &mut [SaSint],
10574    threads: SaSint,
10575) -> SaSint {
10576    if threads < 0 {
10577        -1
10578    } else {
10579        build_bwt_aux(t, u, a, fs, freq, r, i, threads)
10580    }
10581}
10582
10583fn validate_unbwt_aux(
10584    t: &[u16],
10585    u: &[u16],
10586    a: &[SaSint],
10587    freq: Option<&[SaSint]>,
10588    r: SaSint,
10589    i: &[SaSint],
10590) -> SaSint {
10591    let n = t.len();
10592    if u.len() < n
10593        || a.len() < n
10594        || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10595        || ((r != n as SaSint) && (r < 2 || (r & (r - 1)) != 0))
10596        || i.is_empty()
10597    {
10598        return -1;
10599    }
10600    if n <= 1 {
10601        return if i[0] == n as SaSint { 0 } else { -1 };
10602    }
10603
10604    let samples = (n - 1) / r as usize + 1;
10605    if i.len() < samples {
10606        return -1;
10607    }
10608
10609    for &index in &i[..samples] {
10610        if index <= 0 || index as usize > n {
10611            return -1;
10612        }
10613    }
10614    0
10615}
10616
/// Adds one to `count[symbol]` for every symbol of `t`; existing counts are kept and built upon.
fn unbwt_compute_histogram(t: &[u16], count: &mut [usize]) {
    t.iter()
        .for_each(|&symbol| count[usize::from(symbol)] += 1);
}
10622
10623fn unbwt_shift(n: usize) -> usize {
10624    let mut shift = 0usize;
10625    while (n >> shift) > (1usize << UNBWT_FASTBITS) {
10626        shift += 1;
10627    }
10628    shift
10629}
10630
10631fn unbwt_calculate_fastbits(bucket2: &mut [usize], fastbits: &mut [u16], shift: usize) {
10632    let mut v = 0usize;
10633    let mut sum = 1usize;
10634    for (w, bucket) in bucket2.iter_mut().enumerate().take(ALPHABET_SIZE) {
10635        let prev = sum;
10636        sum += *bucket;
10637        *bucket = prev;
10638        if prev != sum {
10639            while v <= ((sum - 1) >> shift) {
10640                fastbits[v] = w as u16;
10641                v += 1;
10642            }
10643        }
10644    }
10645}
10646
/// Fills `p` by bucketing row numbers by symbol, advancing the per-symbol cursors in `bucket2`.
///
/// Rows `0..index` are placed under `t[row]`; rows `index + 1..=t.len()` are placed under
/// `t[row - 1]`. Row `index` itself is never written.
fn unbwt_calculate_p(t: &[u16], p: &mut [usize], bucket2: &mut [usize], index: usize) {
    // Stores `row` at the current cursor of `symbol` and moves that cursor forward by one.
    let mut place = |symbol: u16, row: usize| {
        let cursor = &mut bucket2[usize::from(symbol)];
        p[*cursor] = row;
        *cursor += 1;
    };

    for row in 0..index {
        place(t[row], row);
    }
    for row in (index + 1)..=t.len() {
        place(t[row - 1], row);
    }
}
10660
/// Block-restricted counterpart of `unbwt_calculate_p`.
///
/// Rows `block_start..min(index, block_end)` are placed under `t[row]`; rows
/// `max(block_start, index) + 1..=block_end` are placed under `t[row - 1]`. Each placement writes
/// the row at the current cursor of its symbol in `bucket2` and advances that cursor.
#[allow(dead_code, non_snake_case)]
fn unbwt_calculate_P(
    t: &[u16],
    p: &mut [usize],
    bucket2: &mut [usize],
    index: usize,
    block_start: usize,
    block_end: usize,
) {
    // Stores `row` at the current cursor of `symbol` and moves that cursor forward by one.
    let mut place = |symbol: u16, row: usize| {
        let cursor = &mut bucket2[usize::from(symbol)];
        p[*cursor] = row;
        *cursor += 1;
    };

    let before_index = block_start..index.min(block_end);
    for row in before_index {
        place(t[row], row);
    }

    let after_index = (block_start.max(index) + 1)..=block_end;
    for row in after_index {
        place(t[row - 1], row);
    }
}
10684
10685fn unbwt_init_single(
10686    t: &[u16],
10687    p: &mut [usize],
10688    freq: Option<&[SaSint]>,
10689    i: &[SaSint],
10690    bucket2: &mut [usize],
10691    fastbits: &mut [u16],
10692) {
10693    let shift = unbwt_shift(t.len());
10694    if let Some(freq) = freq {
10695        for c in 0..ALPHABET_SIZE {
10696            bucket2[c] = freq[c] as usize;
10697        }
10698    } else {
10699        bucket2.fill(0);
10700        unbwt_compute_histogram(t, bucket2);
10701    }
10702
10703    unbwt_calculate_fastbits(bucket2, fastbits, shift);
10704    unbwt_calculate_p(t, p, bucket2, i[0] as usize);
10705}
10706
10707#[allow(dead_code)]
10708fn unbwt_init_parallel(
10709    t: &[u16],
10710    p: &mut [usize],
10711    freq: Option<&[SaSint]>,
10712    i: &[SaSint],
10713    bucket2: &mut [usize],
10714    fastbits: &mut [u16],
10715    buckets: &mut [usize],
10716    threads: SaSint,
10717) {
10718    let n = t.len();
10719    let available_threads = buckets.len() / ALPHABET_SIZE;
10720    let num_threads = if threads > 1 && n >= 65_536 && available_threads > 1 {
10721        usize::try_from(threads)
10722            .expect("threads must be non-negative")
10723            .min(available_threads)
10724            .max(1)
10725    } else {
10726        1
10727    };
10728
10729    if num_threads == 1 {
10730        unbwt_init_single(t, p, freq, i, bucket2, fastbits);
10731        return;
10732    }
10733
10734    let index = usize::try_from(i[0]).expect("primary index must be non-negative");
10735    let shift = unbwt_shift(n);
10736    let block_stride = (n / num_threads) & !15usize;
10737
10738    for thread in 0..num_threads {
10739        let block_start = thread * block_stride;
10740        let block_size = if thread + 1 < num_threads {
10741            block_stride
10742        } else {
10743            n - block_start
10744        };
10745        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10746        local.fill(0);
10747        unbwt_compute_histogram(&t[block_start..block_start + block_size], local);
10748    }
10749
10750    bucket2.fill(0);
10751    for thread in 0..num_threads {
10752        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10753        for c in 0..ALPHABET_SIZE {
10754            let a = bucket2[c];
10755            let b = local[c];
10756            bucket2[c] = a + b;
10757            local[c] = a;
10758        }
10759    }
10760
10761    unbwt_calculate_fastbits(bucket2, fastbits, shift);
10762
10763    for thread in 0..num_threads {
10764        let block_start = thread * block_stride;
10765        let block_size = if thread + 1 < num_threads {
10766            block_stride
10767        } else {
10768            n - block_start
10769        };
10770        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10771        for c in 0..ALPHABET_SIZE {
10772            local[c] += bucket2[c];
10773        }
10774        unbwt_calculate_P(t, p, local, index, block_start, block_start + block_size);
10775    }
10776
10777    let last_local = &buckets[(num_threads - 1) * ALPHABET_SIZE..num_threads * ALPHABET_SIZE];
10778    bucket2.copy_from_slice(last_local);
10779}
10780
/// Looks up the symbol owning position `p0` and the position stored at `p[p0]`.
///
/// `fastbits[p0 >> shift]` supplies a starting candidate; the candidate is then advanced while
/// its `bucket2` entry is still less than or equal to `p0`.
fn unbwt_decode_symbol(
    p0: usize,
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
) -> (u16, usize) {
    let mut symbol = usize::from(fastbits[p0 >> shift]);
    while bucket2[symbol] <= p0 {
        symbol += 1;
    }
    (symbol as u16, p[p0])
}
10796
10797#[allow(dead_code)]
10798fn unbwt_decode_1(
10799    u: &mut [u16],
10800    p: &[usize],
10801    bucket2: &[usize],
10802    fastbits: &[u16],
10803    shift: usize,
10804    i0: &mut usize,
10805    k: usize,
10806) {
10807    let mut cursors = [*i0];
10808    unbwt_decode_lanes::<1>(u, p, bucket2, fastbits, shift, k, &mut cursors, k);
10809    *i0 = cursors[0];
10810}
10811
10812#[allow(dead_code)]
10813fn unbwt_decode_2(
10814    u: &mut [u16],
10815    p: &[usize],
10816    bucket2: &[usize],
10817    fastbits: &[u16],
10818    shift: usize,
10819    r: usize,
10820    i0: &mut usize,
10821    i1: &mut usize,
10822    k: usize,
10823) {
10824    let mut cursors = [*i0, *i1];
10825    unbwt_decode_lanes::<2>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10826    *i0 = cursors[0];
10827    *i1 = cursors[1];
10828}
10829
10830#[allow(dead_code)]
10831fn unbwt_decode_3(
10832    u: &mut [u16],
10833    p: &[usize],
10834    bucket2: &[usize],
10835    fastbits: &[u16],
10836    shift: usize,
10837    r: usize,
10838    i0: &mut usize,
10839    i1: &mut usize,
10840    i2: &mut usize,
10841    k: usize,
10842) {
10843    let mut cursors = [*i0, *i1, *i2];
10844    unbwt_decode_lanes::<3>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10845    *i0 = cursors[0];
10846    *i1 = cursors[1];
10847    *i2 = cursors[2];
10848}
10849
10850#[allow(dead_code)]
10851fn unbwt_decode_4(
10852    u: &mut [u16],
10853    p: &[usize],
10854    bucket2: &[usize],
10855    fastbits: &[u16],
10856    shift: usize,
10857    r: usize,
10858    i0: &mut usize,
10859    i1: &mut usize,
10860    i2: &mut usize,
10861    i3: &mut usize,
10862    k: usize,
10863) {
10864    let mut cursors = [*i0, *i1, *i2, *i3];
10865    unbwt_decode_lanes::<4>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10866    *i0 = cursors[0];
10867    *i1 = cursors[1];
10868    *i2 = cursors[2];
10869    *i3 = cursors[3];
10870}
10871
10872#[allow(dead_code)]
10873fn unbwt_decode_5(
10874    u: &mut [u16],
10875    p: &[usize],
10876    bucket2: &[usize],
10877    fastbits: &[u16],
10878    shift: usize,
10879    r: usize,
10880    cursors: &mut [usize; 5],
10881    k: usize,
10882) {
10883    unbwt_decode_lanes::<5>(u, p, bucket2, fastbits, shift, r, cursors, k);
10884}
10885
10886#[allow(dead_code)]
10887fn unbwt_decode_6(
10888    u: &mut [u16],
10889    p: &[usize],
10890    bucket2: &[usize],
10891    fastbits: &[u16],
10892    shift: usize,
10893    r: usize,
10894    cursors: &mut [usize; 6],
10895    k: usize,
10896) {
10897    unbwt_decode_lanes::<6>(u, p, bucket2, fastbits, shift, r, cursors, k);
10898}
10899
10900#[allow(dead_code)]
10901fn unbwt_decode_7(
10902    u: &mut [u16],
10903    p: &[usize],
10904    bucket2: &[usize],
10905    fastbits: &[u16],
10906    shift: usize,
10907    r: usize,
10908    cursors: &mut [usize; 7],
10909    k: usize,
10910) {
10911    unbwt_decode_lanes::<7>(u, p, bucket2, fastbits, shift, r, cursors, k);
10912}
10913
10914#[allow(dead_code)]
10915fn unbwt_decode_8(
10916    u: &mut [u16],
10917    p: &[usize],
10918    bucket2: &[usize],
10919    fastbits: &[u16],
10920    shift: usize,
10921    r: usize,
10922    cursors: &mut [usize; 8],
10923    k: usize,
10924) {
10925    unbwt_decode_lanes::<8>(u, p, bucket2, fastbits, shift, r, cursors, k);
10926}
10927
10928fn unbwt_decode(
10929    u: &mut [u16],
10930    p: &[usize],
10931    n: usize,
10932    r: usize,
10933    i: &[SaSint],
10934    bucket2: &[usize],
10935    fastbits: &[u16],
10936) {
10937    let shift = unbwt_shift(n);
10938    let blocks = 1 + (n - 1) / r;
10939    let remainder = n - r * (blocks - 1);
10940    unbwt_decode_blocks(u, p, r, i, bucket2, fastbits, shift, blocks, remainder);
10941}
10942
10943fn unbwt_decode_blocks(
10944    u: &mut [u16],
10945    p: &[usize],
10946    r: usize,
10947    i: &[SaSint],
10948    bucket2: &[usize],
10949    fastbits: &[u16],
10950    shift: usize,
10951    blocks: usize,
10952    remainder: usize,
10953) {
10954    let mut blocks_left = blocks;
10955    let mut i_offset = 0usize;
10956    let mut u_offset = 0usize;
10957
10958    while blocks_left > 8 {
10959        let mut cursors = [
10960            i[i_offset] as usize,
10961            i[i_offset + 1] as usize,
10962            i[i_offset + 2] as usize,
10963            i[i_offset + 3] as usize,
10964            i[i_offset + 4] as usize,
10965            i[i_offset + 5] as usize,
10966            i[i_offset + 6] as usize,
10967            i[i_offset + 7] as usize,
10968        ];
10969        unbwt_decode_lanes::<8>(
10970            &mut u[u_offset..],
10971            p,
10972            bucket2,
10973            fastbits,
10974            shift,
10975            r,
10976            &mut cursors,
10977            r,
10978        );
10979        i_offset += 8;
10980        blocks_left -= 8;
10981        u_offset += 8 * r;
10982    }
10983
10984    match blocks_left {
10985        1 => {
10986            let mut cursors = [i[i_offset] as usize];
10987            unbwt_decode_lanes::<1>(
10988                &mut u[u_offset..],
10989                p,
10990                bucket2,
10991                fastbits,
10992                shift,
10993                r,
10994                &mut cursors,
10995                remainder,
10996            );
10997        }
10998        2 => {
10999            let mut cursors = [i[i_offset] as usize, i[i_offset + 1] as usize];
11000            unbwt_decode_lanes::<2>(
11001                &mut u[u_offset..],
11002                p,
11003                bucket2,
11004                fastbits,
11005                shift,
11006                r,
11007                &mut cursors,
11008                remainder,
11009            );
11010            let mut first = [cursors[0]];
11011            unbwt_decode_lanes::<1>(
11012                &mut u[u_offset + remainder..],
11013                p,
11014                bucket2,
11015                fastbits,
11016                shift,
11017                r,
11018                &mut first,
11019                r - remainder,
11020            );
11021        }
11022        3 => {
11023            let mut cursors = [
11024                i[i_offset] as usize,
11025                i[i_offset + 1] as usize,
11026                i[i_offset + 2] as usize,
11027            ];
11028            unbwt_decode_lanes::<3>(
11029                &mut u[u_offset..],
11030                p,
11031                bucket2,
11032                fastbits,
11033                shift,
11034                r,
11035                &mut cursors,
11036                remainder,
11037            );
11038            let mut first = [cursors[0], cursors[1]];
11039            unbwt_decode_lanes::<2>(
11040                &mut u[u_offset + remainder..],
11041                p,
11042                bucket2,
11043                fastbits,
11044                shift,
11045                r,
11046                &mut first,
11047                r - remainder,
11048            );
11049        }
11050        4 => {
11051            let mut cursors = [
11052                i[i_offset] as usize,
11053                i[i_offset + 1] as usize,
11054                i[i_offset + 2] as usize,
11055                i[i_offset + 3] as usize,
11056            ];
11057            unbwt_decode_lanes::<4>(
11058                &mut u[u_offset..],
11059                p,
11060                bucket2,
11061                fastbits,
11062                shift,
11063                r,
11064                &mut cursors,
11065                remainder,
11066            );
11067            let mut first = [cursors[0], cursors[1], cursors[2]];
11068            unbwt_decode_lanes::<3>(
11069                &mut u[u_offset + remainder..],
11070                p,
11071                bucket2,
11072                fastbits,
11073                shift,
11074                r,
11075                &mut first,
11076                r - remainder,
11077            );
11078        }
11079        5 => {
11080            let mut cursors = [
11081                i[i_offset] as usize,
11082                i[i_offset + 1] as usize,
11083                i[i_offset + 2] as usize,
11084                i[i_offset + 3] as usize,
11085                i[i_offset + 4] as usize,
11086            ];
11087            unbwt_decode_lanes::<5>(
11088                &mut u[u_offset..],
11089                p,
11090                bucket2,
11091                fastbits,
11092                shift,
11093                r,
11094                &mut cursors,
11095                remainder,
11096            );
11097            let mut first = [cursors[0], cursors[1], cursors[2], cursors[3]];
11098            unbwt_decode_lanes::<4>(
11099                &mut u[u_offset + remainder..],
11100                p,
11101                bucket2,
11102                fastbits,
11103                shift,
11104                r,
11105                &mut first,
11106                r - remainder,
11107            );
11108        }
11109        6 => {
11110            let mut cursors = [
11111                i[i_offset] as usize,
11112                i[i_offset + 1] as usize,
11113                i[i_offset + 2] as usize,
11114                i[i_offset + 3] as usize,
11115                i[i_offset + 4] as usize,
11116                i[i_offset + 5] as usize,
11117            ];
11118            unbwt_decode_lanes::<6>(
11119                &mut u[u_offset..],
11120                p,
11121                bucket2,
11122                fastbits,
11123                shift,
11124                r,
11125                &mut cursors,
11126                remainder,
11127            );
11128            let mut first = [cursors[0], cursors[1], cursors[2], cursors[3], cursors[4]];
11129            unbwt_decode_lanes::<5>(
11130                &mut u[u_offset + remainder..],
11131                p,
11132                bucket2,
11133                fastbits,
11134                shift,
11135                r,
11136                &mut first,
11137                r - remainder,
11138            );
11139        }
11140        7 => {
11141            let mut cursors = [
11142                i[i_offset] as usize,
11143                i[i_offset + 1] as usize,
11144                i[i_offset + 2] as usize,
11145                i[i_offset + 3] as usize,
11146                i[i_offset + 4] as usize,
11147                i[i_offset + 5] as usize,
11148                i[i_offset + 6] as usize,
11149            ];
11150            unbwt_decode_lanes::<7>(
11151                &mut u[u_offset..],
11152                p,
11153                bucket2,
11154                fastbits,
11155                shift,
11156                r,
11157                &mut cursors,
11158                remainder,
11159            );
11160            let mut first = [
11161                cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5],
11162            ];
11163            unbwt_decode_lanes::<6>(
11164                &mut u[u_offset + remainder..],
11165                p,
11166                bucket2,
11167                fastbits,
11168                shift,
11169                r,
11170                &mut first,
11171                r - remainder,
11172            );
11173        }
11174        _ => {
11175            let mut cursors = [
11176                i[i_offset] as usize,
11177                i[i_offset + 1] as usize,
11178                i[i_offset + 2] as usize,
11179                i[i_offset + 3] as usize,
11180                i[i_offset + 4] as usize,
11181                i[i_offset + 5] as usize,
11182                i[i_offset + 6] as usize,
11183                i[i_offset + 7] as usize,
11184            ];
11185            unbwt_decode_lanes::<8>(
11186                &mut u[u_offset..],
11187                p,
11188                bucket2,
11189                fastbits,
11190                shift,
11191                r,
11192                &mut cursors,
11193                remainder,
11194            );
11195            let mut first = [
11196                cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5], cursors[6],
11197            ];
11198            unbwt_decode_lanes::<7>(
11199                &mut u[u_offset + remainder..],
11200                p,
11201                bucket2,
11202                fastbits,
11203                shift,
11204                r,
11205                &mut first,
11206                r - remainder,
11207            );
11208        }
11209    }
11210}
11211
11212#[allow(dead_code)]
11213fn unbwt_decode_omp(
11214    u: &mut [u16],
11215    p: &[usize],
11216    n: usize,
11217    r: usize,
11218    i: &[SaSint],
11219    bucket2: &[usize],
11220    fastbits: &[u16],
11221    threads: SaSint,
11222) {
11223    let blocks = 1 + (n - 1) / r;
11224    let remainder = n - r * (blocks - 1);
11225    let num_threads = if threads > 1 && n >= 65_536 {
11226        usize::try_from(threads)
11227            .expect("threads must be non-negative")
11228            .min(blocks)
11229            .max(1)
11230    } else {
11231        1
11232    };
11233
11234    if num_threads == 1 {
11235        unbwt_decode(u, p, n, r, i, bucket2, fastbits);
11236        return;
11237    }
11238
11239    let shift = unbwt_shift(n);
11240    let block_stride = blocks / num_threads;
11241    let block_remainder = blocks % num_threads;
11242    for thread in 0..num_threads {
11243        let block_count = block_stride + usize::from(thread < block_remainder);
11244        let block_start = block_stride * thread + thread.min(block_remainder);
11245        let tail = if thread + 1 < num_threads {
11246            r
11247        } else {
11248            remainder
11249        };
11250        unbwt_decode_blocks(
11251            &mut u[r * block_start..],
11252            p,
11253            r,
11254            &i[block_start..],
11255            bucket2,
11256            fastbits,
11257            shift,
11258            block_count,
11259            tail,
11260        );
11261    }
11262}
11263
11264fn unbwt_decode_lanes<const LANES: usize>(
11265    u: &mut [u16],
11266    p: &[usize],
11267    bucket2: &[usize],
11268    fastbits: &[u16],
11269    shift: usize,
11270    r: usize,
11271    cursors: &mut [usize; LANES],
11272    k: usize,
11273) {
11274    for pos in 0..k {
11275        for lane in 0..LANES {
11276            let (symbol, next) = unbwt_decode_symbol(cursors[lane], p, bucket2, fastbits, shift);
11277            cursors[lane] = next;
11278            u[lane * r + pos] = symbol;
11279        }
11280    }
11281}
11282
11283fn unbwt_core(
11284    t: &[u16],
11285    u: &mut [u16],
11286    a: &mut [SaSint],
11287    freq: Option<&[SaSint]>,
11288    r: SaSint,
11289    i: &[SaSint],
11290) -> SaSint {
11291    let n = t.len();
11292    let shift = unbwt_shift(n);
11293    let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11294    let mut fastbits = vec![0u16; 1 + (n >> shift)];
11295
11296    unbwt_core_with_buffers(t, u, a, freq, r, i, &mut bucket2, &mut fastbits, 1)
11297}
11298
11299fn unbwt_core_with_buffers(
11300    t: &[u16],
11301    u: &mut [u16],
11302    a: &mut [SaSint],
11303    freq: Option<&[SaSint]>,
11304    r: SaSint,
11305    i: &[SaSint],
11306    bucket2: &mut [usize],
11307    fastbits: &mut [u16],
11308    threads: SaSint,
11309) -> SaSint {
11310    let n = t.len();
11311    let shift = unbwt_shift(n);
11312    if bucket2.len() < ALPHABET_SIZE || fastbits.len() < 1 + (n >> shift) {
11313        return -2;
11314    }
11315
11316    let mut p = vec![0usize; n + 1];
11317    unbwt_init_single(
11318        t,
11319        &mut p,
11320        freq,
11321        i,
11322        &mut bucket2[..ALPHABET_SIZE],
11323        &mut fastbits[..1 + (n >> shift)],
11324    );
11325    unbwt_decode_omp(
11326        u,
11327        &p,
11328        n,
11329        r as usize,
11330        i,
11331        &bucket2[..ALPHABET_SIZE],
11332        &fastbits[..1 + (n >> shift)],
11333        threads,
11334    );
11335
11336    for (dst, &src) in a.iter_mut().zip(p.iter().skip(1)) {
11337        *dst = src as SaSint;
11338    }
11339    0
11340}
11341
11342fn inverse_bwt(
11343    t: &[u16],
11344    u: &mut [u16],
11345    a: &mut [SaSint],
11346    freq: Option<&[SaSint]>,
11347    primary: SaSint,
11348) -> SaSint {
11349    let n = t.len();
11350    let i = [primary];
11351    let rc = validate_unbwt_aux(t, u, a, freq, n as SaSint, &i);
11352    if rc != 0 {
11353        return rc;
11354    }
11355    if n <= 1 {
11356        if n == 1 {
11357            u[0] = t[0];
11358        }
11359        return 0;
11360    }
11361    unbwt_core(t, u, a, freq, n as SaSint, &i)
11362}
11363
11364/// Reconstructs the original 16-bit string from a given BWT and primary index.
11365///
11366/// - `t` (`[0..n-1]`): the input 16-bit string.
11367/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11368/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11369/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11370/// - `i`: the primary index.
11371///
11372/// Returns 0 on success, -1 or -2 on error.
11373pub fn libsais16_unbwt(
11374    t: &[u16],
11375    u: &mut [u16],
11376    a: &mut [SaSint],
11377    freq: Option<&[SaSint]>,
11378    i: SaSint,
11379) -> SaSint {
11380    inverse_bwt(t, u, a, freq, i)
11381}
11382
11383/// Reconstructs the original 16-bit string from a given BWT and primary index using a libsais16 reverse-BWT context.
11384///
11385/// - `ctx`: the libsais16 reverse-BWT context.
11386/// - `t` (`[0..n-1]`): the input 16-bit string.
11387/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11388/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11389/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11390/// - `i`: the primary index.
11391///
11392/// Returns 0 on success, -1 or -2 on error.
11393pub fn libsais16_unbwt_ctx(
11394    ctx: &mut UnbwtContext,
11395    t: &[u16],
11396    u: &mut [u16],
11397    a: &mut [SaSint],
11398    freq: Option<&[SaSint]>,
11399    i: SaSint,
11400) -> SaSint {
11401    libsais16_unbwt_aux_ctx(ctx, t, u, a, freq, t.len() as SaSint, &[i])
11402}
11403
11404/// Reconstructs the original 16-bit string from a given BWT with auxiliary indexes.
11405///
11406/// - `t` (`[0..n-1]`): the input 16-bit string.
11407/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11408/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11409/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11410/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
11411/// - `i` (`[0..(n-1)/r]`): input auxiliary indexes.
11412///
11413/// Returns 0 on success, -1 or -2 on error.
11414pub fn libsais16_unbwt_aux(
11415    t: &[u16],
11416    u: &mut [u16],
11417    a: &mut [SaSint],
11418    freq: Option<&[SaSint]>,
11419    r: SaSint,
11420    i: &[SaSint],
11421) -> SaSint {
11422    let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11423    if rc != 0 {
11424        return rc;
11425    }
11426    if t.len() <= 1 {
11427        if t.len() == 1 {
11428            u[0] = t[0];
11429        }
11430        return 0;
11431    }
11432    unbwt_core(t, u, a, freq, r, i)
11433}
11434
11435/// Reconstructs the original 16-bit string from a given BWT with auxiliary indexes using a libsais16 reverse-BWT context.
11436///
11437/// - `ctx`: the libsais16 reverse-BWT context.
11438/// - `t` (`[0..n-1]`): the input 16-bit string.
11439/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11440/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11441/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11442/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
11443/// - `i` (`[0..(n-1)/r]`): input auxiliary indexes.
11444///
11445/// Returns 0 on success, -1 or -2 on error.
11446pub fn libsais16_unbwt_aux_ctx(
11447    ctx: &mut UnbwtContext,
11448    t: &[u16],
11449    u: &mut [u16],
11450    a: &mut [SaSint],
11451    freq: Option<&[SaSint]>,
11452    r: SaSint,
11453    i: &[SaSint],
11454) -> SaSint {
11455    let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11456    if rc != 0 {
11457        return rc;
11458    }
11459    if t.len() <= 1 {
11460        if t.len() == 1 {
11461            u[0] = t[0];
11462        }
11463        return 0;
11464    }
11465    unbwt_core_with_buffers(
11466        t,
11467        u,
11468        a,
11469        freq,
11470        r,
11471        i,
11472        &mut ctx.bucket2,
11473        &mut ctx.fastbits,
11474        ctx.threads,
11475    )
11476}
11477
11478/// Reconstructs the original 16-bit string from a given BWT and primary index in parallel using OpenMP-style threading.
11479///
11480/// - `t` (`[0..n-1]`): the input 16-bit string.
11481/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11482/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11483/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11484/// - `i`: the primary index.
11485/// - `threads`: number of worker threads (can be 0 for the implementation default).
11486///
11487/// Returns 0 on success, -1 or -2 on error.
11488pub fn libsais16_unbwt_omp(
11489    t: &[u16],
11490    u: &mut [u16],
11491    a: &mut [SaSint],
11492    freq: Option<&[SaSint]>,
11493    i: SaSint,
11494    threads: SaSint,
11495) -> SaSint {
11496    if threads < 0 {
11497        -1
11498    } else {
11499        let primary = [i];
11500        libsais16_unbwt_aux_omp(t, u, a, freq, t.len() as SaSint, &primary, threads)
11501    }
11502}
11503
11504/// Reconstructs the original 16-bit string from a given BWT with auxiliary indexes in parallel using OpenMP-style threading.
11505///
11506/// - `t` (`[0..n-1]`): the input 16-bit string.
11507/// - `u` (`[0..n-1]`): the output 16-bit string (can alias `t`).
11508/// - `a` (`[0..n]`): the temporary array (must have length `n + 1`).
11509/// - `freq` (`[0..65535]`): optional input symbol frequency table.
11510/// - `r`: sampling rate for the auxiliary indexes (must be a power of two).
11511/// - `i` (`[0..(n-1)/r]`): input auxiliary indexes.
11512/// - `threads`: number of worker threads (can be 0 for the implementation default).
11513///
11514/// Returns 0 on success, -1 or -2 on error.
11515pub fn libsais16_unbwt_aux_omp(
11516    t: &[u16],
11517    u: &mut [u16],
11518    a: &mut [SaSint],
11519    freq: Option<&[SaSint]>,
11520    r: SaSint,
11521    i: &[SaSint],
11522    threads: SaSint,
11523) -> SaSint {
11524    if threads < 0 {
11525        -1
11526    } else {
11527        let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11528        if rc != 0 {
11529            return rc;
11530        }
11531        if t.len() <= 1 {
11532            if t.len() == 1 {
11533                u[0] = t[0];
11534            }
11535            return 0;
11536        }
11537        let n = t.len();
11538        let shift = unbwt_shift(n);
11539        let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11540        let mut fastbits = vec![0u16; 1 + (n >> shift)];
11541        unbwt_core_with_buffers(
11542            t,
11543            u,
11544            a,
11545            freq,
11546            r,
11547            i,
11548            &mut bucket2,
11549            &mut fastbits,
11550            normalize_threads(threads),
11551        )
11552    }
11553}
11554
11555/// Constructs the permuted longest common prefix array (PLCP) of a given 16-bit string and suffix array.
11556///
11557/// - `t` (`[0..n-1]`): the input 16-bit string.
11558/// - `sa` (`[0..n-1]`): the input suffix array.
11559/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
11560///
11561/// Returns 0 on success, -1 on error.
11562pub fn libsais16_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11563    compute_plcp(t, sa, plcp, false)
11564}
11565
11566/// Constructs the PLCP of a given 16-bit string set and generalized suffix array (GSA).
11567///
11568/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
11569/// - `sa` (`[0..n-1]`): the input generalized suffix array.
11570/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
11571///
11572/// Returns 0 on success, -1 on error.
11573pub fn libsais16_plcp_gsa(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11574    if t.last().copied().unwrap_or(0) != 0 {
11575        -1
11576    } else {
11577        compute_plcp(t, sa, plcp, true)
11578    }
11579}
11580
11581fn compute_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11582    if sa.len() != t.len() || plcp.len() != t.len() {
11583        return -1;
11584    }
11585    if t.len() <= 1 {
11586        if t.len() == 1 {
11587            plcp[0] = 0;
11588        }
11589        return 0;
11590    }
11591
11592    if compute_phi(sa, plcp) != 0 {
11593        return -1;
11594    }
11595
11596    compute_plcp_from_phi(t, plcp, gsa)
11597}
11598
11599fn compute_phi(sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11600    let n = sa.len();
11601    let mut previous = n as SaSint;
11602    for &suffix_value in sa {
11603        let Some(suffix) = suffix_index(suffix_value, n) else {
11604            return -1;
11605        };
11606        plcp[suffix] = previous;
11607        previous = suffix_value;
11608    }
11609    0
11610}
11611
11612fn compute_plcp_from_phi(t: &[u16], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11613    let n = t.len();
11614    let mut l = 0usize;
11615    for i in 0..t.len() {
11616        let previous = plcp[i];
11617        if previous == n as SaSint {
11618            plcp[i] = 0;
11619            l = 0;
11620            continue;
11621        }
11622
11623        let Some(prev) = suffix_index(previous, n) else {
11624            return -1;
11625        };
11626
11627        while i + l < t.len()
11628            && prev + l < t.len()
11629            && t[i + l] == t[prev + l]
11630            && (!gsa || t[i + l] != 0)
11631        {
11632            l += 1;
11633        }
11634        plcp[i] = l as SaSint;
11635        l = l.saturating_sub(1);
11636    }
11637    0
11638}
11639
11640#[allow(dead_code)]
11641fn compute_phi_omp(sa: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11642    let n_usize = n as usize;
11643    if threads == 1 || n < 65_536 {
11644        return compute_phi(&sa[..n_usize], &mut plcp[..n_usize]);
11645    }
11646
11647    let block_stride = (n / threads) & !15;
11648    for thread in 0..threads {
11649        let block_start = thread * block_stride;
11650        let block_size = if thread < threads - 1 {
11651            block_stride
11652        } else {
11653            n - block_start
11654        };
11655        let start = block_start as usize;
11656        let end = (block_start + block_size) as usize;
11657        let mut previous = if start > 0 { sa[start - 1] } else { n };
11658        for &suffix_value in &sa[start..end] {
11659            let Some(suffix) = suffix_index(suffix_value, n_usize) else {
11660                return -1;
11661            };
11662            plcp[suffix] = previous;
11663            previous = suffix_value;
11664        }
11665    }
11666    0
11667}
11668
11669#[allow(dead_code)]
11670fn compute_plcp_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11671    if threads == 1 || n < 65_536 {
11672        let n = n as usize;
11673        return compute_plcp_from_phi(&t[..n], &mut plcp[..n], false);
11674    }
11675
11676    let block_stride = (n / threads) & !15;
11677    for thread in 0..threads {
11678        let block_start = thread * block_stride;
11679        let block_size = if thread < threads - 1 {
11680            block_stride
11681        } else {
11682            n - block_start
11683        };
11684        let rc = compute_plcp_range(
11685            t,
11686            plcp,
11687            n as usize,
11688            block_start as isize,
11689            block_size as isize,
11690            false,
11691        );
11692        if rc != 0 {
11693            return rc;
11694        }
11695    }
11696    0
11697}
11698
11699fn compute_plcp_range(
11700    t: &[u16],
11701    plcp: &mut [SaSint],
11702    n: usize,
11703    omp_block_start: isize,
11704    omp_block_size: isize,
11705    gsa: bool,
11706) -> SaSint {
11707    let mut l = 0usize;
11708    let end = (omp_block_start + omp_block_size) as usize;
11709    for i in omp_block_start as usize..end {
11710        let previous = plcp[i];
11711        if previous == n as SaSint {
11712            plcp[i] = 0;
11713            l = 0;
11714            continue;
11715        }
11716
11717        let Some(prev) = suffix_index(previous, n) else {
11718            return -1;
11719        };
11720
11721        while i + l < t.len()
11722            && prev + l < t.len()
11723            && t[i + l] == t[prev + l]
11724            && (!gsa || t[i + l] != 0)
11725        {
11726            l += 1;
11727        }
11728        plcp[i] = l as SaSint;
11729        l = l.saturating_sub(1);
11730    }
11731    0
11732}
11733
11734#[allow(dead_code)]
11735fn compute_plcp_gsa(
11736    t: &[u16],
11737    plcp: &mut [SaSint],
11738    omp_block_start: isize,
11739    omp_block_size: isize,
11740) -> SaSint {
11741    let n = t.len();
11742    let mut l = 0usize;
11743    let end = (omp_block_start + omp_block_size) as usize;
11744    for i in omp_block_start as usize..end {
11745        let previous = plcp[i];
11746        if previous == n as SaSint {
11747            plcp[i] = 0;
11748            l = 0;
11749            continue;
11750        }
11751
11752        let Some(prev) = suffix_index(previous, n) else {
11753            return -1;
11754        };
11755
11756        while i + l < t.len() && prev + l < t.len() && t[i + l] == t[prev + l] && t[i + l] != 0 {
11757            l += 1;
11758        }
11759        plcp[i] = l as SaSint;
11760        l = l.saturating_sub(1);
11761    }
11762    0
11763}
11764
11765#[allow(dead_code)]
11766fn compute_plcp_gsa_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11767    if threads == 1 || n < 65_536 {
11768        return compute_plcp_gsa(t, plcp, 0, n as isize);
11769    }
11770
11771    let block_stride = (n / threads) & !15;
11772    for thread in 0..threads {
11773        let block_start = thread * block_stride;
11774        let block_size = if thread < threads - 1 {
11775            block_stride
11776        } else {
11777            n - block_start
11778        };
11779        let rc = compute_plcp_gsa(t, plcp, block_start as isize, block_size as isize);
11780        if rc != 0 {
11781            return rc;
11782        }
11783    }
11784    0
11785}
11786
11787#[allow(dead_code)]
11788fn compute_lcp(
11789    plcp: &[SaSint],
11790    sa: &[SaSint],
11791    lcp: &mut [SaSint],
11792    omp_block_start: isize,
11793    omp_block_size: isize,
11794) -> SaSint {
11795    let end = (omp_block_start + omp_block_size) as usize;
11796    for row in omp_block_start as usize..end {
11797        let Some(suffix) = suffix_index(sa[row], plcp.len()) else {
11798            return -1;
11799        };
11800        lcp[row] = plcp[suffix];
11801    }
11802    0
11803}
11804
11805#[allow(dead_code)]
11806fn compute_lcp_omp(
11807    plcp: &[SaSint],
11808    sa: &[SaSint],
11809    lcp: &mut [SaSint],
11810    n: SaSint,
11811    threads: SaSint,
11812) -> SaSint {
11813    if threads == 1 || n < 65_536 {
11814        return compute_lcp(plcp, sa, lcp, 0, n as isize);
11815    }
11816
11817    let block_stride = (n / threads) & !15;
11818    for thread in 0..threads {
11819        let block_start = thread * block_stride;
11820        let block_size = if thread < threads - 1 {
11821            block_stride
11822        } else {
11823            n - block_start
11824        };
11825        let rc = compute_lcp(plcp, sa, lcp, block_start as isize, block_size as isize);
11826        if rc != 0 {
11827            return rc;
11828        }
11829    }
11830    0
11831}
11832
11833/// Constructs the longest common prefix array (LCP) from a PLCP and suffix array.
11834///
11835/// - `plcp` (`[0..n-1]`): the input permuted longest common prefix array.
11836/// - `sa` (`[0..n-1]`): the input suffix array or generalized suffix array (GSA).
11837/// - `lcp` (`[0..n-1]`): the output longest common prefix array (can alias `sa`).
11838///
11839/// Returns 0 on success, -1 on error.
11840pub fn libsais16_lcp(plcp: &[SaSint], sa: &[SaSint], lcp: &mut [SaSint]) -> SaSint {
11841    if plcp.len() != sa.len() || lcp.len() != sa.len() {
11842        return -1;
11843    }
11844    for (row, &suffix) in sa.iter().enumerate() {
11845        let Some(suffix) = suffix_index(suffix, plcp.len()) else {
11846            return -1;
11847        };
11848        lcp[row] = plcp[suffix];
11849    }
11850    0
11851}
11852
11853fn suffix_index(value: SaSint, len: usize) -> Option<usize> {
11854    usize::try_from(value).ok().filter(|&index| index < len)
11855}
11856
11857/// Constructs the PLCP of a given 16-bit string and suffix array in parallel using OpenMP-style threading.
11858///
11859/// - `t` (`[0..n-1]`): the input 16-bit string.
11860/// - `sa` (`[0..n-1]`): the input suffix array.
11861/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
11862/// - `threads`: number of worker threads (can be 0 for the implementation default).
11863///
11864/// Returns 0 on success, -1 on error.
11865pub fn libsais16_plcp_omp(
11866    t: &[u16],
11867    sa: &[SaSint],
11868    plcp: &mut [SaSint],
11869    threads: SaSint,
11870) -> SaSint {
11871    if threads < 0 {
11872        return -1;
11873    }
11874    if sa.len() != t.len() || plcp.len() != t.len() {
11875        return -1;
11876    }
11877    if t.len() <= 1 {
11878        if t.len() == 1 {
11879            plcp[0] = 0;
11880        }
11881        return 0;
11882    }
11883
11884    let n = t.len() as SaSint;
11885    let threads = normalize_threads(threads);
11886    if compute_phi_omp(sa, plcp, n, threads) != 0 {
11887        return -1;
11888    }
11889    compute_plcp_omp(t, plcp, n, threads)
11890}
11891
11892/// Constructs the PLCP of a given 16-bit string set and GSA in parallel using OpenMP-style threading.
11893///
11894/// - `t` (`[0..n-1]`): the input 16-bit string set using 0 as separators (`t[n-1]` must be 0).
11895/// - `sa` (`[0..n-1]`): the input generalized suffix array.
11896/// - `plcp` (`[0..n-1]`): the output permuted longest common prefix array.
11897/// - `threads`: number of worker threads (can be 0 for the implementation default).
11898///
11899/// Returns 0 on success, -1 on error.
11900pub fn libsais16_plcp_gsa_omp(
11901    t: &[u16],
11902    sa: &[SaSint],
11903    plcp: &mut [SaSint],
11904    threads: SaSint,
11905) -> SaSint {
11906    if threads < 0 {
11907        return -1;
11908    }
11909    if t.last().copied().unwrap_or(0) != 0 {
11910        return -1;
11911    }
11912    if sa.len() != t.len() || plcp.len() != t.len() {
11913        return -1;
11914    }
11915    if t.len() <= 1 {
11916        if t.len() == 1 {
11917            plcp[0] = 0;
11918        }
11919        return 0;
11920    }
11921
11922    let n = t.len() as SaSint;
11923    let threads = normalize_threads(threads);
11924    if compute_phi_omp(sa, plcp, n, threads) != 0 {
11925        return -1;
11926    }
11927    compute_plcp_gsa_omp(t, plcp, n, threads)
11928}
11929
11930/// Constructs the LCP from a PLCP and suffix array in parallel using OpenMP-style threading.
11931///
11932/// - `plcp` (`[0..n-1]`): the input permuted longest common prefix array.
11933/// - `sa` (`[0..n-1]`): the input suffix array or generalized suffix array (GSA).
11934/// - `lcp` (`[0..n-1]`): the output longest common prefix array (can alias `sa`).
11935/// - `threads`: number of worker threads (can be 0 for the implementation default).
11936///
11937/// Returns 0 on success, -1 on error.
11938pub fn libsais16_lcp_omp(
11939    plcp: &[SaSint],
11940    sa: &[SaSint],
11941    lcp: &mut [SaSint],
11942    threads: SaSint,
11943) -> SaSint {
11944    if threads < 0 {
11945        return -1;
11946    }
11947    if plcp.len() != sa.len() || lcp.len() != sa.len() {
11948        return -1;
11949    }
11950
11951    compute_lcp_omp(
11952        plcp,
11953        sa,
11954        lcp,
11955        sa.len() as SaSint,
11956        normalize_threads(threads),
11957    )
11958}
11959
11960#[cfg(all(test, feature = "upstream-c"))]
11961mod tests {
11962    use super::*;
11963
11964    unsafe extern "C" {
11965        fn probe_public_libsais16(t: *const u16, sa: *mut SaSint, n: SaSint, fs: SaSint) -> SaSint;
11966        fn probe_public_libsais16_freq(
11967            t: *const u16,
11968            sa: *mut SaSint,
11969            n: SaSint,
11970            fs: SaSint,
11971            freq: *mut SaSint,
11972        ) -> SaSint;
11973        fn probe_public_libsais16_gsa(
11974            t: *const u16,
11975            sa: *mut SaSint,
11976            n: SaSint,
11977            fs: SaSint,
11978        ) -> SaSint;
11979        fn probe_public_libsais16_gsa_freq(
11980            t: *const u16,
11981            sa: *mut SaSint,
11982            n: SaSint,
11983            fs: SaSint,
11984            freq: *mut SaSint,
11985        ) -> SaSint;
11986        fn probe_public_libsais16_int(
11987            t: *mut SaSint,
11988            sa: *mut SaSint,
11989            n: SaSint,
11990            k: SaSint,
11991            fs: SaSint,
11992        ) -> SaSint;
11993        fn probe_libsais16_main_32s_entry(
11994            t: *mut SaSint,
11995            sa: *mut SaSint,
11996            n: SaSint,
11997            k: SaSint,
11998            fs: SaSint,
11999            threads: SaSint,
12000        ) -> SaSint;
12001        fn probe_libsais16_final_sorting_scan_left_to_right_32s(
12002            t: *const SaSint,
12003            sa: *mut SaSint,
12004            induction_bucket: *mut SaSint,
12005            omp_block_start: SaSint,
12006            omp_block_size: SaSint,
12007        );
12008        fn probe_libsais16_final_sorting_scan_right_to_left_32s(
12009            t: *const SaSint,
12010            sa: *mut SaSint,
12011            induction_bucket: *mut SaSint,
12012            omp_block_start: SaSint,
12013            omp_block_size: SaSint,
12014        );
12015        fn probe_libsais16_clear_lms_suffixes_omp(
12016            sa: *mut SaSint,
12017            n: SaSint,
12018            k: SaSint,
12019            bucket_start: *mut SaSint,
12020            bucket_end: *mut SaSint,
12021            threads: SaSint,
12022        );
12023        fn probe_libsais16_flip_suffix_markers_omp(sa: *mut SaSint, l: SaSint, threads: SaSint);
12024        fn probe_libsais16_induce_final_order_32s_6k(
12025            t: *const SaSint,
12026            sa: *mut SaSint,
12027            n: SaSint,
12028            k: SaSint,
12029            buckets: *mut SaSint,
12030            threads: SaSint,
12031        );
12032        fn probe_libsais16_induce_final_order_32s_4k(
12033            t: *const SaSint,
12034            sa: *mut SaSint,
12035            n: SaSint,
12036            k: SaSint,
12037            buckets: *mut SaSint,
12038            threads: SaSint,
12039        );
12040        fn probe_libsais16_induce_final_order_32s_2k(
12041            t: *const SaSint,
12042            sa: *mut SaSint,
12043            n: SaSint,
12044            k: SaSint,
12045            buckets: *mut SaSint,
12046            threads: SaSint,
12047        );
12048        fn probe_libsais16_induce_final_order_32s_1k(
12049            t: *const SaSint,
12050            sa: *mut SaSint,
12051            n: SaSint,
12052            k: SaSint,
12053            buckets: *mut SaSint,
12054            threads: SaSint,
12055        );
12056        fn probe_libsais16_induce_partial_order_32s_6k_omp(
12057            t: *const SaSint,
12058            sa: *mut SaSint,
12059            n: SaSint,
12060            k: SaSint,
12061            buckets: *mut SaSint,
12062            first_lms_suffix: SaSint,
12063            left_suffixes_count: SaSint,
12064            threads: SaSint,
12065        );
12066        fn probe_libsais16_induce_partial_order_32s_4k_omp(
12067            t: *const SaSint,
12068            sa: *mut SaSint,
12069            n: SaSint,
12070            k: SaSint,
12071            buckets: *mut SaSint,
12072            threads: SaSint,
12073        );
12074        fn probe_libsais16_induce_partial_order_32s_2k_omp(
12075            t: *const SaSint,
12076            sa: *mut SaSint,
12077            n: SaSint,
12078            k: SaSint,
12079            buckets: *mut SaSint,
12080            threads: SaSint,
12081        );
12082        fn probe_libsais16_induce_partial_order_32s_1k_omp(
12083            t: *const SaSint,
12084            sa: *mut SaSint,
12085            n: SaSint,
12086            k: SaSint,
12087            buckets: *mut SaSint,
12088            threads: SaSint,
12089        );
12090        fn probe_libsais16_induce_partial_order_16u_omp(
12091            t: *const u16,
12092            sa: *mut SaSint,
12093            n: SaSint,
12094            k: SaSint,
12095            flags: SaSint,
12096            buckets: *mut SaSint,
12097            first_lms_suffix: SaSint,
12098            left_suffixes_count: SaSint,
12099            threads: SaSint,
12100        );
12101        fn probe_libsais16_induce_final_order_16u_omp(
12102            t: *const u16,
12103            sa: *mut SaSint,
12104            n: SaSint,
12105            k: SaSint,
12106            flags: SaSint,
12107            r: SaSint,
12108            i: *mut SaSint,
12109            buckets: *mut SaSint,
12110            threads: SaSint,
12111        ) -> SaSint;
12112        fn probe_public_libsais16_bwt(
12113            t: *const u16,
12114            u: *mut u16,
12115            a: *mut SaSint,
12116            n: SaSint,
12117            fs: SaSint,
12118        ) -> SaSint;
12119        fn probe_public_libsais16_bwt_freq(
12120            t: *const u16,
12121            u: *mut u16,
12122            a: *mut SaSint,
12123            n: SaSint,
12124            fs: SaSint,
12125            freq: *mut SaSint,
12126        ) -> SaSint;
12127        fn probe_public_libsais16_bwt_aux(
12128            t: *const u16,
12129            u: *mut u16,
12130            a: *mut SaSint,
12131            n: SaSint,
12132            fs: SaSint,
12133            r: SaSint,
12134            i: *mut SaSint,
12135        ) -> SaSint;
12136        fn probe_public_libsais16_bwt_aux_freq(
12137            t: *const u16,
12138            u: *mut u16,
12139            a: *mut SaSint,
12140            n: SaSint,
12141            fs: SaSint,
12142            freq: *mut SaSint,
12143            r: SaSint,
12144            i: *mut SaSint,
12145        ) -> SaSint;
12146        fn probe_public_libsais16_unbwt(
12147            t: *const u16,
12148            u: *mut u16,
12149            a: *mut SaSint,
12150            n: SaSint,
12151            i: SaSint,
12152        ) -> SaSint;
12153        fn probe_public_libsais16_unbwt_freq(
12154            t: *const u16,
12155            u: *mut u16,
12156            a: *mut SaSint,
12157            n: SaSint,
12158            freq: *const SaSint,
12159            i: SaSint,
12160        ) -> SaSint;
12161        fn probe_public_libsais16_unbwt_aux(
12162            t: *const u16,
12163            u: *mut u16,
12164            a: *mut SaSint,
12165            n: SaSint,
12166            r: SaSint,
12167            i: *const SaSint,
12168        ) -> SaSint;
12169        fn probe_public_libsais16_unbwt_aux_freq(
12170            t: *const u16,
12171            u: *mut u16,
12172            a: *mut SaSint,
12173            n: SaSint,
12174            freq: *const SaSint,
12175            r: SaSint,
12176            i: *const SaSint,
12177        ) -> SaSint;
12178        fn probe_public_libsais16_plcp(
12179            t: *const u16,
12180            sa: *const SaSint,
12181            plcp: *mut SaSint,
12182            n: SaSint,
12183        ) -> SaSint;
12184        fn probe_public_libsais16_plcp_gsa(
12185            t: *const u16,
12186            sa: *const SaSint,
12187            plcp: *mut SaSint,
12188            n: SaSint,
12189        ) -> SaSint;
12190        fn probe_public_libsais16_lcp(
12191            plcp: *const SaSint,
12192            sa: *const SaSint,
12193            lcp: *mut SaSint,
12194            n: SaSint,
12195        ) -> SaSint;
12196        fn probe_libsais16_gather_lms_suffixes_16u(
12197            t: *const u16,
12198            sa: *mut SaSint,
12199            n: SaSint,
12200            m: SaSint,
12201            omp_block_start: SaSint,
12202            omp_block_size: SaSint,
12203        );
12204        fn probe_libsais16_count_and_gather_lms_suffixes_16u(
12205            t: *const u16,
12206            sa: *mut SaSint,
12207            n: SaSint,
12208            buckets: *mut SaSint,
12209            omp_block_start: SaSint,
12210            omp_block_size: SaSint,
12211        ) -> SaSint;
12212        fn probe_libsais16_initialize_buckets_start_and_end_16u(
12213            buckets: *mut SaSint,
12214            freq: *mut SaSint,
12215        ) -> SaSint;
12216        fn probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_16u(
12217            t: *const u16,
12218            buckets: *mut SaSint,
12219            first_lms_suffix: SaSint,
12220        ) -> SaSint;
12221        fn probe_libsais16_radix_sort_lms_suffixes_16u(
12222            t: *const u16,
12223            sa: *mut SaSint,
12224            induction_bucket: *mut SaSint,
12225            omp_block_start: SaSint,
12226            omp_block_size: SaSint,
12227        );
12228        fn probe_libsais16_initialize_buckets_for_partial_sorting_16u(
12229            t: *const u16,
12230            buckets: *mut SaSint,
12231            first_lms_suffix: SaSint,
12232            left_suffixes_count: SaSint,
12233        );
12234        fn probe_libsais16_partial_sorting_scan_left_to_right_16u(
12235            t: *const u16,
12236            sa: *mut SaSint,
12237            buckets: *mut SaSint,
12238            d: SaSint,
12239            omp_block_start: SaSint,
12240            omp_block_size: SaSint,
12241        ) -> SaSint;
12242        fn probe_libsais16_partial_sorting_scan_right_to_left_16u(
12243            t: *const u16,
12244            sa: *mut SaSint,
12245            buckets: *mut SaSint,
12246            d: SaSint,
12247            omp_block_start: SaSint,
12248            omp_block_size: SaSint,
12249        ) -> SaSint;
12250        fn probe_libsais16_partial_gsa_scan_right_to_left_16u(
12251            t: *const u16,
12252            sa: *mut SaSint,
12253            buckets: *mut SaSint,
12254            d: SaSint,
12255            omp_block_start: SaSint,
12256            omp_block_size: SaSint,
12257        ) -> SaSint;
12258        fn probe_libsais16_partial_sorting_shift_markers_16u_omp(
12259            sa: *mut SaSint,
12260            n: SaSint,
12261            buckets: *const SaSint,
12262            threads: SaSint,
12263        );
12264        fn probe_libsais16_final_sorting_scan_left_to_right_16u(
12265            t: *const u16,
12266            sa: *mut SaSint,
12267            induction_bucket: *mut SaSint,
12268            omp_block_start: SaSint,
12269            omp_block_size: SaSint,
12270        );
12271        fn probe_libsais16_final_sorting_scan_right_to_left_16u(
12272            t: *const u16,
12273            sa: *mut SaSint,
12274            induction_bucket: *mut SaSint,
12275            omp_block_start: SaSint,
12276            omp_block_size: SaSint,
12277        );
12278        fn probe_libsais16_final_gsa_scan_right_to_left_16u(
12279            t: *const u16,
12280            sa: *mut SaSint,
12281            induction_bucket: *mut SaSint,
12282            omp_block_start: SaSint,
12283            omp_block_size: SaSint,
12284        );
12285        fn probe_libsais16_final_bwt_scan_left_to_right_16u(
12286            t: *const u16,
12287            sa: *mut SaSint,
12288            induction_bucket: *mut SaSint,
12289            omp_block_start: SaSint,
12290            omp_block_size: SaSint,
12291        );
12292        fn probe_libsais16_final_bwt_scan_right_to_left_16u(
12293            t: *const u16,
12294            sa: *mut SaSint,
12295            induction_bucket: *mut SaSint,
12296            omp_block_start: SaSint,
12297            omp_block_size: SaSint,
12298        ) -> SaSint;
12299        fn probe_libsais16_final_bwt_aux_scan_left_to_right_16u(
12300            t: *const u16,
12301            sa: *mut SaSint,
12302            rm: SaSint,
12303            i_sample: *mut SaSint,
12304            induction_bucket: *mut SaSint,
12305            omp_block_start: SaSint,
12306            omp_block_size: SaSint,
12307        );
12308        fn probe_libsais16_final_bwt_aux_scan_right_to_left_16u(
12309            t: *const u16,
12310            sa: *mut SaSint,
12311            rm: SaSint,
12312            i_sample: *mut SaSint,
12313            induction_bucket: *mut SaSint,
12314            omp_block_start: SaSint,
12315            omp_block_size: SaSint,
12316        );
12317        fn probe_libsais16_renumber_lms_suffixes_16u(
12318            sa: *mut SaSint,
12319            m: SaSint,
12320            name: SaSint,
12321            omp_block_start: SaSint,
12322            omp_block_size: SaSint,
12323        ) -> SaSint;
12324        fn probe_libsais16_place_lms_suffixes_interval_16u(
12325            sa: *mut SaSint,
12326            n: SaSint,
12327            m: SaSint,
12328            flags: SaSint,
12329            buckets: *mut SaSint,
12330        );
12331        fn probe_libsais16_bwt_copy_16u(u: *mut u16, a: *mut SaSint, n: SaSint);
12332        fn probe_libsais16_gather_lms_suffixes_16u_omp(
12333            t: *const u16,
12334            sa: *mut SaSint,
12335            n: SaSint,
12336            threads: SaSint,
12337        );
12338        fn probe_libsais16_count_and_gather_lms_suffixes_16u_omp(
12339            t: *const u16,
12340            sa: *mut SaSint,
12341            n: SaSint,
12342            buckets: *mut SaSint,
12343            threads: SaSint,
12344        ) -> SaSint;
12345        fn probe_libsais16_radix_sort_lms_suffixes_16u_omp(
12346            t: *const u16,
12347            sa: *mut SaSint,
12348            n: SaSint,
12349            m: SaSint,
12350            flags: SaSint,
12351            buckets: *mut SaSint,
12352            threads: SaSint,
12353        );
12354        fn probe_libsais16_partial_sorting_scan_left_to_right_16u_omp(
12355            t: *const u16,
12356            sa: *mut SaSint,
12357            n: SaSint,
12358            k: SaSint,
12359            buckets: *mut SaSint,
12360            left_suffixes_count: SaSint,
12361            d: SaSint,
12362            threads: SaSint,
12363        ) -> SaSint;
12364        fn probe_libsais16_partial_sorting_scan_right_to_left_16u_omp(
12365            t: *const u16,
12366            sa: *mut SaSint,
12367            n: SaSint,
12368            k: SaSint,
12369            buckets: *mut SaSint,
12370            first_lms_suffix: SaSint,
12371            left_suffixes_count: SaSint,
12372            d: SaSint,
12373            threads: SaSint,
12374        );
12375        fn probe_libsais16_partial_gsa_scan_right_to_left_16u_omp(
12376            t: *const u16,
12377            sa: *mut SaSint,
12378            n: SaSint,
12379            k: SaSint,
12380            buckets: *mut SaSint,
12381            first_lms_suffix: SaSint,
12382            left_suffixes_count: SaSint,
12383            d: SaSint,
12384            threads: SaSint,
12385        );
12386        fn probe_libsais16_renumber_lms_suffixes_16u_omp(
12387            sa: *mut SaSint,
12388            m: SaSint,
12389            threads: SaSint,
12390        ) -> SaSint;
12391        fn probe_libsais16_final_bwt_scan_left_to_right_16u_omp(
12392            t: *const u16,
12393            sa: *mut SaSint,
12394            n: SaSint,
12395            k: SaSint,
12396            induction_bucket: *mut SaSint,
12397            threads: SaSint,
12398        );
12399        fn probe_libsais16_final_bwt_aux_scan_left_to_right_16u_omp(
12400            t: *const u16,
12401            sa: *mut SaSint,
12402            n: SaSint,
12403            k: SaSint,
12404            rm: SaSint,
12405            i_sample: *mut SaSint,
12406            induction_bucket: *mut SaSint,
12407            threads: SaSint,
12408        );
12409        fn probe_libsais16_final_sorting_scan_left_to_right_16u_omp(
12410            t: *const u16,
12411            sa: *mut SaSint,
12412            n: SaSint,
12413            k: SaSint,
12414            induction_bucket: *mut SaSint,
12415            threads: SaSint,
12416        );
12417        fn probe_libsais16_final_bwt_scan_right_to_left_16u_omp(
12418            t: *const u16,
12419            sa: *mut SaSint,
12420            n: SaSint,
12421            k: SaSint,
12422            induction_bucket: *mut SaSint,
12423            threads: SaSint,
12424        ) -> SaSint;
12425        fn probe_libsais16_final_bwt_aux_scan_right_to_left_16u_omp(
12426            t: *const u16,
12427            sa: *mut SaSint,
12428            n: SaSint,
12429            k: SaSint,
12430            rm: SaSint,
12431            i_sample: *mut SaSint,
12432            induction_bucket: *mut SaSint,
12433            threads: SaSint,
12434        );
12435        fn probe_libsais16_final_sorting_scan_right_to_left_16u_omp(
12436            t: *const u16,
12437            sa: *mut SaSint,
12438            omp_block_start: SaSint,
12439            omp_block_size: SaSint,
12440            k: SaSint,
12441            induction_bucket: *mut SaSint,
12442            threads: SaSint,
12443        );
12444        fn probe_libsais16_final_gsa_scan_right_to_left_16u_omp(
12445            t: *const u16,
12446            sa: *mut SaSint,
12447            omp_block_start: SaSint,
12448            omp_block_size: SaSint,
12449            k: SaSint,
12450            induction_bucket: *mut SaSint,
12451            threads: SaSint,
12452        );
12453        fn probe_libsais16_bwt_copy_16u_omp(
12454            u: *mut u16,
12455            a: *mut SaSint,
12456            n: SaSint,
12457            threads: SaSint,
12458        );
12459        fn probe_libsais16_gather_marked_lms_suffixes(
12460            sa: *mut SaSint,
12461            m: SaSint,
12462            l: SaSint,
12463            omp_block_start: SaSint,
12464            omp_block_size: SaSint,
12465        ) -> SaSint;
12466        fn probe_libsais16_gather_marked_lms_suffixes_omp(
12467            sa: *mut SaSint,
12468            n: SaSint,
12469            m: SaSint,
12470            fs: SaSint,
12471            threads: SaSint,
12472        );
12473        fn probe_libsais16_renumber_and_gather_lms_suffixes_omp(
12474            sa: *mut SaSint,
12475            n: SaSint,
12476            m: SaSint,
12477            fs: SaSint,
12478            threads: SaSint,
12479        ) -> SaSint;
12480        fn probe_libsais16_reconstruct_lms_suffixes(
12481            sa: *mut SaSint,
12482            n: SaSint,
12483            m: SaSint,
12484            omp_block_start: SaSint,
12485            omp_block_size: SaSint,
12486        );
12487        fn probe_libsais16_reconstruct_lms_suffixes_omp(
12488            sa: *mut SaSint,
12489            n: SaSint,
12490            m: SaSint,
12491            threads: SaSint,
12492        );
12493        fn probe_libsais16_renumber_distinct_lms_suffixes_32s_4k(
12494            sa: *mut SaSint,
12495            m: SaSint,
12496            name: SaSint,
12497            omp_block_start: SaSint,
12498            omp_block_size: SaSint,
12499        ) -> SaSint;
12500        fn probe_libsais16_mark_distinct_lms_suffixes_32s(
12501            sa: *mut SaSint,
12502            m: SaSint,
12503            omp_block_start: SaSint,
12504            omp_block_size: SaSint,
12505        );
12506        fn probe_libsais16_clamp_lms_suffixes_length_32s(
12507            sa: *mut SaSint,
12508            m: SaSint,
12509            omp_block_start: SaSint,
12510            omp_block_size: SaSint,
12511        );
12512        fn probe_libsais16_renumber_distinct_lms_suffixes_32s_4k_omp(
12513            sa: *mut SaSint,
12514            m: SaSint,
12515            threads: SaSint,
12516        ) -> SaSint;
12517        fn probe_libsais16_mark_distinct_lms_suffixes_32s_omp(
12518            sa: *mut SaSint,
12519            n: SaSint,
12520            m: SaSint,
12521            threads: SaSint,
12522        );
12523        fn probe_libsais16_clamp_lms_suffixes_length_32s_omp(
12524            sa: *mut SaSint,
12525            n: SaSint,
12526            m: SaSint,
12527            threads: SaSint,
12528        );
12529        fn probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
12530            sa: *mut SaSint,
12531            n: SaSint,
12532            m: SaSint,
12533            threads: SaSint,
12534        ) -> SaSint;
12535        fn probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s(
12536            t: *mut SaSint,
12537            sa: *mut SaSint,
12538            m: SaSint,
12539            f: SaSint,
12540            omp_block_start: SaSint,
12541            omp_block_size: SaSint,
12542        ) -> SaSint;
12543        fn probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s(
12544            sa: *mut SaSint,
12545            m: SaSint,
12546            pl: *mut SaSint,
12547            pr: *mut SaSint,
12548            omp_block_start: SaSint,
12549            omp_block_size: SaSint,
12550        );
12551        fn probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
12552            t: *mut SaSint,
12553            sa: *mut SaSint,
12554            m: SaSint,
12555            threads: SaSint,
12556        ) -> SaSint;
12557        fn probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s_omp(
12558            sa: *mut SaSint,
12559            n: SaSint,
12560            m: SaSint,
12561            fs: SaSint,
12562            f: SaSint,
12563            threads: SaSint,
12564        );
12565        fn probe_libsais16_compact_lms_suffixes_32s_omp(
12566            t: *mut SaSint,
12567            sa: *mut SaSint,
12568            n: SaSint,
12569            m: SaSint,
12570            fs: SaSint,
12571            threads: SaSint,
12572        ) -> SaSint;
12573        fn probe_libsais16_merge_unique_lms_suffixes_32s(
12574            t: *mut SaSint,
12575            sa: *mut SaSint,
12576            n: SaSint,
12577            m: SaSint,
12578            l: SaSint,
12579            omp_block_start: SaSint,
12580            omp_block_size: SaSint,
12581        );
12582        fn probe_libsais16_merge_nonunique_lms_suffixes_32s(
12583            sa: *mut SaSint,
12584            n: SaSint,
12585            m: SaSint,
12586            l: SaSint,
12587            omp_block_start: SaSint,
12588            omp_block_size: SaSint,
12589        );
12590        fn probe_libsais16_merge_unique_lms_suffixes_32s_omp(
12591            t: *mut SaSint,
12592            sa: *mut SaSint,
12593            n: SaSint,
12594            m: SaSint,
12595            threads: SaSint,
12596        );
12597        fn probe_libsais16_merge_nonunique_lms_suffixes_32s_omp(
12598            sa: *mut SaSint,
12599            n: SaSint,
12600            m: SaSint,
12601            f: SaSint,
12602            threads: SaSint,
12603        );
12604        fn probe_libsais16_merge_compacted_lms_suffixes_32s_omp(
12605            t: *mut SaSint,
12606            sa: *mut SaSint,
12607            n: SaSint,
12608            m: SaSint,
12609            f: SaSint,
12610            threads: SaSint,
12611        );
12612        fn probe_libsais16_radix_sort_lms_suffixes_32s_6k(
12613            t: *const SaSint,
12614            sa: *mut SaSint,
12615            induction_bucket: *mut SaSint,
12616            omp_block_start: SaSint,
12617            omp_block_size: SaSint,
12618        );
12619        fn probe_libsais16_radix_sort_lms_suffixes_32s_2k(
12620            t: *const SaSint,
12621            sa: *mut SaSint,
12622            induction_bucket: *mut SaSint,
12623            omp_block_start: SaSint,
12624            omp_block_size: SaSint,
12625        );
12626        fn probe_libsais16_radix_sort_lms_suffixes_32s_6k_omp(
12627            t: *const SaSint,
12628            sa: *mut SaSint,
12629            n: SaSint,
12630            m: SaSint,
12631            induction_bucket: *mut SaSint,
12632            threads: SaSint,
12633        );
12634        fn probe_libsais16_radix_sort_lms_suffixes_32s_2k_omp(
12635            t: *const SaSint,
12636            sa: *mut SaSint,
12637            n: SaSint,
12638            m: SaSint,
12639            induction_bucket: *mut SaSint,
12640            threads: SaSint,
12641        );
12642        fn probe_libsais16_radix_sort_lms_suffixes_32s_1k(
12643            t: *const SaSint,
12644            sa: *mut SaSint,
12645            n: SaSint,
12646            buckets: *mut SaSint,
12647        ) -> SaSint;
12648        fn probe_libsais16_radix_sort_set_markers_32s_6k(
12649            sa: *mut SaSint,
12650            induction_bucket: *mut SaSint,
12651            omp_block_start: SaSint,
12652            omp_block_size: SaSint,
12653        );
12654        fn probe_libsais16_radix_sort_set_markers_32s_4k(
12655            sa: *mut SaSint,
12656            induction_bucket: *mut SaSint,
12657            omp_block_start: SaSint,
12658            omp_block_size: SaSint,
12659        );
12660        fn probe_libsais16_radix_sort_set_markers_32s_6k_omp(
12661            sa: *mut SaSint,
12662            k: SaSint,
12663            induction_bucket: *mut SaSint,
12664            threads: SaSint,
12665        );
12666        fn probe_libsais16_radix_sort_set_markers_32s_4k_omp(
12667            sa: *mut SaSint,
12668            k: SaSint,
12669            induction_bucket: *mut SaSint,
12670            threads: SaSint,
12671        );
12672        fn probe_libsais16_place_lms_suffixes_histogram_32s_6k(
12673            sa: *mut SaSint,
12674            n: SaSint,
12675            k: SaSint,
12676            m: SaSint,
12677            buckets: *const SaSint,
12678        );
12679        fn probe_libsais16_place_lms_suffixes_histogram_32s_4k(
12680            sa: *mut SaSint,
12681            n: SaSint,
12682            k: SaSint,
12683            m: SaSint,
12684            buckets: *const SaSint,
12685        );
12686        fn probe_libsais16_place_lms_suffixes_histogram_32s_2k(
12687            sa: *mut SaSint,
12688            n: SaSint,
12689            k: SaSint,
12690            m: SaSint,
12691            buckets: *const SaSint,
12692        );
12693        fn probe_libsais16_gather_lms_suffixes_32s(
12694            t: *const SaSint,
12695            sa: *mut SaSint,
12696            n: SaSint,
12697        ) -> SaSint;
12698        fn probe_libsais16_gather_compacted_lms_suffixes_32s(
12699            t: *const SaSint,
12700            sa: *mut SaSint,
12701            n: SaSint,
12702        ) -> SaSint;
12703        fn probe_libsais16_count_lms_suffixes_32s_2k(
12704            t: *const SaSint,
12705            n: SaSint,
12706            k: SaSint,
12707            buckets: *mut SaSint,
12708        );
12709        fn probe_libsais16_count_and_gather_lms_suffixes_32s_4k(
12710            t: *const SaSint,
12711            sa: *mut SaSint,
12712            n: SaSint,
12713            k: SaSint,
12714            buckets: *mut SaSint,
12715            omp_block_start: SaSint,
12716            omp_block_size: SaSint,
12717        ) -> SaSint;
12718        fn probe_libsais16_count_and_gather_lms_suffixes_32s_4k_omp(
12719            t: *const SaSint,
12720            sa: *mut SaSint,
12721            n: SaSint,
12722            k: SaSint,
12723            buckets: *mut SaSint,
12724            local_buckets: SaSint,
12725            threads: SaSint,
12726        ) -> SaSint;
12727        fn probe_libsais16_count_suffixes_32s(
12728            t: *const SaSint,
12729            n: SaSint,
12730            k: SaSint,
12731            buckets: *mut SaSint,
12732        );
12733        fn probe_libsais16_initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: *mut SaSint);
12734        fn probe_libsais16_initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: *mut SaSint);
12735        fn probe_libsais16_initialize_buckets_end_32s_2k(k: SaSint, buckets: *mut SaSint);
12736        fn probe_libsais16_initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: *mut SaSint);
12737        fn probe_libsais16_initialize_buckets_start_32s_1k(k: SaSint, buckets: *mut SaSint);
12738        fn probe_libsais16_initialize_buckets_end_32s_1k(k: SaSint, buckets: *mut SaSint);
12739        fn probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
12740            t: *const SaSint,
12741            k: SaSint,
12742            buckets: *mut SaSint,
12743            first_lms_suffix: SaSint,
12744        );
12745        fn probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
12746            t: *const SaSint,
12747            k: SaSint,
12748            buckets: *mut SaSint,
12749            first_lms_suffix: SaSint,
12750        ) -> SaSint;
12751        fn probe_libsais16_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
12752            t: *const SaSint,
12753            k: SaSint,
12754            buckets: *mut SaSint,
12755            first_lms_suffix: SaSint,
12756        );
12757        fn probe_libsais16_place_lms_suffixes_interval_32s_4k(
12758            sa: *mut SaSint,
12759            n: SaSint,
12760            k: SaSint,
12761            m: SaSint,
12762            buckets: *const SaSint,
12763        );
12764        fn probe_libsais16_place_lms_suffixes_interval_32s_2k(
12765            sa: *mut SaSint,
12766            n: SaSint,
12767            k: SaSint,
12768            m: SaSint,
12769            buckets: *const SaSint,
12770        );
12771        fn probe_libsais16_place_lms_suffixes_interval_32s_1k(
12772            t: *const SaSint,
12773            sa: *mut SaSint,
12774            k: SaSint,
12775            m: SaSint,
12776            buckets: *mut SaSint,
12777        );
12778        fn probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
12779            t: *mut SaSint,
12780            sa: *mut SaSint,
12781            n: SaSint,
12782            m: SaSint,
12783            threads: SaSint,
12784        ) -> SaSint;
12785        fn probe_libsais16_partial_sorting_shift_markers_32s_6k_omp(
12786            sa: *mut SaSint,
12787            k: SaSint,
12788            buckets: *const SaSint,
12789            threads: SaSint,
12790        );
12791        fn probe_libsais16_partial_sorting_shift_markers_32s_4k(sa: *mut SaSint, n: SaSint);
12792        fn probe_libsais16_partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: *mut SaSint);
12793        fn probe_libsais16_partial_sorting_scan_left_to_right_32s_6k(
12794            t: *const SaSint,
12795            sa: *mut SaSint,
12796            buckets: *mut SaSint,
12797            d: SaSint,
12798            omp_block_start: SaSint,
12799            omp_block_size: SaSint,
12800        ) -> SaSint;
12801        fn probe_libsais16_partial_sorting_scan_left_to_right_32s_4k(
12802            t: *const SaSint,
12803            sa: *mut SaSint,
12804            k: SaSint,
12805            buckets: *mut SaSint,
12806            d: SaSint,
12807            omp_block_start: SaSint,
12808            omp_block_size: SaSint,
12809        ) -> SaSint;
12810        fn probe_libsais16_partial_sorting_scan_left_to_right_32s_1k(
12811            t: *const SaSint,
12812            sa: *mut SaSint,
12813            buckets: *mut SaSint,
12814            omp_block_start: SaSint,
12815            omp_block_size: SaSint,
12816        );
12817        fn probe_libsais16_partial_sorting_scan_left_to_right_32s_6k_omp(
12818            t: *const SaSint,
12819            sa: *mut SaSint,
12820            n: SaSint,
12821            buckets: *mut SaSint,
12822            left_suffixes_count: SaSint,
12823            d: SaSint,
12824            threads: SaSint,
12825        ) -> SaSint;
12826        fn probe_libsais16_partial_sorting_scan_left_to_right_32s_4k_omp(
12827            t: *const SaSint,
12828            sa: *mut SaSint,
12829            n: SaSint,
12830            k: SaSint,
12831            buckets: *mut SaSint,
12832            d: SaSint,
12833            threads: SaSint,
12834        ) -> SaSint;
12835        fn probe_libsais16_partial_sorting_scan_left_to_right_32s_1k_omp(
12836            t: *const SaSint,
12837            sa: *mut SaSint,
12838            n: SaSint,
12839            buckets: *mut SaSint,
12840            threads: SaSint,
12841        );
12842        fn probe_libsais16_partial_sorting_scan_right_to_left_32s_6k(
12843            t: *const SaSint,
12844            sa: *mut SaSint,
12845            buckets: *mut SaSint,
12846            d: SaSint,
12847            omp_block_start: SaSint,
12848            omp_block_size: SaSint,
12849        ) -> SaSint;
12850        fn probe_libsais16_partial_sorting_scan_right_to_left_32s_4k(
12851            t: *const SaSint,
12852            sa: *mut SaSint,
12853            k: SaSint,
12854            buckets: *mut SaSint,
12855            d: SaSint,
12856            omp_block_start: SaSint,
12857            omp_block_size: SaSint,
12858        ) -> SaSint;
12859        fn probe_libsais16_partial_sorting_scan_right_to_left_32s_1k(
12860            t: *const SaSint,
12861            sa: *mut SaSint,
12862            buckets: *mut SaSint,
12863            omp_block_start: SaSint,
12864            omp_block_size: SaSint,
12865        );
12866        fn probe_libsais16_partial_sorting_scan_right_to_left_32s_6k_omp(
12867            t: *const SaSint,
12868            sa: *mut SaSint,
12869            n: SaSint,
12870            buckets: *mut SaSint,
12871            first_lms_suffix: SaSint,
12872            left_suffixes_count: SaSint,
12873            d: SaSint,
12874            threads: SaSint,
12875        ) -> SaSint;
12876        fn probe_libsais16_partial_sorting_scan_right_to_left_32s_4k_omp(
12877            t: *const SaSint,
12878            sa: *mut SaSint,
12879            n: SaSint,
12880            k: SaSint,
12881            buckets: *mut SaSint,
12882            d: SaSint,
12883            threads: SaSint,
12884        ) -> SaSint;
12885        fn probe_libsais16_partial_sorting_scan_right_to_left_32s_1k_omp(
12886            t: *const SaSint,
12887            sa: *mut SaSint,
12888            n: SaSint,
12889            buckets: *mut SaSint,
12890            threads: SaSint,
12891        );
12892        fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k(
12893            sa: *mut SaSint,
12894            omp_block_start: SaSint,
12895            omp_block_size: SaSint,
12896        ) -> SaSint;
12897        fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k(
12898            sa: *mut SaSint,
12899            omp_block_start: SaSint,
12900            omp_block_size: SaSint,
12901        ) -> SaSint;
12902        fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k_omp(
12903            sa: *mut SaSint,
12904            n: SaSint,
12905            threads: SaSint,
12906        );
12907        fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k_omp(
12908            sa: *mut SaSint,
12909            n: SaSint,
12910            threads: SaSint,
12911        );
12912        fn probe_libsais16_count_and_gather_lms_suffixes_32s_2k(
12913            t: *const SaSint,
12914            sa: *mut SaSint,
12915            n: SaSint,
12916            k: SaSint,
12917            buckets: *mut SaSint,
12918            omp_block_start: SaSint,
12919            omp_block_size: SaSint,
12920        ) -> SaSint;
12921        fn probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k(
12922            t: *const SaSint,
12923            sa: *mut SaSint,
12924            n: SaSint,
12925            k: SaSint,
12926            buckets: *mut SaSint,
12927            omp_block_start: SaSint,
12928            omp_block_size: SaSint,
12929        ) -> SaSint;
12930        fn probe_libsais16_count_and_gather_lms_suffixes_32s_2k_omp(
12931            t: *const SaSint,
12932            sa: *mut SaSint,
12933            n: SaSint,
12934            k: SaSint,
12935            buckets: *mut SaSint,
12936            local_buckets: SaSint,
12937            threads: SaSint,
12938        ) -> SaSint;
12939        fn probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
12940            t: *const SaSint,
12941            sa: *mut SaSint,
12942            n: SaSint,
12943            k: SaSint,
12944            buckets: *mut SaSint,
12945            local_buckets: SaSint,
12946            threads: SaSint,
12947        );
12948        fn probe_libsais16_reconstruct_compacted_lms_suffixes_32s_2k_omp(
12949            t: *mut SaSint,
12950            sa: *mut SaSint,
12951            n: SaSint,
12952            k: SaSint,
12953            m: SaSint,
12954            fs: SaSint,
12955            f: SaSint,
12956            buckets: *mut SaSint,
12957            local_buckets: SaSint,
12958            threads: SaSint,
12959        );
12960        fn probe_libsais16_reconstruct_compacted_lms_suffixes_32s_1k_omp(
12961            t: *mut SaSint,
12962            sa: *mut SaSint,
12963            n: SaSint,
12964            m: SaSint,
12965            fs: SaSint,
12966            f: SaSint,
12967            threads: SaSint,
12968        );
12969    }
12970
12971    fn brute_sa(t: &[u16]) -> Vec<SaSint> {
12972        let mut sa: Vec<_> = (0..t.len() as SaSint).collect();
12973        sa.sort_by(|&a, &b| t[a as usize..].cmp(&t[b as usize..]));
12974        sa
12975    }
12976
12977    #[test]
12978    fn libsais16_gather_lms_suffixes_16u_matches_c() {
12979        let cases: &[&[u16]] = &[
12980            &[2, 1, 3, 1, 2, 0],
12981            &[7, 7, 7, 7, 0],
12982            &[3, 1, 2, 1, 0, 4, 1, 0],
12983            &[9, 1, 9, 1, 9, 0, 2, 2, 0],
12984        ];
12985
12986        for &text in cases {
12987            let n = text.len() as SaSint;
12988            let mut rust_sa = vec![-99; text.len()];
12989            let mut c_sa = rust_sa.clone();
12990
12991            gather_lms_suffixes_16u(text, &mut rust_sa, n, n - 1, 0, n);
12992            unsafe {
12993                probe_libsais16_gather_lms_suffixes_16u(
12994                    text.as_ptr(),
12995                    c_sa.as_mut_ptr(),
12996                    n,
12997                    n - 1,
12998                    0,
12999                    n,
13000                );
13001            }
13002
13003            assert_eq!(rust_sa, c_sa);
13004        }
13005    }
13006
13007    #[test]
13008    fn libsais16_count_and_gather_lms_suffixes_16u_matches_c() {
13009        let cases: &[&[u16]] = &[
13010            &[2, 1, 3, 1, 2, 0],
13011            &[7, 7, 7, 7, 0],
13012            &[3, 1, 2, 1, 0, 4, 1, 0],
13013            &[9, 1, 9, 1, 9, 0, 2, 2, 0],
13014        ];
13015
13016        for &text in cases {
13017            let n = text.len() as SaSint;
13018            let mut rust_sa = vec![-99; text.len()];
13019            let mut c_sa = rust_sa.clone();
13020            let mut rust_buckets = vec![-1; 4 * ALPHABET_SIZE];
13021            let mut c_buckets = rust_buckets.clone();
13022
13023            let rust_m =
13024                count_and_gather_lms_suffixes_16u(text, &mut rust_sa, n, &mut rust_buckets, 0, n);
13025            let c_m = unsafe {
13026                probe_libsais16_count_and_gather_lms_suffixes_16u(
13027                    text.as_ptr(),
13028                    c_sa.as_mut_ptr(),
13029                    n,
13030                    c_buckets.as_mut_ptr(),
13031                    0,
13032                    n,
13033                )
13034            };
13035
13036            assert_eq!(rust_m, c_m);
13037            assert_eq!(rust_sa, c_sa);
13038            assert_eq!(rust_buckets, c_buckets);
13039        }
13040    }
13041
13042    #[test]
13043    fn libsais16_initialize_buckets_start_and_end_16u_matches_c() {
13044        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
13045        for (symbol, counts) in [
13046            (0usize, [1, 0, 0, 2]),
13047            (1, [0, 3, 1, 0]),
13048            (7, [2, 1, 0, 1]),
13049            (1024, [0, 0, 5, 0]),
13050        ] {
13051            for state in 0..4 {
13052                rust_buckets[buckets_index4(symbol, state)] = counts[state];
13053            }
13054        }
13055        let mut c_buckets = rust_buckets.clone();
13056        let mut rust_freq = vec![-1; ALPHABET_SIZE];
13057        let mut c_freq = rust_freq.clone();
13058
13059        let rust_k = initialize_buckets_start_and_end_16u(&mut rust_buckets, Some(&mut rust_freq));
13060        let c_k = unsafe {
13061            probe_libsais16_initialize_buckets_start_and_end_16u(
13062                c_buckets.as_mut_ptr(),
13063                c_freq.as_mut_ptr(),
13064            )
13065        };
13066
13067        assert_eq!(rust_k, c_k);
13068        assert_eq!(rust_buckets, c_buckets);
13069        assert_eq!(rust_freq, c_freq);
13070
13071        let mut rust_buckets_no_freq = vec![0; 8 * ALPHABET_SIZE];
13072        rust_buckets_no_freq[..4 * ALPHABET_SIZE]
13073            .copy_from_slice(&rust_buckets[..4 * ALPHABET_SIZE]);
13074        let mut c_buckets_no_freq = rust_buckets_no_freq.clone();
13075
13076        let rust_k = initialize_buckets_start_and_end_16u(&mut rust_buckets_no_freq, None);
13077        let c_k = unsafe {
13078            probe_libsais16_initialize_buckets_start_and_end_16u(
13079                c_buckets_no_freq.as_mut_ptr(),
13080                std::ptr::null_mut(),
13081            )
13082        };
13083
13084        assert_eq!(rust_k, c_k);
13085        assert_eq!(rust_buckets_no_freq, c_buckets_no_freq);
13086    }
13087
13088    #[test]
13089    fn libsais16_lms_radix_bucket_initialization_matches_c() {
13090        let text = [3, 1, 2, 1, 0, 4, 1, 0];
13091        let n = text.len() as SaSint;
13092        let mut rust_sa = vec![-99; text.len()];
13093        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
13094        let m = count_and_gather_lms_suffixes_16u(
13095            &text,
13096            &mut rust_sa,
13097            n,
13098            &mut rust_buckets[..4 * ALPHABET_SIZE],
13099            0,
13100            n,
13101        );
13102        initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
13103        let first_lms_suffix = rust_sa[(n - m) as usize];
13104
13105        let mut c_buckets = rust_buckets.clone();
13106        let rust_count = initialize_buckets_for_lms_suffixes_radix_sort_16u(
13107            &text,
13108            &mut rust_buckets,
13109            first_lms_suffix,
13110        );
13111        let c_count = unsafe {
13112            probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_16u(
13113                text.as_ptr(),
13114                c_buckets.as_mut_ptr(),
13115                first_lms_suffix,
13116            )
13117        };
13118
13119        assert_eq!(rust_count, c_count);
13120        assert_eq!(rust_buckets, c_buckets);
13121    }
13122
13123    #[test]
13124    fn libsais16_radix_sort_lms_suffixes_16u_matches_c() {
13125        let text = [3, 1, 2, 1, 0, 4, 1, 0];
13126        let n = text.len() as SaSint;
13127        let mut rust_sa = vec![-99; text.len()];
13128        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
13129        let m = count_and_gather_lms_suffixes_16u(
13130            &text,
13131            &mut rust_sa,
13132            n,
13133            &mut rust_buckets[..4 * ALPHABET_SIZE],
13134            0,
13135            n,
13136        );
13137        initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
13138        let first_lms_suffix = rust_sa[(n - m) as usize];
13139        initialize_buckets_for_lms_suffixes_radix_sort_16u(
13140            &text,
13141            &mut rust_buckets,
13142            first_lms_suffix,
13143        );
13144
13145        let mut c_sa = rust_sa.clone();
13146        let mut c_buckets = rust_buckets.clone();
13147        {
13148            let induction_bucket = &mut rust_buckets[4 * ALPHABET_SIZE..];
13149            radix_sort_lms_suffixes_16u(&text, &mut rust_sa, induction_bucket, n - m + 1, m - 1);
13150        }
13151        unsafe {
13152            probe_libsais16_radix_sort_lms_suffixes_16u(
13153                text.as_ptr(),
13154                c_sa.as_mut_ptr(),
13155                c_buckets[4 * ALPHABET_SIZE..].as_mut_ptr(),
13156                n - m + 1,
13157                m - 1,
13158            );
13159        }
13160
13161        assert_eq!(rust_sa, c_sa);
13162        assert_eq!(rust_buckets, c_buckets);
13163    }
13164
13165    #[test]
13166    fn libsais16_initialize_buckets_for_partial_sorting_16u_matches_c() {
13167        let text = [3, 1, 2, 1, 0, 4, 1, 0];
13168        let n = text.len() as SaSint;
13169        let mut rust_sa = vec![-99; text.len()];
13170        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
13171        let m = count_and_gather_lms_suffixes_16u(
13172            &text,
13173            &mut rust_sa,
13174            n,
13175            &mut rust_buckets[..4 * ALPHABET_SIZE],
13176            0,
13177            n,
13178        );
13179        initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
13180        let first_lms_suffix = rust_sa[(n - m) as usize];
13181        let left_suffixes_count = initialize_buckets_for_lms_suffixes_radix_sort_16u(
13182            &text,
13183            &mut rust_buckets,
13184            first_lms_suffix,
13185        );
13186        let mut c_buckets = rust_buckets.clone();
13187
13188        initialize_buckets_for_partial_sorting_16u(
13189            &text,
13190            &mut rust_buckets,
13191            first_lms_suffix,
13192            left_suffixes_count,
13193        );
13194        unsafe {
13195            probe_libsais16_initialize_buckets_for_partial_sorting_16u(
13196                text.as_ptr(),
13197                c_buckets.as_mut_ptr(),
13198                first_lms_suffix,
13199                left_suffixes_count,
13200            );
13201        }
13202
13203        assert_eq!(rust_buckets, c_buckets);
13204    }
13205
13206    fn partial_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13207        let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13208        let mut sa = vec![0; 128];
13209        sa[..5].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13210
13211        let mut buckets = vec![0; 6 * ALPHABET_SIZE];
13212        for v in 0..32 {
13213            buckets[v] = 80 + (v as SaSint) * 4;
13214            buckets[2 * ALPHABET_SIZE + v] = if v % 3 == 0 { 2 } else { 0 };
13215            buckets[4 * ALPHABET_SIZE + v] = 20 + (v as SaSint) * 4;
13216        }
13217
13218        (text, sa, buckets)
13219    }
13220
13221    #[test]
13222    fn libsais16_partial_sorting_scan_left_to_right_16u_matches_c() {
13223        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
13224        let mut c_sa = rust_sa.clone();
13225        let mut c_buckets = rust_buckets.clone();
13226
13227        let rust_d =
13228            partial_sorting_scan_left_to_right_16u(&text, &mut rust_sa, &mut rust_buckets, 3, 0, 5);
13229        let c_d = unsafe {
13230            probe_libsais16_partial_sorting_scan_left_to_right_16u(
13231                text.as_ptr(),
13232                c_sa.as_mut_ptr(),
13233                c_buckets.as_mut_ptr(),
13234                3,
13235                0,
13236                5,
13237            )
13238        };
13239
13240        assert_eq!(rust_d, c_d);
13241        assert_eq!(rust_sa, c_sa);
13242        assert_eq!(rust_buckets, c_buckets);
13243    }
13244
13245    #[test]
13246    fn libsais16_partial_sorting_scan_right_to_left_16u_matches_c() {
13247        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
13248        let mut c_sa = rust_sa.clone();
13249        let mut c_buckets = rust_buckets.clone();
13250
13251        let rust_d =
13252            partial_sorting_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_buckets, 3, 0, 5);
13253        let c_d = unsafe {
13254            probe_libsais16_partial_sorting_scan_right_to_left_16u(
13255                text.as_ptr(),
13256                c_sa.as_mut_ptr(),
13257                c_buckets.as_mut_ptr(),
13258                3,
13259                0,
13260                5,
13261            )
13262        };
13263
13264        assert_eq!(rust_d, c_d);
13265        assert_eq!(rust_sa, c_sa);
13266        assert_eq!(rust_buckets, c_buckets);
13267    }
13268
13269    #[test]
13270    fn libsais16_partial_gsa_scan_right_to_left_16u_matches_c() {
13271        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
13272        let mut c_sa = rust_sa.clone();
13273        let mut c_buckets = rust_buckets.clone();
13274
13275        let rust_d =
13276            partial_gsa_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_buckets, 3, 0, 5);
13277        let c_d = unsafe {
13278            probe_libsais16_partial_gsa_scan_right_to_left_16u(
13279                text.as_ptr(),
13280                c_sa.as_mut_ptr(),
13281                c_buckets.as_mut_ptr(),
13282                3,
13283                0,
13284                5,
13285            )
13286        };
13287
13288        assert_eq!(rust_d, c_d);
13289        assert_eq!(rust_sa, c_sa);
13290        assert_eq!(rust_buckets, c_buckets);
13291    }
13292
13293    #[test]
13294    fn libsais16_partial_sorting_shift_markers_16u_matches_c() {
13295        let mut rust_sa = vec![0; 16];
13296        rust_sa[2..6].copy_from_slice(&[1, 2 | SAINT_MIN, 3 | SAINT_MIN, 4]);
13297        rust_sa[8..12].copy_from_slice(&[5 | SAINT_MIN, 6, 7 | SAINT_MIN, 8]);
13298        let mut c_sa = rust_sa.clone();
13299
13300        let mut buckets = vec![0; 6 * ALPHABET_SIZE];
13301        buckets[0] = 2;
13302        buckets[2] = 8;
13303        buckets[4 * ALPHABET_SIZE + 2] = 6;
13304        buckets[4 * ALPHABET_SIZE + 4] = 12;
13305
13306        let n = rust_sa.len() as SaSint;
13307        partial_sorting_shift_markers_16u_omp(&mut rust_sa, n, &buckets, 1);
13308        unsafe {
13309            probe_libsais16_partial_sorting_shift_markers_16u_omp(
13310                c_sa.as_mut_ptr(),
13311                c_sa.len() as SaSint,
13312                buckets.as_ptr(),
13313                1,
13314            );
13315        }
13316
13317        assert_eq!(rust_sa, c_sa);
13318    }
13319
13320    #[test]
13321    fn libsais16_partial_left_to_right_16u_block_omp_uses_cache_pipeline() {
13322        let block_size = 65_536usize;
13323        let k = 512usize;
13324        let text: Vec<u16> = (0..block_size + 2)
13325            .map(|i| 1 + ((i * 17 + i / 7) % (k - 1)) as u16)
13326            .collect();
13327        let sa_len = block_size + 2 * k * 100;
13328        let mut base_sa = vec![0; sa_len];
13329        for (i, slot) in base_sa.iter_mut().take(block_size).enumerate() {
13330            *slot = (i + 2) as SaSint;
13331        }
13332        let mut base_buckets = vec![0; 8 * ALPHABET_SIZE];
13333        for v in 0..2 * k {
13334            base_buckets[4 * ALPHABET_SIZE + v] = (block_size + v * 100) as SaSint;
13335        }
13336
13337        let mut scalar_sa = base_sa.clone();
13338        let mut threaded_sa = base_sa;
13339        let mut scalar_buckets = base_buckets.clone();
13340        let mut threaded_buckets = base_buckets;
13341        let mut thread_state = alloc_thread_state(4).unwrap();
13342        let scalar_d = partial_sorting_scan_left_to_right_16u(
13343            &text,
13344            &mut scalar_sa,
13345            &mut scalar_buckets,
13346            0,
13347            0,
13348            block_size as SaSint,
13349        );
13350        let threaded_d = partial_sorting_scan_left_to_right_16u_block_omp(
13351            &text,
13352            &mut threaded_sa,
13353            k as SaSint,
13354            &mut threaded_buckets,
13355            0,
13356            0,
13357            block_size as SaSint,
13358            4,
13359            &mut thread_state,
13360        );
13361
13362        assert_eq!(threaded_d, scalar_d);
13363        assert_eq!(threaded_sa, scalar_sa);
13364        assert_eq!(threaded_buckets, scalar_buckets);
13365    }
13366
13367    #[test]
13368    fn libsais16_partial_left_to_right_16u_omp_uses_block_pipeline() {
13369        let block_size = 65_536usize;
13370        let k = 512usize;
13371        let text: Vec<u16> = (0..block_size + 2)
13372            .map(|i| 1 + ((i * 17 + i / 7) % (k - 1)) as u16)
13373            .collect();
13374        let sa_len = block_size + 2 * k * 100;
13375        let mut base_sa = vec![0; sa_len];
13376        for (i, slot) in base_sa.iter_mut().take(block_size).enumerate() {
13377            let value = (i + 2) as SaSint;
13378            *slot = if i % 17 == 0 {
13379                value | SAINT_MIN
13380            } else {
13381                value
13382            };
13383        }
13384        let mut base_buckets = vec![0; 8 * ALPHABET_SIZE];
13385        for v in 0..2 * k {
13386            base_buckets[4 * ALPHABET_SIZE + v] = (block_size + v * 100) as SaSint;
13387            base_buckets[2 * ALPHABET_SIZE + v] = if v % 5 == 0 { 3 } else { 0 };
13388        }
13389
13390        let mut scalar_sa = base_sa.clone();
13391        let mut threaded_sa = base_sa;
13392        let mut scalar_buckets = base_buckets.clone();
13393        let mut threaded_buckets = base_buckets;
13394        let scalar_d = partial_sorting_scan_left_to_right_16u_omp(
13395            &text,
13396            &mut scalar_sa,
13397            text.len() as SaSint,
13398            k as SaSint,
13399            &mut scalar_buckets,
13400            block_size as SaSint,
13401            7,
13402            1,
13403        );
13404        let threaded_d = partial_sorting_scan_left_to_right_16u_omp(
13405            &text,
13406            &mut threaded_sa,
13407            text.len() as SaSint,
13408            k as SaSint,
13409            &mut threaded_buckets,
13410            block_size as SaSint,
13411            7,
13412            4,
13413        );
13414
13415        assert_eq!(threaded_d, scalar_d);
13416        assert_eq!(threaded_sa, scalar_sa);
13417        assert_eq!(threaded_buckets, scalar_buckets);
13418    }
13419
13420    #[test]
13421    fn libsais16_partial_right_to_left_16u_block_omp_uses_cache_pipeline() {
13422        let block_size = 65_536usize;
13423        let k = 512usize;
13424        let width = 2 * k;
13425        let block_start = width * 200 + 1024;
13426        let text: Vec<u16> = (0..block_size + 2)
13427            .map(|i| 1 + ((i * 17 + i / 7) % (k - 1)) as u16)
13428            .collect();
13429        let sa_len = block_start + block_size + 1;
13430        let mut base_sa = vec![0; sa_len];
13431        for i in 0..block_size {
13432            let value = (i + 2) as SaSint;
13433            base_sa[block_start + i] = if i % 17 == 0 {
13434                value | SAINT_MIN
13435            } else {
13436                value
13437            };
13438        }
13439        let mut base_buckets = vec![0; 8 * ALPHABET_SIZE];
13440        for v in 0..width {
13441            base_buckets[v] = ((v + 1) * 200) as SaSint;
13442            base_buckets[2 * ALPHABET_SIZE + v] = if v % 5 == 0 { 3 } else { 0 };
13443        }
13444
13445        let mut scalar_sa = base_sa.clone();
13446        let mut threaded_sa = base_sa.clone();
13447        let mut scalar_buckets = base_buckets.clone();
13448        let mut threaded_buckets = base_buckets.clone();
13449        let mut thread_state = alloc_thread_state(4).unwrap();
13450        let scalar_d = partial_sorting_scan_right_to_left_16u(
13451            &text,
13452            &mut scalar_sa,
13453            &mut scalar_buckets,
13454            7,
13455            block_start as SaSint,
13456            block_size as SaSint,
13457        );
13458        let threaded_d = partial_sorting_scan_right_to_left_16u_block_omp(
13459            &text,
13460            &mut threaded_sa,
13461            k as SaSint,
13462            &mut threaded_buckets,
13463            7,
13464            block_start as SaSint,
13465            block_size as SaSint,
13466            4,
13467            &mut thread_state,
13468        );
13469        assert_eq!(threaded_d, scalar_d);
13470        assert_eq!(threaded_sa, scalar_sa);
13471        assert_eq!(threaded_buckets, scalar_buckets);
13472
13473        let mut scalar_sa = base_sa;
13474        let mut threaded_sa = scalar_sa.clone();
13475        let mut scalar_buckets = base_buckets.clone();
13476        let mut threaded_buckets = base_buckets;
13477        let scalar_d = partial_gsa_scan_right_to_left_16u(
13478            &text,
13479            &mut scalar_sa,
13480            &mut scalar_buckets,
13481            7,
13482            block_start as SaSint,
13483            block_size as SaSint,
13484        );
13485        let threaded_d = partial_gsa_scan_right_to_left_16u_block_omp(
13486            &text,
13487            &mut threaded_sa,
13488            k as SaSint,
13489            &mut threaded_buckets,
13490            7,
13491            block_start as SaSint,
13492            block_size as SaSint,
13493            4,
13494            &mut thread_state,
13495        );
13496        assert_eq!(threaded_d, scalar_d);
13497        assert_eq!(threaded_sa, scalar_sa);
13498        assert_eq!(threaded_buckets, scalar_buckets);
13499    }
13500
13501    #[test]
13502    fn libsais16_partial_right_to_left_16u_omp_uses_block_pipeline() {
13503        let block_size = 65_536usize;
13504        let k = 512usize;
13505        let width = 2 * k;
13506        let block_start = width * 200 + 1024;
13507        let text: Vec<u16> = (0..block_size + 2)
13508            .map(|i| 1 + ((i * 17 + i / 7) % (k - 1)) as u16)
13509            .collect();
13510        let sa_len = block_start + block_size + 1;
13511        let n = sa_len as SaSint;
13512        let first_lms_suffix = n - (block_start + block_size) as SaSint;
13513        let left_suffixes_count = block_start as SaSint - 1;
13514        let mut base_sa = vec![0; sa_len];
13515        for i in 0..block_size {
13516            let value = (i + 2) as SaSint;
13517            base_sa[block_start + i] = if i % 17 == 0 {
13518                value | SAINT_MIN
13519            } else {
13520                value
13521            };
13522        }
13523        let mut base_buckets = vec![0; 8 * ALPHABET_SIZE];
13524        for v in 0..width {
13525            base_buckets[v] = ((v + 1) * 200) as SaSint;
13526            base_buckets[2 * ALPHABET_SIZE + v] = if v % 5 == 0 { 3 } else { 0 };
13527        }
13528
13529        let mut scalar_sa = base_sa.clone();
13530        let mut threaded_sa = base_sa.clone();
13531        let mut scalar_buckets = base_buckets.clone();
13532        let mut threaded_buckets = base_buckets.clone();
13533        partial_sorting_scan_right_to_left_16u_omp(
13534            &text,
13535            &mut scalar_sa,
13536            n,
13537            k as SaSint,
13538            &mut scalar_buckets,
13539            first_lms_suffix,
13540            left_suffixes_count,
13541            7,
13542            1,
13543        );
13544        partial_sorting_scan_right_to_left_16u_omp(
13545            &text,
13546            &mut threaded_sa,
13547            n,
13548            k as SaSint,
13549            &mut threaded_buckets,
13550            first_lms_suffix,
13551            left_suffixes_count,
13552            7,
13553            4,
13554        );
13555        assert_eq!(threaded_sa, scalar_sa);
13556        assert_eq!(threaded_buckets, scalar_buckets);
13557
13558        let mut scalar_sa = base_sa;
13559        let mut threaded_sa = scalar_sa.clone();
13560        let mut scalar_buckets = base_buckets.clone();
13561        let mut threaded_buckets = base_buckets;
13562        partial_gsa_scan_right_to_left_16u_omp(
13563            &text,
13564            &mut scalar_sa,
13565            n,
13566            k as SaSint,
13567            &mut scalar_buckets,
13568            first_lms_suffix,
13569            left_suffixes_count,
13570            7,
13571            1,
13572        );
13573        partial_gsa_scan_right_to_left_16u_omp(
13574            &text,
13575            &mut threaded_sa,
13576            n,
13577            k as SaSint,
13578            &mut threaded_buckets,
13579            first_lms_suffix,
13580            left_suffixes_count,
13581            7,
13582            4,
13583        );
13584        assert_eq!(threaded_sa, scalar_sa);
13585        assert_eq!(threaded_buckets, scalar_buckets);
13586    }
13587
13588    fn final_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13589        let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13590        let mut sa = vec![0; 96];
13591        sa[..6].copy_from_slice(&[3, 0, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13592
13593        let mut induction_bucket = vec![0; ALPHABET_SIZE];
13594        for c in 0..8 {
13595            induction_bucket[c] = 24 + (c as SaSint) * 6;
13596        }
13597
13598        (text, sa, induction_bucket)
13599    }
13600
13601    fn final_order_buckets(induction_bucket: &[SaSint]) -> Vec<SaSint> {
13602        let mut buckets = vec![0; 8 * ALPHABET_SIZE];
13603        buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13604        buckets[7 * ALPHABET_SIZE..8 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13605        buckets
13606    }
13607
13608    #[test]
13609    fn libsais16_final_sorting_scan_left_to_right_16u_matches_c() {
13610        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
13611        let mut c_sa = rust_sa.clone();
13612        let mut c_bucket = rust_bucket.clone();
13613
13614        final_sorting_scan_left_to_right_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
13615        unsafe {
13616            probe_libsais16_final_sorting_scan_left_to_right_16u(
13617                text.as_ptr(),
13618                c_sa.as_mut_ptr(),
13619                c_bucket.as_mut_ptr(),
13620                0,
13621                6,
13622            );
13623        }
13624
13625        assert_eq!(rust_sa, c_sa);
13626        assert_eq!(rust_bucket, c_bucket);
13627    }
13628
13629    #[test]
13630    fn libsais16_final_sorting_scan_right_to_left_16u_matches_c() {
13631        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
13632        let mut c_sa = rust_sa.clone();
13633        let mut c_bucket = rust_bucket.clone();
13634
13635        final_sorting_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
13636        unsafe {
13637            probe_libsais16_final_sorting_scan_right_to_left_16u(
13638                text.as_ptr(),
13639                c_sa.as_mut_ptr(),
13640                c_bucket.as_mut_ptr(),
13641                0,
13642                6,
13643            );
13644        }
13645
13646        assert_eq!(rust_sa, c_sa);
13647        assert_eq!(rust_bucket, c_bucket);
13648    }
13649
13650    #[test]
13651    fn libsais16_final_gsa_scan_right_to_left_16u_matches_c() {
13652        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
13653        let mut c_sa = rust_sa.clone();
13654        let mut c_bucket = rust_bucket.clone();
13655
13656        final_gsa_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
13657        unsafe {
13658            probe_libsais16_final_gsa_scan_right_to_left_16u(
13659                text.as_ptr(),
13660                c_sa.as_mut_ptr(),
13661                c_bucket.as_mut_ptr(),
13662                0,
13663                6,
13664            );
13665        }
13666
13667        assert_eq!(rust_sa, c_sa);
13668        assert_eq!(rust_bucket, c_bucket);
13669    }
13670
13671    #[test]
13672    fn libsais16_final_sorting_32s_helpers_behave_like_upstream_shapes() {
13673        let t = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];
13674
13675        let mut rust_sa = vec![1, 0, 0];
13676        let mut rust_bucket = vec![0, 1, 3];
13677        let mut c_sa = rust_sa.clone();
13678        let mut c_bucket = rust_bucket.clone();
13679        final_sorting_scan_left_to_right_32s(&t, &mut rust_sa, &mut rust_bucket, 0, 1);
13680        unsafe {
13681            probe_libsais16_final_sorting_scan_left_to_right_32s(
13682                t.as_ptr(),
13683                c_sa.as_mut_ptr(),
13684                c_bucket.as_mut_ptr(),
13685                0,
13686                1,
13687            );
13688        }
13689        assert_eq!(rust_sa, c_sa);
13690        assert_eq!(rust_bucket, c_bucket);
13691
13692        let mut rust_sa = vec![0, 2, 0];
13693        let mut rust_bucket = vec![1, 2, 3];
13694        let mut c_sa = rust_sa.clone();
13695        let mut c_bucket = rust_bucket.clone();
13696        final_sorting_scan_right_to_left_32s(&t, &mut rust_sa, &mut rust_bucket, 0, 2);
13697        unsafe {
13698            probe_libsais16_final_sorting_scan_right_to_left_32s(
13699                t.as_ptr(),
13700                c_sa.as_mut_ptr(),
13701                c_bucket.as_mut_ptr(),
13702                0,
13703                2,
13704            );
13705        }
13706        assert_eq!(rust_sa, c_sa);
13707        assert_eq!(rust_bucket, c_bucket);
13708
13709        let mut sa = vec![1, 2, 0, 0];
13710        let mut induction_bucket = vec![0, 1, 3];
13711        let mut cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];
13712        final_sorting_scan_left_to_right_32s_block_omp(
13713            &t,
13714            &mut sa,
13715            &mut induction_bucket,
13716            &mut cache,
13717            0,
13718            2,
13719            2,
13720        );
13721        assert_eq!(sa[0] & SAINT_MAX, 0);
13722        assert_eq!(sa[1] & SAINT_MAX, 1);
13723        assert_eq!(induction_bucket[0], 1);
13724        assert_eq!(induction_bucket[1], 2);
13725
13726        let mut sa = vec![0, 2, 0, 0];
13727        let mut induction_bucket = vec![1, 2, 3];
13728        let mut cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];
13729        final_sorting_scan_right_to_left_32s_block_omp(
13730            &t,
13731            &mut sa,
13732            &mut induction_bucket,
13733            &mut cache,
13734            0,
13735            2,
13736            2,
13737        );
13738        assert_eq!(sa[1] & SAINT_MAX, 1);
13739        assert_eq!(induction_bucket[1], 1);
13740    }
13741
13742    #[test]
13743    fn libsais16_final_left_to_right_16u_block_omp_uses_cache_pipeline() {
13744        let block_size = 65_536usize;
13745        let k = 512usize;
13746        let text: Vec<u16> = (0..=block_size).map(|i| 1 + (i % (k - 1)) as u16).collect();
13747        let sa_len = block_size + k * 200;
13748        let mut base_sa = vec![0; sa_len];
13749        for (i, slot) in base_sa.iter_mut().take(block_size).enumerate() {
13750            *slot = (i + 1) as SaSint;
13751        }
13752        let mut base_bucket = vec![0; k];
13753        for c in 0..k {
13754            base_bucket[c] = (block_size + c * 200) as SaSint;
13755        }
13756
13757        let mut scalar_sa = base_sa.clone();
13758        let mut threaded_sa = base_sa.clone();
13759        let mut scalar_bucket = base_bucket.clone();
13760        let mut threaded_bucket = base_bucket.clone();
13761        let mut thread_state = alloc_thread_state(4).unwrap();
13762        final_bwt_scan_left_to_right_16u(
13763            &text,
13764            &mut scalar_sa,
13765            &mut scalar_bucket,
13766            0,
13767            block_size as SaSint,
13768        );
13769        final_bwt_scan_left_to_right_16u_block_omp(
13770            &text,
13771            &mut threaded_sa,
13772            k as SaSint,
13773            &mut threaded_bucket,
13774            0,
13775            block_size as SaSint,
13776            4,
13777            &mut thread_state,
13778        );
13779        assert_eq!(threaded_sa, scalar_sa);
13780        assert_eq!(threaded_bucket, scalar_bucket);
13781
13782        let rm = 3;
13783        let mut scalar_sa = base_sa.clone();
13784        let mut threaded_sa = base_sa.clone();
13785        let mut scalar_bucket = base_bucket.clone();
13786        let mut threaded_bucket = base_bucket.clone();
13787        let mut scalar_i = vec![-1; (block_size / (rm as usize + 1)) + 2];
13788        let mut threaded_i = scalar_i.clone();
13789        final_bwt_aux_scan_left_to_right_16u(
13790            &text,
13791            &mut scalar_sa,
13792            rm,
13793            &mut scalar_i,
13794            &mut scalar_bucket,
13795            0,
13796            block_size as SaSint,
13797        );
13798        final_bwt_aux_scan_left_to_right_16u_block_omp(
13799            &text,
13800            &mut threaded_sa,
13801            k as SaSint,
13802            rm,
13803            &mut threaded_i,
13804            &mut threaded_bucket,
13805            0,
13806            block_size as SaSint,
13807            4,
13808            &mut thread_state,
13809        );
13810        assert_eq!(threaded_sa, scalar_sa);
13811        assert_eq!(threaded_i, scalar_i);
13812        assert_eq!(threaded_bucket, scalar_bucket);
13813
13814        let mut scalar_sa = base_sa;
13815        let mut threaded_sa = scalar_sa.clone();
13816        let mut scalar_bucket = base_bucket.clone();
13817        let mut threaded_bucket = base_bucket;
13818        final_sorting_scan_left_to_right_16u(
13819            &text,
13820            &mut scalar_sa,
13821            &mut scalar_bucket,
13822            0,
13823            block_size as SaSint,
13824        );
13825        final_sorting_scan_left_to_right_16u_block_omp(
13826            &text,
13827            &mut threaded_sa,
13828            k as SaSint,
13829            &mut threaded_bucket,
13830            0,
13831            block_size as SaSint,
13832            4,
13833            &mut thread_state,
13834        );
13835        assert_eq!(threaded_sa, scalar_sa);
13836        assert_eq!(threaded_bucket, scalar_bucket);
13837    }
13838
13839    #[test]
13840    fn libsais16_final_right_to_left_16u_block_omp_uses_cache_pipeline() {
13841        let block_size = 65_536usize;
13842        let k = 512usize;
13843        let block_start = k * 200 + 1024;
13844        let text: Vec<u16> = (0..=block_size + 1)
13845            .map(|i| 1 + (i % (k - 1)) as u16)
13846            .collect();
13847        let sa_len = block_start + block_size + 1;
13848        let mut base_sa = vec![0; sa_len];
13849        for i in 0..block_size {
13850            base_sa[block_start + i] = (i + 1) as SaSint;
13851        }
13852        let mut base_bucket = vec![0; k];
13853        for c in 0..k {
13854            base_bucket[c] = ((c + 1) * 200) as SaSint;
13855        }
13856
13857        let mut scalar_sa = base_sa.clone();
13858        let mut threaded_sa = base_sa.clone();
13859        let mut scalar_bucket = base_bucket.clone();
13860        let mut threaded_bucket = base_bucket.clone();
13861        let mut thread_state = alloc_thread_state(4).unwrap();
13862        final_bwt_scan_right_to_left_16u(
13863            &text,
13864            &mut scalar_sa,
13865            &mut scalar_bucket,
13866            block_start as SaSint,
13867            block_size as SaSint,
13868        );
13869        final_bwt_scan_right_to_left_16u_block_omp(
13870            &text,
13871            &mut threaded_sa,
13872            k as SaSint,
13873            &mut threaded_bucket,
13874            block_start as SaSint,
13875            block_size as SaSint,
13876            4,
13877            &mut thread_state,
13878        );
13879        assert_eq!(threaded_sa, scalar_sa);
13880        assert_eq!(threaded_bucket, scalar_bucket);
13881
13882        let rm = 3;
13883        let mut scalar_sa = base_sa.clone();
13884        let mut threaded_sa = base_sa.clone();
13885        let mut scalar_bucket = base_bucket.clone();
13886        let mut threaded_bucket = base_bucket.clone();
13887        let mut scalar_i = vec![-1; (block_size / (rm as usize + 1)) + 2];
13888        let mut threaded_i = scalar_i.clone();
13889        final_bwt_aux_scan_right_to_left_16u(
13890            &text,
13891            &mut scalar_sa,
13892            rm,
13893            &mut scalar_i,
13894            &mut scalar_bucket,
13895            block_start as SaSint,
13896            block_size as SaSint,
13897        );
13898        final_bwt_aux_scan_right_to_left_16u_block_omp(
13899            &text,
13900            &mut threaded_sa,
13901            k as SaSint,
13902            rm,
13903            &mut threaded_i,
13904            &mut threaded_bucket,
13905            block_start as SaSint,
13906            block_size as SaSint,
13907            4,
13908            &mut thread_state,
13909        );
13910        assert_eq!(threaded_sa, scalar_sa);
13911        assert_eq!(threaded_i, scalar_i);
13912        assert_eq!(threaded_bucket, scalar_bucket);
13913
13914        let mut scalar_sa = base_sa.clone();
13915        let mut threaded_sa = base_sa.clone();
13916        let mut scalar_bucket = base_bucket.clone();
13917        let mut threaded_bucket = base_bucket.clone();
13918        final_sorting_scan_right_to_left_16u(
13919            &text,
13920            &mut scalar_sa,
13921            &mut scalar_bucket,
13922            block_start as SaSint,
13923            block_size as SaSint,
13924        );
13925        final_sorting_scan_right_to_left_16u_block_omp(
13926            &text,
13927            &mut threaded_sa,
13928            k as SaSint,
13929            &mut threaded_bucket,
13930            block_start as SaSint,
13931            block_size as SaSint,
13932            4,
13933            &mut thread_state,
13934        );
13935        assert_eq!(threaded_sa, scalar_sa);
13936        assert_eq!(threaded_bucket, scalar_bucket);
13937
13938        let mut scalar_sa = base_sa;
13939        let mut threaded_sa = scalar_sa.clone();
13940        let mut scalar_bucket = base_bucket.clone();
13941        let mut threaded_bucket = base_bucket;
13942        final_gsa_scan_right_to_left_16u(
13943            &text,
13944            &mut scalar_sa,
13945            &mut scalar_bucket,
13946            block_start as SaSint,
13947            block_size as SaSint,
13948        );
13949        final_gsa_scan_right_to_left_16u_block_omp(
13950            &text,
13951            &mut threaded_sa,
13952            k as SaSint,
13953            &mut threaded_bucket,
13954            block_start as SaSint,
13955            block_size as SaSint,
13956            4,
13957            &mut thread_state,
13958        );
13959        assert_eq!(threaded_sa, scalar_sa);
13960        assert_eq!(threaded_bucket, scalar_bucket);
13961    }
13962
13963    #[test]
13964    fn libsais16_clear_lms_suffixes_omp_zeroes_requested_bucket_ranges() {
13965        let mut rust_sa = vec![5, 4, 3, 2, 1, 9];
13966        let mut c_sa = rust_sa.clone();
13967        let n = rust_sa.len() as SaSint;
13968        let mut bucket_start = vec![1, 4, 5];
13969        let mut bucket_end = vec![3, 5, 5];
13970
13971        clear_lms_suffixes_omp(&mut rust_sa, n, 3, &bucket_start, &bucket_end, 2);
13972        unsafe {
13973            probe_libsais16_clear_lms_suffixes_omp(
13974                c_sa.as_mut_ptr(),
13975                n,
13976                3,
13977                bucket_start.as_mut_ptr(),
13978                bucket_end.as_mut_ptr(),
13979                2,
13980            );
13981        }
13982
13983        assert_eq!(rust_sa, c_sa);
13984    }
13985
13986    #[test]
13987    fn libsais16_partial_order_wrapper_helpers_match_manual_sequence() {
13988        let mut rust_sa = vec![1, 2, 3, 4];
13989        let mut c_sa = rust_sa.clone();
13990        flip_suffix_markers_omp(&mut rust_sa, 3, 2);
13991        unsafe {
13992            probe_libsais16_flip_suffix_markers_omp(c_sa.as_mut_ptr(), 3, 2);
13993        }
13994        assert_eq!(rust_sa, c_sa);
13995
13996        let t = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];
13997        let n = t.len() as SaSint;
13998        let k = 3;
13999        let mut wrapped_sa = vec![0; t.len()];
14000        let mut wrapped_buckets = vec![0; k as usize];
14001        let mut wrapped_state = alloc_thread_state(1).unwrap();
14002        induce_partial_order_32s_1k_omp(
14003            &t,
14004            &mut wrapped_sa,
14005            n,
14006            k,
14007            &mut wrapped_buckets,
14008            1,
14009            &mut wrapped_state,
14010        );
14011
14012        let mut manual_sa = vec![0; t.len()];
14013        let mut manual_buckets = vec![0; k as usize];
14014        let mut manual_state = alloc_thread_state(1).unwrap();
14015        count_suffixes_32s(&t, n, k, &mut manual_buckets);
14016        initialize_buckets_start_32s_1k(k, &mut manual_buckets);
14017        partial_sorting_scan_left_to_right_32s_1k_omp(
14018            &t,
14019            &mut manual_sa,
14020            n,
14021            &mut manual_buckets,
14022            1,
14023            &mut manual_state,
14024        );
14025        count_suffixes_32s(&t, n, k, &mut manual_buckets);
14026        initialize_buckets_end_32s_1k(k, &mut manual_buckets);
14027        partial_sorting_scan_right_to_left_32s_1k_omp(
14028            &t,
14029            &mut manual_sa,
14030            n,
14031            &mut manual_buckets,
14032            1,
14033            &mut manual_state,
14034        );
14035        partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut manual_sa, n, 1, &mut manual_state);
14036
14037        assert_eq!(wrapped_sa, manual_sa);
14038        assert_eq!(wrapped_buckets, manual_buckets);
14039    }
14040
14041    #[test]
14042    fn libsais16_induce_partial_order_32s_wrappers_match_c() {
14043        let t = make_main_32s_stress_text(128, 24);
14044        let n = t.len() as SaSint;
14045        let k = 24;
14046        let threads = 1;
14047
14048        let mut rust_sa = vec![0; t.len()];
14049        let mut rust_buckets = vec![0; 6 * k as usize];
14050        let mut rust_state = alloc_thread_state(threads).unwrap();
14051        let m = count_and_gather_lms_suffixes_32s_4k_omp(
14052            &t,
14053            &mut rust_sa,
14054            n,
14055            k,
14056            &mut rust_buckets,
14057            1,
14058            threads,
14059            &mut rust_state,
14060        );
14061        assert!(m > 1);
14062        rust_sa[..(n - m) as usize].fill(0);
14063        let first_lms_suffix = rust_sa[(n - m) as usize];
14064        let left_suffixes_count = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
14065            &t,
14066            k,
14067            &mut rust_buckets,
14068            first_lms_suffix,
14069        );
14070        let (_, induction_bucket) = rust_buckets.split_at_mut(4 * k as usize);
14071        radix_sort_lms_suffixes_32s_6k_omp(&t, &mut rust_sa, n, m, induction_bucket, threads);
14072        radix_sort_set_markers_32s_6k_omp(&mut rust_sa, k, induction_bucket, threads);
14073        initialize_buckets_for_partial_sorting_32s_6k(
14074            &t,
14075            k,
14076            &mut rust_buckets,
14077            first_lms_suffix,
14078            left_suffixes_count,
14079        );
14080        let mut c_sa = rust_sa.clone();
14081        let mut c_buckets = rust_buckets.clone();
14082        induce_partial_order_32s_6k_omp(
14083            &t,
14084            &mut rust_sa,
14085            n,
14086            k,
14087            &mut rust_buckets,
14088            first_lms_suffix,
14089            left_suffixes_count,
14090            threads,
14091            &mut rust_state,
14092        );
14093        unsafe {
14094            probe_libsais16_induce_partial_order_32s_6k_omp(
14095                t.as_ptr(),
14096                c_sa.as_mut_ptr(),
14097                n,
14098                k,
14099                c_buckets.as_mut_ptr(),
14100                first_lms_suffix,
14101                left_suffixes_count,
14102                threads,
14103            );
14104        }
14105        assert_eq!(rust_sa, c_sa);
14106        assert_eq!(rust_buckets, c_buckets);
14107
14108        let mut rust_sa = vec![0; t.len()];
14109        let mut rust_buckets = vec![0; 4 * k as usize];
14110        let mut rust_state = alloc_thread_state(threads).unwrap();
14111        let m = count_and_gather_lms_suffixes_32s_2k_omp(
14112            &t,
14113            &mut rust_sa,
14114            n,
14115            k,
14116            &mut rust_buckets,
14117            1,
14118            threads,
14119            &mut rust_state,
14120        );
14121        assert!(m > 1);
14122        let first_lms_suffix = rust_sa[(n - m) as usize];
14123        initialize_buckets_for_radix_and_partial_sorting_32s_4k(
14124            &t,
14125            k,
14126            &mut rust_buckets,
14127            first_lms_suffix,
14128        );
14129        let (_, induction_bucket) = rust_buckets.split_at_mut(1);
14130        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut rust_sa, n, m, induction_bucket, threads);
14131        radix_sort_set_markers_32s_4k_omp(&mut rust_sa, k, induction_bucket, threads);
14132        place_lms_suffixes_interval_32s_4k(&mut rust_sa, n, k, m - 1, &rust_buckets);
14133        let mut c_sa = rust_sa.clone();
14134        let mut c_buckets = rust_buckets.clone();
14135        induce_partial_order_32s_4k_omp(
14136            &t,
14137            &mut rust_sa,
14138            n,
14139            k,
14140            &mut rust_buckets,
14141            threads,
14142            &mut rust_state,
14143        );
14144        unsafe {
14145            probe_libsais16_induce_partial_order_32s_4k_omp(
14146                t.as_ptr(),
14147                c_sa.as_mut_ptr(),
14148                n,
14149                k,
14150                c_buckets.as_mut_ptr(),
14151                threads,
14152            );
14153        }
14154        assert_eq!(rust_sa, c_sa);
14155        assert_eq!(rust_buckets, c_buckets);
14156
14157        let mut rust_sa = vec![0; t.len()];
14158        let mut rust_buckets = vec![0; 2 * k as usize];
14159        let mut rust_state = alloc_thread_state(threads).unwrap();
14160        let m = count_and_gather_lms_suffixes_32s_2k_omp(
14161            &t,
14162            &mut rust_sa,
14163            n,
14164            k,
14165            &mut rust_buckets,
14166            1,
14167            threads,
14168            &mut rust_state,
14169        );
14170        assert!(m > 1);
14171        let first_lms_suffix = rust_sa[(n - m) as usize];
14172        initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
14173            &t,
14174            k,
14175            &mut rust_buckets,
14176            first_lms_suffix,
14177        );
14178        let (_, induction_bucket) = rust_buckets.split_at_mut(1);
14179        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut rust_sa, n, m, induction_bucket, threads);
14180        place_lms_suffixes_interval_32s_2k(&mut rust_sa, n, k, m - 1, &rust_buckets);
14181        initialize_buckets_start_and_end_32s_2k(k, &mut rust_buckets);
14182        let mut c_sa = rust_sa.clone();
14183        let mut c_buckets = rust_buckets.clone();
14184        induce_partial_order_32s_2k_omp(
14185            &t,
14186            &mut rust_sa,
14187            n,
14188            k,
14189            &mut rust_buckets,
14190            threads,
14191            &mut rust_state,
14192        );
14193        unsafe {
14194            probe_libsais16_induce_partial_order_32s_2k_omp(
14195                t.as_ptr(),
14196                c_sa.as_mut_ptr(),
14197                n,
14198                k,
14199                c_buckets.as_mut_ptr(),
14200                threads,
14201            );
14202        }
14203        assert_eq!(rust_sa, c_sa);
14204        assert_eq!(rust_buckets, c_buckets);
14205
14206        let mut rust_sa = vec![0; t.len()];
14207        let mut rust_buckets = vec![0; k as usize];
14208        let mut rust_state = alloc_thread_state(threads).unwrap();
14209        count_suffixes_32s(&t, n, k, &mut rust_buckets);
14210        initialize_buckets_end_32s_1k(k, &mut rust_buckets);
14211        let m = radix_sort_lms_suffixes_32s_1k(&t, &mut rust_sa, n, &mut rust_buckets);
14212        assert!(m > 1);
14213        let mut c_sa = rust_sa.clone();
14214        let mut c_buckets = rust_buckets.clone();
14215        induce_partial_order_32s_1k_omp(
14216            &t,
14217            &mut rust_sa,
14218            n,
14219            k,
14220            &mut rust_buckets,
14221            threads,
14222            &mut rust_state,
14223        );
14224        unsafe {
14225            probe_libsais16_induce_partial_order_32s_1k_omp(
14226                t.as_ptr(),
14227                c_sa.as_mut_ptr(),
14228                n,
14229                k,
14230                c_buckets.as_mut_ptr(),
14231                threads,
14232            );
14233        }
14234        assert_eq!(rust_sa, c_sa);
14235        assert_eq!(rust_buckets, c_buckets);
14236    }
14237
14238    #[test]
14239    fn libsais16_induce_partial_order_16u_omp_matches_c() {
14240        let text = [3, 1, 2, 1, 0, 4, 1, 0];
14241        let n = text.len() as SaSint;
14242        let flags = 0;
14243        let threads = 1;
14244        let mut rust_sa = vec![0; text.len()];
14245        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
14246
14247        let m = count_and_gather_lms_suffixes_16u_omp(
14248            &text,
14249            &mut rust_sa,
14250            n,
14251            &mut rust_buckets[..4 * ALPHABET_SIZE],
14252            threads,
14253            &mut [],
14254        );
14255        let k = initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
14256        assert!(m > 0);
14257        let first_lms_suffix = rust_sa[(n - m) as usize];
14258        let left_suffixes_count = initialize_buckets_for_lms_suffixes_radix_sort_16u(
14259            &text,
14260            &mut rust_buckets,
14261            first_lms_suffix,
14262        );
14263        radix_sort_lms_suffixes_16u_omp(
14264            &text,
14265            &mut rust_sa,
14266            n,
14267            m,
14268            flags,
14269            &mut rust_buckets,
14270            threads,
14271            &mut [],
14272        );
14273        initialize_buckets_for_partial_sorting_16u(
14274            &text,
14275            &mut rust_buckets,
14276            first_lms_suffix,
14277            left_suffixes_count,
14278        );
14279
14280        let mut c_sa = rust_sa.clone();
14281        let mut c_buckets = rust_buckets.clone();
14282        induce_partial_order_16u_omp(
14283            &text,
14284            &mut rust_sa,
14285            n,
14286            k,
14287            flags,
14288            &mut rust_buckets,
14289            first_lms_suffix,
14290            left_suffixes_count,
14291            threads,
14292        );
14293        unsafe {
14294            probe_libsais16_induce_partial_order_16u_omp(
14295                text.as_ptr(),
14296                c_sa.as_mut_ptr(),
14297                n,
14298                k,
14299                flags,
14300                c_buckets.as_mut_ptr(),
14301                first_lms_suffix,
14302                left_suffixes_count,
14303                threads,
14304            );
14305        }
14306
14307        assert_eq!(rust_sa, c_sa);
14308        assert_eq!(rust_buckets, c_buckets);
14309    }
14310
14311    fn final_order_32s_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14312        (
14313            vec![0, 1, 2, 1, 0, 1, 2, 1, 0],
14314            vec![1, 0, 2, 0, 0, 0, 0, 0, 0],
14315        )
14316    }
14317
14318    fn seed_final_order_bucket_sections(buckets: &mut [SaSint], k: usize, branch_k: usize) {
14319        let left = [0, 1, 3];
14320        let right = [1, 2, 3];
14321        let left_section = match branch_k {
14322            6 => 4 * k,
14323            4 => 2 * k,
14324            2 => k,
14325            _ => 0,
14326        };
14327        let right_section = match branch_k {
14328            6 => 5 * k,
14329            4 => 3 * k,
14330            2 => 0,
14331            _ => 0,
14332        };
14333        buckets[left_section..left_section + k].copy_from_slice(&left);
14334        buckets[right_section..right_section + k].copy_from_slice(&right);
14335    }
14336
14337    #[test]
14338    fn libsais16_induce_final_order_32s_wrappers_match_c() {
14339        let (t, sa) = final_order_32s_fixture();
14340        let n = t.len() as SaSint;
14341        let k = 3;
14342        let threads = 1;
14343
14344        let mut rust_sa = sa.clone();
14345        let mut rust_buckets = vec![0; 6 * k as usize];
14346        seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 6);
14347        let mut c_sa = rust_sa.clone();
14348        let mut c_buckets = rust_buckets.clone();
14349        let mut rust_state = alloc_thread_state(threads).unwrap();
14350        induce_final_order_32s_6k(
14351            &t,
14352            &mut rust_sa,
14353            n,
14354            k,
14355            &mut rust_buckets,
14356            threads,
14357            &mut rust_state,
14358        );
14359        unsafe {
14360            probe_libsais16_induce_final_order_32s_6k(
14361                t.as_ptr(),
14362                c_sa.as_mut_ptr(),
14363                n,
14364                k,
14365                c_buckets.as_mut_ptr(),
14366                threads,
14367            );
14368        }
14369        assert_eq!(rust_sa, c_sa);
14370        assert_eq!(rust_buckets, c_buckets);
14371
14372        let mut rust_sa = sa.clone();
14373        let mut rust_buckets = vec![0; 4 * k as usize];
14374        seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 4);
14375        let mut c_sa = rust_sa.clone();
14376        let mut c_buckets = rust_buckets.clone();
14377        let mut rust_state = alloc_thread_state(threads).unwrap();
14378        induce_final_order_32s_4k(
14379            &t,
14380            &mut rust_sa,
14381            n,
14382            k,
14383            &mut rust_buckets,
14384            threads,
14385            &mut rust_state,
14386        );
14387        unsafe {
14388            probe_libsais16_induce_final_order_32s_4k(
14389                t.as_ptr(),
14390                c_sa.as_mut_ptr(),
14391                n,
14392                k,
14393                c_buckets.as_mut_ptr(),
14394                threads,
14395            );
14396        }
14397        assert_eq!(rust_sa, c_sa);
14398        assert_eq!(rust_buckets, c_buckets);
14399
14400        let mut rust_sa = sa.clone();
14401        let mut rust_buckets = vec![0; 2 * k as usize];
14402        seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 2);
14403        let mut c_sa = rust_sa.clone();
14404        let mut c_buckets = rust_buckets.clone();
14405        let mut rust_state = alloc_thread_state(threads).unwrap();
14406        induce_final_order_32s_2k(
14407            &t,
14408            &mut rust_sa,
14409            n,
14410            k,
14411            &mut rust_buckets,
14412            threads,
14413            &mut rust_state,
14414        );
14415        unsafe {
14416            probe_libsais16_induce_final_order_32s_2k(
14417                t.as_ptr(),
14418                c_sa.as_mut_ptr(),
14419                n,
14420                k,
14421                c_buckets.as_mut_ptr(),
14422                threads,
14423            );
14424        }
14425        assert_eq!(rust_sa, c_sa);
14426        assert_eq!(rust_buckets, c_buckets);
14427
14428        let mut rust_sa = sa;
14429        let mut rust_buckets = vec![0; k as usize];
14430        let mut c_sa = rust_sa.clone();
14431        let mut c_buckets = rust_buckets.clone();
14432        let mut rust_state = alloc_thread_state(threads).unwrap();
14433        induce_final_order_32s_1k(
14434            &t,
14435            &mut rust_sa,
14436            n,
14437            k,
14438            &mut rust_buckets,
14439            threads,
14440            &mut rust_state,
14441        );
14442        unsafe {
14443            probe_libsais16_induce_final_order_32s_1k(
14444                t.as_ptr(),
14445                c_sa.as_mut_ptr(),
14446                n,
14447                k,
14448                c_buckets.as_mut_ptr(),
14449                threads,
14450            );
14451        }
14452        assert_eq!(rust_sa, c_sa);
14453        assert_eq!(rust_buckets, c_buckets);
14454    }
14455
14456    #[test]
14457    fn libsais16_induce_final_order_16u_omp_matches_manual_sequence() {
14458        let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
14459        let mut wrapped_buckets = final_order_buckets(&induction_bucket);
14460        let mut c_sa = wrapped_sa.clone();
14461        let mut c_buckets = wrapped_buckets.clone();
14462        let mut wrapped_state = alloc_thread_state(1).unwrap();
14463        let wrapped_index = induce_final_order_16u_omp(
14464            &text,
14465            &mut wrapped_sa,
14466            text.len() as SaSint,
14467            8,
14468            0,
14469            0,
14470            None,
14471            &mut wrapped_buckets,
14472            1,
14473            &mut wrapped_state,
14474        );
14475        let c_index = unsafe {
14476            probe_libsais16_induce_final_order_16u_omp(
14477                text.as_ptr(),
14478                c_sa.as_mut_ptr(),
14479                text.len() as SaSint,
14480                8,
14481                0,
14482                0,
14483                std::ptr::null_mut(),
14484                c_buckets.as_mut_ptr(),
14485                1,
14486            )
14487        };
14488
14489        let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
14490        let mut manual_buckets = final_order_buckets(&induction_bucket);
14491        {
14492            let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
14493            final_sorting_scan_left_to_right_16u_omp(
14494                &text,
14495                &mut manual_sa,
14496                text.len() as SaSint,
14497                8,
14498                &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
14499                1,
14500            );
14501            final_sorting_scan_right_to_left_16u_omp(
14502                &text,
14503                &mut manual_sa,
14504                0,
14505                text.len() as SaSint,
14506                8,
14507                &mut right_tail[..ALPHABET_SIZE],
14508                1,
14509            );
14510        }
14511
14512        assert_eq!(wrapped_index, 0);
14513        assert_eq!(wrapped_index, c_index);
14514        assert_eq!(wrapped_sa, manual_sa);
14515        assert_eq!(wrapped_sa, c_sa);
14516        assert_eq!(wrapped_buckets, manual_buckets);
14517        assert_eq!(wrapped_buckets, c_buckets);
14518
14519        let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
14520        let mut wrapped_buckets = final_order_buckets(&induction_bucket);
14521        let mut c_sa = wrapped_sa.clone();
14522        let mut c_buckets = wrapped_buckets.clone();
14523        let mut wrapped_state = alloc_thread_state(1).unwrap();
14524        let wrapped_index = induce_final_order_16u_omp(
14525            &text,
14526            &mut wrapped_sa,
14527            text.len() as SaSint,
14528            8,
14529            LIBSAIS_FLAGS_BWT,
14530            0,
14531            None,
14532            &mut wrapped_buckets,
14533            1,
14534            &mut wrapped_state,
14535        );
14536        let c_index = unsafe {
14537            probe_libsais16_induce_final_order_16u_omp(
14538                text.as_ptr(),
14539                c_sa.as_mut_ptr(),
14540                text.len() as SaSint,
14541                8,
14542                LIBSAIS_FLAGS_BWT,
14543                0,
14544                std::ptr::null_mut(),
14545                c_buckets.as_mut_ptr(),
14546                1,
14547            )
14548        };
14549
14550        let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
14551        let mut manual_buckets = final_order_buckets(&induction_bucket);
14552        let manual_index = {
14553            let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
14554            final_bwt_scan_left_to_right_16u_omp(
14555                &text,
14556                &mut manual_sa,
14557                text.len() as SaSint,
14558                8,
14559                &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
14560                1,
14561            );
14562            final_bwt_scan_right_to_left_16u_omp(
14563                &text,
14564                &mut manual_sa,
14565                text.len() as SaSint,
14566                8,
14567                &mut right_tail[..ALPHABET_SIZE],
14568                1,
14569            )
14570        };
14571
14572        assert_eq!(wrapped_index, manual_index);
14573        assert_eq!(wrapped_index, c_index);
14574        assert_eq!(wrapped_sa, manual_sa);
14575        assert_eq!(wrapped_sa, c_sa);
14576        assert_eq!(wrapped_buckets, manual_buckets);
14577        assert_eq!(wrapped_buckets, c_buckets);
14578
14579        let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
14580        let mut wrapped_buckets = final_order_buckets(&induction_bucket);
14581        let mut c_sa = wrapped_sa.clone();
14582        let mut c_buckets = wrapped_buckets.clone();
14583        let mut wrapped_state = alloc_thread_state(1).unwrap();
14584        let mut wrapped_i = vec![-1; 8];
14585        let mut c_i = wrapped_i.clone();
14586        let wrapped_index = induce_final_order_16u_omp(
14587            &text,
14588            &mut wrapped_sa,
14589            text.len() as SaSint,
14590            8,
14591            LIBSAIS_FLAGS_BWT,
14592            2,
14593            Some(&mut wrapped_i),
14594            &mut wrapped_buckets,
14595            1,
14596            &mut wrapped_state,
14597        );
14598        let c_index = unsafe {
14599            probe_libsais16_induce_final_order_16u_omp(
14600                text.as_ptr(),
14601                c_sa.as_mut_ptr(),
14602                text.len() as SaSint,
14603                8,
14604                LIBSAIS_FLAGS_BWT,
14605                2,
14606                c_i.as_mut_ptr(),
14607                c_buckets.as_mut_ptr(),
14608                1,
14609            )
14610        };
14611
14612        let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
14613        let mut manual_buckets = final_order_buckets(&induction_bucket);
14614        let mut manual_i = vec![-1; 8];
14615        {
14616            let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
14617            final_bwt_aux_scan_left_to_right_16u_omp(
14618                &text,
14619                &mut manual_sa,
14620                text.len() as SaSint,
14621                8,
14622                1,
14623                &mut manual_i,
14624                &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
14625                1,
14626            );
14627            final_bwt_aux_scan_right_to_left_16u_omp(
14628                &text,
14629                &mut manual_sa,
14630                text.len() as SaSint,
14631                8,
14632                1,
14633                &mut manual_i,
14634                &mut right_tail[..ALPHABET_SIZE],
14635                1,
14636            );
14637        }
14638
14639        assert_eq!(wrapped_index, 0);
14640        assert_eq!(wrapped_index, c_index);
14641        assert_eq!(wrapped_sa, manual_sa);
14642        assert_eq!(wrapped_sa, c_sa);
14643        assert_eq!(wrapped_buckets, manual_buckets);
14644        assert_eq!(wrapped_buckets, c_buckets);
14645        assert_eq!(wrapped_i, manual_i);
14646        assert_eq!(wrapped_i, c_i);
14647    }
14648
14649    #[test]
14650    fn libsais16_main_16u_matches_public_c_suffix_array_paths() {
14651        let text = [3, 1, 4, 1, 5, 9, 0, 2];
14652        let n = text.len() as SaSint;
14653        let fs = 32;
14654        let mut rust_sa = vec![0; text.len() + fs as usize];
14655        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
14656        let mut rust_freq = vec![0; ALPHABET_SIZE];
14657        let mut rust_state = alloc_thread_state(1).unwrap();
14658        let rust_index = main_16u(
14659            &text,
14660            &mut rust_sa,
14661            n,
14662            &mut rust_buckets,
14663            0,
14664            0,
14665            None,
14666            fs,
14667            Some(&mut rust_freq),
14668            1,
14669            &mut rust_state,
14670        );
14671
14672        let mut c_sa = vec![0; text.len() + fs as usize];
14673        let mut c_freq = vec![0; ALPHABET_SIZE];
14674        let c_index = unsafe {
14675            probe_public_libsais16_freq(
14676                text.as_ptr(),
14677                c_sa.as_mut_ptr(),
14678                n,
14679                fs,
14680                c_freq.as_mut_ptr(),
14681            )
14682        };
14683
14684        assert_eq!(rust_index, c_index);
14685        assert_eq!(&rust_sa[..text.len()], &c_sa[..text.len()]);
14686        assert_eq!(rust_freq, c_freq);
14687
14688        let text = [2, 1, 0, 2, 0];
14689        let n = text.len() as SaSint;
14690        let fs = 24;
14691        let mut rust_sa = vec![0; text.len() + fs as usize];
14692        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
14693        let mut rust_freq = vec![0; ALPHABET_SIZE];
14694        let mut rust_state = alloc_thread_state(1).unwrap();
14695        let rust_index = main_16u(
14696            &text,
14697            &mut rust_sa,
14698            n,
14699            &mut rust_buckets,
14700            LIBSAIS_FLAGS_GSA,
14701            0,
14702            None,
14703            fs,
14704            Some(&mut rust_freq),
14705            1,
14706            &mut rust_state,
14707        );
14708
14709        let mut c_sa = vec![0; text.len() + fs as usize];
14710        let mut c_freq = vec![0; ALPHABET_SIZE];
14711        let c_index = unsafe {
14712            probe_public_libsais16_gsa_freq(
14713                text.as_ptr(),
14714                c_sa.as_mut_ptr(),
14715                n,
14716                fs,
14717                c_freq.as_mut_ptr(),
14718            )
14719        };
14720
14721        assert_eq!(rust_index, c_index);
14722        assert_eq!(&rust_sa[..text.len()], &c_sa[..text.len()]);
14723        assert_eq!(rust_freq, c_freq);
14724    }
14725
14726    fn make_main_32s_stress_text(len: usize, alphabet: SaSint) -> Vec<SaSint> {
14727        let mut state: u32 = 0x1357_9bdf;
14728        let mut t = Vec::with_capacity(len + 1);
14729
14730        for i in 0..len {
14731            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
14732            let mut value = ((state >> 16) % (alphabet as u32 - 1)) as SaSint + 1;
14733            if i % 17 < 8 {
14734                value = ((i / 17) as SaSint % 11) + 1;
14735            }
14736            if i % 29 < 10 {
14737                value = (((i / 29) as SaSint * 3) % 19) + 1;
14738            }
14739            if i % 64 >= 48 {
14740                value = t[i - 48];
14741            }
14742            t.push(value);
14743        }
14744
14745        t.push(0);
14746        t
14747    }
14748
14749    fn make_recursive_main_32s_text(repeats: usize) -> Vec<SaSint> {
14750        let motif = [9, 4, 9, 2, 9, 4, 9, 1];
14751        let mut t = Vec::with_capacity(repeats * motif.len() + 1);
14752        for _ in 0..repeats {
14753            t.extend_from_slice(&motif);
14754        }
14755        t.push(0);
14756        t
14757    }
14758
14759    fn assert_main_32s_entry_matches_c(mut t: Vec<SaSint>, k: SaSint, fs: SaSint) {
14760        let n = t.len() as SaSint;
14761        let threads = 1;
14762        let mut sa = vec![0; t.len() + fs as usize];
14763        let initial_t = t.clone();
14764        let initial_sa = sa.clone();
14765
14766        let c_result = unsafe {
14767            probe_libsais16_main_32s_entry(t.as_mut_ptr(), sa.as_mut_ptr(), n, k, fs, threads)
14768        };
14769        let c_t = t.clone();
14770        let c_sa = sa.clone();
14771
14772        t.copy_from_slice(&initial_t);
14773        sa.copy_from_slice(&initial_sa);
14774
14775        let mut thread_state = alloc_thread_state(threads).unwrap();
14776        let rust_result = main_32s_entry(
14777            t.as_mut_ptr(),
14778            &mut sa,
14779            n,
14780            k,
14781            fs,
14782            threads,
14783            &mut thread_state,
14784        );
14785
14786        assert_eq!(rust_result, c_result);
14787        assert_eq!(t, c_t);
14788        assert_eq!(sa, c_sa);
14789    }
14790
14791    #[test]
14792    fn libsais16_main_32s_entry_matches_c_for_local_32s_paths() {
14793        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 300), 300, 2048);
14794        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 400), 400, 2048);
14795        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 700), 700, 2048);
14796        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, 1501), 1501, 2048);
14797        assert_main_32s_entry_matches_c(make_recursive_main_32s_text(24), 300, 0);
14798        assert_main_32s_entry_matches_c(make_recursive_main_32s_text(24), 1501, 0);
14799    }
14800
14801    #[test]
14802    fn libsais16_final_bwt_scan_left_to_right_16u_matches_c() {
14803        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14804        let mut c_sa = rust_sa.clone();
14805        let mut c_bucket = rust_bucket.clone();
14806
14807        final_bwt_scan_left_to_right_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
14808        unsafe {
14809            probe_libsais16_final_bwt_scan_left_to_right_16u(
14810                text.as_ptr(),
14811                c_sa.as_mut_ptr(),
14812                c_bucket.as_mut_ptr(),
14813                0,
14814                6,
14815            );
14816        }
14817
14818        assert_eq!(rust_sa, c_sa);
14819        assert_eq!(rust_bucket, c_bucket);
14820    }
14821
14822    #[test]
14823    fn libsais16_final_bwt_scan_right_to_left_16u_matches_c() {
14824        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14825        let mut c_sa = rust_sa.clone();
14826        let mut c_bucket = rust_bucket.clone();
14827
14828        let rust_index =
14829            final_bwt_scan_right_to_left_16u(&text, &mut rust_sa, &mut rust_bucket, 0, 6);
14830        let c_index = unsafe {
14831            probe_libsais16_final_bwt_scan_right_to_left_16u(
14832                text.as_ptr(),
14833                c_sa.as_mut_ptr(),
14834                c_bucket.as_mut_ptr(),
14835                0,
14836                6,
14837            )
14838        };
14839
14840        assert_eq!(rust_index, c_index);
14841        assert_eq!(rust_sa, c_sa);
14842        assert_eq!(rust_bucket, c_bucket);
14843    }
14844
14845    #[test]
14846    fn libsais16_final_bwt_aux_scan_left_to_right_16u_matches_c() {
14847        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14848        let mut c_sa = rust_sa.clone();
14849        let mut c_bucket = rust_bucket.clone();
14850        let mut rust_i = vec![-1; 8];
14851        let mut c_i = rust_i.clone();
14852
14853        final_bwt_aux_scan_left_to_right_16u(
14854            &text,
14855            &mut rust_sa,
14856            1,
14857            &mut rust_i,
14858            &mut rust_bucket,
14859            0,
14860            6,
14861        );
14862        unsafe {
14863            probe_libsais16_final_bwt_aux_scan_left_to_right_16u(
14864                text.as_ptr(),
14865                c_sa.as_mut_ptr(),
14866                1,
14867                c_i.as_mut_ptr(),
14868                c_bucket.as_mut_ptr(),
14869                0,
14870                6,
14871            );
14872        }
14873
14874        assert_eq!(rust_sa, c_sa);
14875        assert_eq!(rust_bucket, c_bucket);
14876        assert_eq!(rust_i, c_i);
14877    }
14878
14879    #[test]
14880    fn libsais16_final_bwt_aux_scan_right_to_left_16u_matches_c() {
14881        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
14882        let mut c_sa = rust_sa.clone();
14883        let mut c_bucket = rust_bucket.clone();
14884        let mut rust_i = vec![-1; 8];
14885        let mut c_i = rust_i.clone();
14886
14887        final_bwt_aux_scan_right_to_left_16u(
14888            &text,
14889            &mut rust_sa,
14890            1,
14891            &mut rust_i,
14892            &mut rust_bucket,
14893            0,
14894            6,
14895        );
14896        unsafe {
14897            probe_libsais16_final_bwt_aux_scan_right_to_left_16u(
14898                text.as_ptr(),
14899                c_sa.as_mut_ptr(),
14900                1,
14901                c_i.as_mut_ptr(),
14902                c_bucket.as_mut_ptr(),
14903                0,
14904                6,
14905            );
14906        }
14907
14908        assert_eq!(rust_sa, c_sa);
14909        assert_eq!(rust_bucket, c_bucket);
14910        assert_eq!(rust_i, c_i);
14911    }
14912
14913    #[test]
14914    fn libsais16_renumber_lms_suffixes_16u_matches_c() {
14915        let m = 6;
14916        let mut rust_sa = vec![0; 20];
14917        rust_sa[..m].copy_from_slice(&[2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN]);
14918        let mut c_sa = rust_sa.clone();
14919
14920        let rust_name = renumber_lms_suffixes_16u(&mut rust_sa, m as SaSint, 5, 0, m as SaSint);
14921        let c_name = unsafe {
14922            probe_libsais16_renumber_lms_suffixes_16u(
14923                c_sa.as_mut_ptr(),
14924                m as SaSint,
14925                5,
14926                0,
14927                m as SaSint,
14928            )
14929        };
14930
14931        assert_eq!(rust_name, c_name);
14932        assert_eq!(rust_sa, c_sa);
14933    }
14934
14935    fn lms_interval_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14936        let mut sa = vec![-7; 16];
14937        sa[4..8].copy_from_slice(&[41, 42, 61, 62]);
14938
14939        let mut buckets = vec![0; 8 * ALPHABET_SIZE];
14940        buckets[buckets_index2(2, 1)] = 0;
14941        buckets[buckets_index2(3, 1)] = 2;
14942        buckets[buckets_index2(4, 1)] = 2;
14943        buckets[buckets_index2(5, 1)] = 2;
14944        buckets[buckets_index2(6, 1)] = 4;
14945        buckets[buckets_index2(7, 1)] = 4;
14946        buckets[7 * ALPHABET_SIZE + 2] = 6;
14947        buckets[7 * ALPHABET_SIZE + 5] = 12;
14948
14949        (sa, buckets)
14950    }
14951
14952    #[test]
14953    fn libsais16_place_lms_suffixes_interval_16u_matches_c() {
14954        for flags in [0, LIBSAIS_FLAGS_GSA] {
14955            let (mut rust_sa, mut rust_buckets) = lms_interval_fixture();
14956            let mut c_sa = rust_sa.clone();
14957            let mut c_buckets = rust_buckets.clone();
14958
14959            place_lms_suffixes_interval_16u(&mut rust_sa, 16, 8, flags, &mut rust_buckets);
14960            unsafe {
14961                probe_libsais16_place_lms_suffixes_interval_16u(
14962                    c_sa.as_mut_ptr(),
14963                    16,
14964                    8,
14965                    flags,
14966                    c_buckets.as_mut_ptr(),
14967                );
14968            }
14969
14970            assert_eq!(rust_sa, c_sa);
14971            assert_eq!(rust_buckets, c_buckets);
14972        }
14973    }
14974
14975    #[test]
14976    fn libsais16_bwt_copy_16u_matches_c() {
14977        let mut a = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
14978        let mut rust_u = vec![999; a.len()];
14979        let mut c_u = rust_u.clone();
14980
14981        bwt_copy_16u(&mut rust_u, &a, a.len() as SaSint);
14982        unsafe {
14983            probe_libsais16_bwt_copy_16u(c_u.as_mut_ptr(), a.as_mut_ptr(), a.len() as SaSint);
14984        }
14985
14986        assert_eq!(rust_u, c_u);
14987    }
14988
14989    #[test]
14990    fn libsais16_early_omp_wrappers_match_c() {
14991        let text = [3, 1, 2, 1, 0, 4, 1, 0];
14992        let n = text.len() as SaSint;
14993
14994        let mut rust_sa = vec![-99; text.len()];
14995        let mut c_sa = rust_sa.clone();
14996        gather_lms_suffixes_16u_omp(&text, &mut rust_sa, n, 1, &mut []);
14997        unsafe {
14998            probe_libsais16_gather_lms_suffixes_16u_omp(text.as_ptr(), c_sa.as_mut_ptr(), n, 1);
14999        }
15000        assert_eq!(rust_sa, c_sa);
15001
15002        let mut rust_sa = vec![-99; text.len()];
15003        let mut c_sa = rust_sa.clone();
15004        let mut rust_buckets = vec![-1; 4 * ALPHABET_SIZE];
15005        let mut c_buckets = rust_buckets.clone();
15006        let rust_m = count_and_gather_lms_suffixes_16u_omp(
15007            &text,
15008            &mut rust_sa,
15009            n,
15010            &mut rust_buckets,
15011            1,
15012            &mut [],
15013        );
15014        let c_m = unsafe {
15015            probe_libsais16_count_and_gather_lms_suffixes_16u_omp(
15016                text.as_ptr(),
15017                c_sa.as_mut_ptr(),
15018                n,
15019                c_buckets.as_mut_ptr(),
15020                1,
15021            )
15022        };
15023        assert_eq!(rust_m, c_m);
15024        assert_eq!(rust_sa, c_sa);
15025        assert_eq!(rust_buckets, c_buckets);
15026
15027        let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
15028        let m = count_and_gather_lms_suffixes_16u(
15029            &text,
15030            &mut rust_sa,
15031            n,
15032            &mut rust_buckets[..4 * ALPHABET_SIZE],
15033            0,
15034            n,
15035        );
15036        initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
15037        let first_lms_suffix = rust_sa[(n - m) as usize];
15038        initialize_buckets_for_lms_suffixes_radix_sort_16u(
15039            &text,
15040            &mut rust_buckets,
15041            first_lms_suffix,
15042        );
15043        let mut c_sa = rust_sa.clone();
15044        let mut c_buckets = rust_buckets.clone();
15045        radix_sort_lms_suffixes_16u_omp(
15046            &text,
15047            &mut rust_sa,
15048            n,
15049            m,
15050            0,
15051            &mut rust_buckets,
15052            1,
15053            &mut [],
15054        );
15055        unsafe {
15056            probe_libsais16_radix_sort_lms_suffixes_16u_omp(
15057                text.as_ptr(),
15058                c_sa.as_mut_ptr(),
15059                n,
15060                m,
15061                0,
15062                c_buckets.as_mut_ptr(),
15063                1,
15064            );
15065        }
15066        assert_eq!(rust_sa, c_sa);
15067        assert_eq!(rust_buckets, c_buckets);
15068    }
15069
15070    #[test]
15071    fn libsais16_early_omp_wrappers_use_block_partition_for_large_inputs() {
15072        let n = 65_600usize;
15073        let text: Vec<u16> = (0..n)
15074            .map(|i| 1 + ((i * 37 + i / 17) % 509) as u16)
15075            .collect();
15076
15077        let mut gathered_threaded = vec![-99; n];
15078        let mut gathered_scalar = vec![-99; n];
15079        let mut thread_state = alloc_thread_state(4).unwrap();
15080        let mut count_sa = vec![-99; n];
15081        let mut count_buckets = vec![0; 4 * ALPHABET_SIZE];
15082        count_and_gather_lms_suffixes_16u_omp(
15083            &text,
15084            &mut count_sa,
15085            n as SaSint,
15086            &mut count_buckets,
15087            4,
15088            &mut thread_state,
15089        );
15090        gather_lms_suffixes_16u_omp(
15091            &text,
15092            &mut gathered_threaded,
15093            n as SaSint,
15094            4,
15095            &mut thread_state,
15096        );
15097        gather_lms_suffixes_16u(
15098            &text,
15099            &mut gathered_scalar,
15100            n as SaSint,
15101            n as SaSint - 1,
15102            0,
15103            n as SaSint,
15104        );
15105        assert_eq!(gathered_threaded, gathered_scalar);
15106
15107        let mut sa_threaded = vec![-99; n];
15108        let mut sa_scalar = vec![-99; n];
15109        let mut buckets_threaded = vec![0; 4 * ALPHABET_SIZE];
15110        let mut buckets_scalar = vec![0; 4 * ALPHABET_SIZE];
15111        let m_threaded = count_and_gather_lms_suffixes_16u_omp(
15112            &text,
15113            &mut sa_threaded,
15114            n as SaSint,
15115            &mut buckets_threaded,
15116            4,
15117            &mut thread_state,
15118        );
15119        let m_scalar = count_and_gather_lms_suffixes_16u(
15120            &text,
15121            &mut sa_scalar,
15122            n as SaSint,
15123            &mut buckets_scalar,
15124            0,
15125            n as SaSint,
15126        );
15127        assert_eq!(m_threaded, m_scalar);
15128        assert_eq!(
15129            &sa_threaded[n - m_threaded as usize..],
15130            &sa_scalar[n - m_scalar as usize..]
15131        );
15132        assert_eq!(buckets_threaded, buckets_scalar);
15133    }
15134
15135    #[test]
15136    fn libsais16_late_omp_wrappers_match_c() {
15137        let m = 6;
15138        let mut rust_sa = vec![0; 20];
15139        rust_sa[..m].copy_from_slice(&[2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN]);
15140        let mut c_sa = rust_sa.clone();
15141        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15142        let rust_name =
15143            renumber_lms_suffixes_16u_omp(&mut rust_sa, m as SaSint, 1, &mut rust_thread_state);
15144        let c_name = unsafe {
15145            probe_libsais16_renumber_lms_suffixes_16u_omp(c_sa.as_mut_ptr(), m as SaSint, 1)
15146        };
15147        assert_eq!(rust_name, c_name);
15148        assert_eq!(rust_sa, c_sa);
15149
15150        let mut a = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
15151        let mut rust_u = vec![999; a.len()];
15152        let mut c_u = rust_u.clone();
15153        bwt_copy_16u_omp(&mut rust_u, &a, a.len() as SaSint, 1);
15154        unsafe {
15155            probe_libsais16_bwt_copy_16u_omp(
15156                c_u.as_mut_ptr(),
15157                a.as_mut_ptr(),
15158                a.len() as SaSint,
15159                1,
15160            );
15161        }
15162        assert_eq!(rust_u, c_u);
15163    }
15164
15165    #[test]
15166    fn libsais16_gather_marked_lms_suffixes_matches_c() {
15167        let mut rust_sa = vec![0, 0, 3 | SAINT_MIN, 4, 5 | SAINT_MIN, 6, -7, 8];
15168        let mut c_sa = rust_sa.clone();
15169
15170        let rust_l = gather_marked_lms_suffixes(&mut rust_sa, 2, 8, 0, 4) as SaSint;
15171        let c_l =
15172            unsafe { probe_libsais16_gather_marked_lms_suffixes(c_sa.as_mut_ptr(), 2, 8, 0, 4) };
15173
15174        assert_eq!(rust_l, c_l);
15175        assert_eq!(rust_sa, c_sa);
15176    }
15177
15178    #[test]
15179    fn libsais16_gather_marked_lms_suffixes_omp_matches_c() {
15180        let mut rust_sa = vec![0; 10];
15181        rust_sa[4..8].copy_from_slice(&[2 | SAINT_MIN, 4, 6 | SAINT_MIN, 8]);
15182        let mut c_sa = rust_sa.clone();
15183
15184        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15185        gather_marked_lms_suffixes_omp(&mut rust_sa, 8, 4, 2, 1, &mut rust_thread_state);
15186        unsafe {
15187            probe_libsais16_gather_marked_lms_suffixes_omp(c_sa.as_mut_ptr(), 8, 4, 2, 1);
15188        }
15189
15190        assert_eq!(rust_sa, c_sa);
15191    }
15192
15193    #[test]
15194    fn libsais16_renumber_and_gather_lms_suffixes_omp_matches_c() {
15195        let mut rust_sa = vec![0; 10];
15196        rust_sa[..4].copy_from_slice(&[2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN]);
15197        let mut c_sa = rust_sa.clone();
15198
15199        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15200        let rust_name =
15201            renumber_and_gather_lms_suffixes_omp(&mut rust_sa, 8, 4, 2, 1, &mut rust_thread_state);
15202        let c_name = unsafe {
15203            probe_libsais16_renumber_and_gather_lms_suffixes_omp(c_sa.as_mut_ptr(), 8, 4, 2, 1)
15204        };
15205
15206        assert_eq!(rust_name, c_name);
15207        assert_eq!(rust_sa, c_sa);
15208    }
15209
15210    #[test]
15211    fn libsais16_reconstruct_lms_suffixes_matches_c() {
15212        let mut rust_sa = vec![2, 0, 1, 77, 88, 10, 11, 12];
15213        let mut c_sa = rust_sa.clone();
15214
15215        reconstruct_lms_suffixes(&mut rust_sa, 8, 3, 0, 3);
15216        unsafe {
15217            probe_libsais16_reconstruct_lms_suffixes(c_sa.as_mut_ptr(), 8, 3, 0, 3);
15218        }
15219
15220        assert_eq!(rust_sa, c_sa);
15221
15222        let mut rust_sa = vec![2, 0, 1, 77, 88, 10, 11, 12];
15223        let mut c_sa = rust_sa.clone();
15224        reconstruct_lms_suffixes_omp(&mut rust_sa, 8, 3, 1);
15225        unsafe {
15226            probe_libsais16_reconstruct_lms_suffixes_omp(c_sa.as_mut_ptr(), 8, 3, 1);
15227        }
15228
15229        assert_eq!(rust_sa, c_sa);
15230    }
15231
15232    #[test]
15233    fn libsais16_lms_late_omp_wrappers_use_block_partition() {
15234        let m = 65_536usize;
15235        let mut scalar = vec![0; 2 * m + 8];
15236        for i in 0..m {
15237            let value = (2 * i) as SaSint;
15238            scalar[i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
15239        }
15240        let mut threaded = scalar.clone();
15241
15242        let mut scalar_state = alloc_thread_state(1).unwrap();
15243        let mut threaded_state = alloc_thread_state(4).unwrap();
15244        let scalar_name =
15245            renumber_lms_suffixes_16u_omp(&mut scalar, m as SaSint, 1, &mut scalar_state);
15246        let threaded_name =
15247            renumber_lms_suffixes_16u_omp(&mut threaded, m as SaSint, 4, &mut threaded_state);
15248        assert_eq!(threaded_name, scalar_name);
15249        assert_eq!(threaded, scalar);
15250
15251        let n = 131_072usize;
15252        let m = 65_536usize;
15253        let fs = 128usize;
15254        let mut scalar = vec![0; n + fs];
15255        for i in 0..(n >> 1) {
15256            let value = (i as SaSint + 1) & SAINT_MAX;
15257            scalar[m + i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
15258        }
15259        let marked_count = (0..(n >> 1)).filter(|i| i % 7 == 0).count();
15260        let mut threaded = scalar.clone();
15261
15262        let mut scalar_state = alloc_thread_state(1).unwrap();
15263        let mut threaded_state = alloc_thread_state(4).unwrap();
15264        gather_marked_lms_suffixes_omp(
15265            &mut scalar,
15266            n as SaSint,
15267            m as SaSint,
15268            fs as SaSint,
15269            1,
15270            &mut scalar_state,
15271        );
15272        gather_marked_lms_suffixes_omp(
15273            &mut threaded,
15274            n as SaSint,
15275            m as SaSint,
15276            fs as SaSint,
15277            4,
15278            &mut threaded_state,
15279        );
15280        assert_eq!(
15281            &threaded[n + fs - marked_count..n + fs],
15282            &scalar[n + fs - marked_count..n + fs]
15283        );
15284
15285        let m = 65_536usize;
15286        let n = 2 * m;
15287        let mut scalar = vec![0; n];
15288        for i in 0..m {
15289            scalar[i] = i as SaSint;
15290            scalar[n - m + i] = 1_000_000 + i as SaSint;
15291        }
15292        let mut threaded = scalar.clone();
15293
15294        reconstruct_lms_suffixes_omp(&mut scalar, n as SaSint, m as SaSint, 1);
15295        reconstruct_lms_suffixes_omp(&mut threaded, n as SaSint, m as SaSint, 4);
15296        assert_eq!(threaded, scalar);
15297    }
15298
15299    #[test]
15300    fn libsais16_distinct_lms_helpers_match_c() {
15301        let m = 6;
15302        let mut rust_sa = vec![0; 18];
15303        rust_sa[..m].copy_from_slice(&[
15304            2 | SAINT_MIN,
15305            4 | SAINT_MIN,
15306            6,
15307            8 | SAINT_MIN,
15308            10,
15309            12 | SAINT_MIN,
15310        ]);
15311        let mut c_sa = rust_sa.clone();
15312        let rust_name =
15313            renumber_distinct_lms_suffixes_32s_4k(&mut rust_sa, m as SaSint, 1, 0, m as isize);
15314        let c_name = unsafe {
15315            probe_libsais16_renumber_distinct_lms_suffixes_32s_4k(
15316                c_sa.as_mut_ptr(),
15317                m as SaSint,
15318                1,
15319                0,
15320                m as SaSint,
15321            )
15322        };
15323        assert_eq!(rust_name, c_name);
15324        assert_eq!(rust_sa, c_sa);
15325
15326        let mut rust_sa = vec![0; 12];
15327        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
15328        let mut c_sa = rust_sa.clone();
15329        mark_distinct_lms_suffixes_32s(&mut rust_sa, m as SaSint, 0, 6);
15330        unsafe {
15331            probe_libsais16_mark_distinct_lms_suffixes_32s(c_sa.as_mut_ptr(), m as SaSint, 0, 6);
15332        }
15333        assert_eq!(rust_sa, c_sa);
15334
15335        let mut rust_sa = vec![0; 12];
15336        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
15337        let mut c_sa = rust_sa.clone();
15338        clamp_lms_suffixes_length_32s(&mut rust_sa, m as SaSint, 0, 6);
15339        unsafe {
15340            probe_libsais16_clamp_lms_suffixes_length_32s(c_sa.as_mut_ptr(), m as SaSint, 0, 6);
15341        }
15342        assert_eq!(rust_sa, c_sa);
15343    }
15344
15345    #[test]
15346    fn libsais16_distinct_lms_omp_wrappers_match_c() {
15347        let n = 12;
15348        let m = 6;
15349        let mut rust_sa = vec![0; 18];
15350        rust_sa[..m].copy_from_slice(&[
15351            2 | SAINT_MIN,
15352            4 | SAINT_MIN,
15353            6,
15354            8 | SAINT_MIN,
15355            10,
15356            12 | SAINT_MIN,
15357        ]);
15358        let mut c_sa = rust_sa.clone();
15359        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15360        let rust_name = renumber_distinct_lms_suffixes_32s_4k_omp(
15361            &mut rust_sa,
15362            m as SaSint,
15363            1,
15364            &mut rust_thread_state,
15365        );
15366        let c_name = unsafe {
15367            probe_libsais16_renumber_distinct_lms_suffixes_32s_4k_omp(
15368                c_sa.as_mut_ptr(),
15369                m as SaSint,
15370                1,
15371            )
15372        };
15373        assert_eq!(rust_name, c_name);
15374        assert_eq!(rust_sa, c_sa);
15375
15376        let mut rust_sa = vec![0; 18];
15377        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
15378        let mut c_sa = rust_sa.clone();
15379        mark_distinct_lms_suffixes_32s_omp(&mut rust_sa, n, m as SaSint, 1);
15380        unsafe {
15381            probe_libsais16_mark_distinct_lms_suffixes_32s_omp(
15382                c_sa.as_mut_ptr(),
15383                n,
15384                m as SaSint,
15385                1,
15386            );
15387        }
15388        assert_eq!(rust_sa, c_sa);
15389
15390        let mut rust_sa = vec![0; 18];
15391        rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
15392        let mut c_sa = rust_sa.clone();
15393        clamp_lms_suffixes_length_32s_omp(&mut rust_sa, n, m as SaSint, 1);
15394        unsafe {
15395            probe_libsais16_clamp_lms_suffixes_length_32s_omp(c_sa.as_mut_ptr(), n, m as SaSint, 1);
15396        }
15397        assert_eq!(rust_sa, c_sa);
15398
15399        let mut rust_sa = vec![0; 18];
15400        rust_sa[..m].copy_from_slice(&[
15401            2 | SAINT_MIN,
15402            4 | SAINT_MIN,
15403            6,
15404            8 | SAINT_MIN,
15405            10,
15406            12 | SAINT_MIN,
15407        ]);
15408        let mut c_sa = rust_sa.clone();
15409        let mut rust_thread_state = alloc_thread_state(1).unwrap();
15410        let rust_name = renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
15411            &mut rust_sa,
15412            n,
15413            m as SaSint,
15414            1,
15415            &mut rust_thread_state,
15416        );
15417        let c_name = unsafe {
15418            probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
15419                c_sa.as_mut_ptr(),
15420                n,
15421                m as SaSint,
15422                1,
15423            )
15424        };
15425        assert_eq!(rust_name, c_name);
15426        assert_eq!(rust_sa, c_sa);
15427    }
15428
15429    #[test]
15430    fn libsais16_distinct_lms_omp_wrappers_use_block_partition() {
15431        let m = 65_536usize;
15432        let mut scalar = vec![0; 2 * m];
15433        for i in 0..m {
15434            let value = (2 * i) as SaSint;
15435            scalar[i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
15436        }
15437        let mut threaded = scalar.clone();
15438
15439        let mut scalar_state = alloc_thread_state(1).unwrap();
15440        let mut threaded_state = alloc_thread_state(4).unwrap();
15441        let scalar_name = renumber_distinct_lms_suffixes_32s_4k_omp(
15442            &mut scalar,
15443            m as SaSint,
15444            1,
15445            &mut scalar_state,
15446        );
15447        let threaded_name = renumber_distinct_lms_suffixes_32s_4k_omp(
15448            &mut threaded,
15449            m as SaSint,
15450            4,
15451            &mut threaded_state,
15452        );
15453        assert_eq!(threaded_name, scalar_name);
15454        assert_eq!(threaded, scalar);
15455
15456        let n = 131_072usize;
15457        let m = 65_536usize;
15458        let mut scalar = vec![0; n];
15459        for i in 0..(n >> 1) {
15460            scalar[m + i] = if i % 5 == 0 {
15461                SAINT_MIN | (i as SaSint + 1)
15462            } else if i % 11 == 0 {
15463                0
15464            } else {
15465                i as SaSint + 1
15466            };
15467        }
15468        let mut threaded = scalar.clone();
15469        mark_distinct_lms_suffixes_32s_omp(&mut scalar, n as SaSint, m as SaSint, 1);
15470        mark_distinct_lms_suffixes_32s_omp(&mut threaded, n as SaSint, m as SaSint, 4);
15471        assert_eq!(&threaded[m..n], &scalar[m..n]);
15472
15473        let mut scalar = vec![0; n];
15474        for i in 0..(n >> 1) {
15475            scalar[m + i] = if i % 5 == 0 {
15476                SAINT_MIN | (i as SaSint + 1)
15477            } else {
15478                i as SaSint + 1
15479            };
15480        }
15481        let mut threaded = scalar.clone();
15482        clamp_lms_suffixes_length_32s_omp(&mut scalar, n as SaSint, m as SaSint, 1);
15483        clamp_lms_suffixes_length_32s_omp(&mut threaded, n as SaSint, m as SaSint, 4);
15484        assert_eq!(&threaded[m..n], &scalar[m..n]);
15485    }
15486
15487    #[test]
15488    fn libsais16_unique_nonunique_lms_helpers_match_c() {
15489        let m = 4;
15490        let mut rust_t = vec![0; 12];
15491        let mut rust_sa = vec![0; 12];
15492        rust_sa[..m].copy_from_slice(&[2, 4, 6, 8]);
15493        rust_sa[m + 1] = SAINT_MIN | 11;
15494        rust_sa[m + 2] = 22;
15495        rust_sa[m + 3] = SAINT_MIN | 33;
15496        rust_sa[m + 4] = 44;
15497        let mut c_t = rust_t.clone();
15498        let mut c_sa = rust_sa.clone();
15499
15500        let rust_f = renumber_unique_and_nonunique_lms_suffixes_32s(
15501            &mut rust_t,
15502            &mut rust_sa,
15503            m as SaSint,
15504            0,
15505            0,
15506            m as isize,
15507        );
15508        let c_f = unsafe {
15509            probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s(
15510                c_t.as_mut_ptr(),
15511                c_sa.as_mut_ptr(),
15512                m as SaSint,
15513                0,
15514                0,
15515                m as SaSint,
15516            )
15517        };
15518        assert_eq!(rust_f, c_f);
15519        assert_eq!(rust_t, c_t);
15520        assert_eq!(rust_sa, c_sa);
15521
15522        let mut rust_sa = vec![0; 10];
15523        rust_sa[m..m + 4].copy_from_slice(&[SAINT_MIN | 3, 4, SAINT_MIN | 5, 6]);
15524        let mut c_sa = rust_sa.clone();
15525        let mut rust_l = m as isize;
15526        let mut rust_r = 10isize;
15527        let mut c_l = rust_l as SaSint;
15528        let mut c_r = rust_r as SaSint;
15529        compact_unique_and_nonunique_lms_suffixes_32s(
15530            &mut rust_sa,
15531            m as SaSint,
15532            &mut rust_l,
15533            &mut rust_r,
15534            0,
15535            4,
15536        );
15537        unsafe {
15538            probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s(
15539                c_sa.as_mut_ptr(),
15540                m as SaSint,
15541                &mut c_l,
15542                &mut c_r,
15543                0,
15544                4,
15545            );
15546        }
15547        assert_eq!(rust_l as SaSint, c_l);
15548        assert_eq!(rust_r as SaSint, c_r);
15549        assert_eq!(rust_sa, c_sa);
15550    }
15551
15552    #[test]
15553    fn libsais16_unique_nonunique_lms_omp_wrappers_match_c() {
15554        let n = 8;
15555        let m = 4;
15556        let fs = 4;
15557        let mut rust_t = vec![0; 12];
15558        let mut rust_sa = vec![0; 12];
15559        rust_sa[..m].copy_from_slice(&[2, 4, 6, 8]);
15560        rust_sa[m + 1] = SAINT_MIN | 11;
15561        rust_sa[m + 2] = 22;
15562        rust_sa[m + 3] = SAINT_MIN | 33;
15563        rust_sa[m + 4] = 44;
15564        let mut c_t = rust_t.clone();
15565        let mut c_sa = rust_sa.clone();
15566
15567        let rust_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15568            &mut rust_t,
15569            &mut rust_sa,
15570            m as SaSint,
15571            1,
15572        );
15573        let c_f = unsafe {
15574            probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15575                c_t.as_mut_ptr(),
15576                c_sa.as_mut_ptr(),
15577                m as SaSint,
15578                1,
15579            )
15580        };
15581        assert_eq!(rust_f, c_f);
15582        assert_eq!(rust_t, c_t);
15583        assert_eq!(rust_sa, c_sa);
15584
15585        let mut rust_sa = vec![0; 12];
15586        rust_sa[m..m + 4].copy_from_slice(&[SAINT_MIN | 3, 4, SAINT_MIN | 5, 6]);
15587        rust_sa[m - 2..m].copy_from_slice(&[101, 102]);
15588        let mut c_sa = rust_sa.clone();
15589        compact_unique_and_nonunique_lms_suffixes_32s_omp(&mut rust_sa, n, m as SaSint, fs, 2, 1);
15590        unsafe {
15591            probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s_omp(
15592                c_sa.as_mut_ptr(),
15593                n,
15594                m as SaSint,
15595                fs,
15596                2,
15597                1,
15598            );
15599        }
15600        assert_eq!(rust_sa, c_sa);
15601
15602        let mut rust_t = vec![0; 12];
15603        let mut rust_sa = vec![0; 12];
15604        rust_sa[..m].copy_from_slice(&[2, 4, 6, 8]);
15605        rust_sa[m + 1] = SAINT_MIN | 11;
15606        rust_sa[m + 2] = 22;
15607        rust_sa[m + 3] = SAINT_MIN | 33;
15608        rust_sa[m + 4] = 44;
15609        let mut c_t = rust_t.clone();
15610        let mut c_sa = rust_sa.clone();
15611        let rust_f = compact_lms_suffixes_32s_omp(&mut rust_t, &mut rust_sa, n, m as SaSint, fs, 1);
15612        let c_f = unsafe {
15613            probe_libsais16_compact_lms_suffixes_32s_omp(
15614                c_t.as_mut_ptr(),
15615                c_sa.as_mut_ptr(),
15616                n,
15617                m as SaSint,
15618                fs,
15619                1,
15620            )
15621        };
15622        assert_eq!(rust_f, c_f);
15623        assert_eq!(rust_t, c_t);
15624        assert_eq!(rust_sa, c_sa);
15625    }
15626
15627    #[test]
15628    fn libsais16_unique_nonunique_lms_omp_wrappers_use_block_partition() {
15629        let m = 65_536usize;
15630        let mut scalar_t = vec![0; 2 * m];
15631        let mut scalar_sa = vec![0; 2 * m];
15632        for i in 0..m {
15633            scalar_sa[i] = (2 * i) as SaSint;
15634            scalar_sa[m + i] = if i % 5 == 0 {
15635                SAINT_MIN | (i as SaSint + 3)
15636            } else {
15637                i as SaSint + 3
15638            };
15639        }
15640        let mut threaded_t = scalar_t.clone();
15641        let mut threaded_sa = scalar_sa.clone();
15642
15643        let scalar_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15644            &mut scalar_t,
15645            &mut scalar_sa,
15646            m as SaSint,
15647            1,
15648        );
15649        let threaded_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15650            &mut threaded_t,
15651            &mut threaded_sa,
15652            m as SaSint,
15653            4,
15654        );
15655        assert_eq!(threaded_f, scalar_f);
15656        assert_eq!(threaded_t, scalar_t);
15657        assert_eq!(threaded_sa, scalar_sa);
15658
15659        let n = 131_072usize;
15660        let m = 4_096usize;
15661        let fs = 8_192usize;
15662        let mut scalar_sa = vec![0; n + fs];
15663        for i in 0..(n >> 1) {
15664            scalar_sa[m + i] = if i % 32 == 0 {
15665                SAINT_MIN | (i as SaSint + 1)
15666            } else {
15667                i as SaSint + 1
15668            };
15669        }
15670        let f = 1_024usize;
15671        for i in 0..f {
15672            scalar_sa[m - f + i] = 1_000_000 + i as SaSint;
15673        }
15674        let mut threaded_sa = scalar_sa.clone();
15675
15676        compact_unique_and_nonunique_lms_suffixes_32s_omp(
15677            &mut scalar_sa,
15678            n as SaSint,
15679            m as SaSint,
15680            fs as SaSint,
15681            f as SaSint,
15682            1,
15683        );
15684        compact_unique_and_nonunique_lms_suffixes_32s_omp(
15685            &mut threaded_sa,
15686            n as SaSint,
15687            m as SaSint,
15688            fs as SaSint,
15689            f as SaSint,
15690            4,
15691        );
15692        assert_eq!(&threaded_sa[..m], &scalar_sa[..m]);
15693        assert_eq!(
15694            &threaded_sa[n + fs - m..n + fs],
15695            &scalar_sa[n + fs - m..n + fs]
15696        );
15697    }
15698
15699    #[test]
15700    fn libsais16_merge_lms_helpers_match_c() {
15701        let n = 10;
15702        let m = 3;
15703        let mut rust_t = vec![0; n as usize];
15704        rust_t[1] = SAINT_MIN | 11;
15705        rust_t[3] = SAINT_MIN | 22;
15706        rust_t[7] = SAINT_MIN | 33;
15707        let mut rust_sa = vec![0; n as usize];
15708        rust_sa[6..10].copy_from_slice(&[2, 5, 8, 9]);
15709        let mut c_t = rust_t.clone();
15710        let mut c_sa = rust_sa.clone();
15711        merge_unique_lms_suffixes_32s(&mut rust_t, &mut rust_sa, n, m, 0, 0, n as isize);
15712        unsafe {
15713            probe_libsais16_merge_unique_lms_suffixes_32s(
15714                c_t.as_mut_ptr(),
15715                c_sa.as_mut_ptr(),
15716                n,
15717                m,
15718                0,
15719                0,
15720                n,
15721            );
15722        }
15723        assert_eq!(rust_t, c_t);
15724        assert_eq!(rust_sa, c_sa);
15725
15726        let n = 10;
15727        let m = 5;
15728        let mut rust_sa = vec![9, 0, 8, 0, 0, 7, 31, 32, 33, 34];
15729        let mut c_sa = rust_sa.clone();
15730        merge_nonunique_lms_suffixes_32s(&mut rust_sa, n, m, 2, 0, m as isize);
15731        unsafe {
15732            probe_libsais16_merge_nonunique_lms_suffixes_32s(c_sa.as_mut_ptr(), n, m, 2, 0, m);
15733        }
15734        assert_eq!(rust_sa, c_sa);
15735    }
15736
15737    #[test]
15738    fn libsais16_merge_lms_omp_wrappers_match_c() {
15739        let n = 12;
15740        let m = 4;
15741        let f = 2;
15742        let mut rust_t = vec![0; n as usize];
15743        rust_t[1] = SAINT_MIN | 11;
15744        rust_t[5] = SAINT_MIN | 22;
15745        let mut rust_sa = vec![0; n as usize];
15746        rust_sa[1] = 41;
15747        rust_sa[7..12].copy_from_slice(&[2, 6, 21, 22, 23]);
15748        let mut c_t = rust_t.clone();
15749        let mut c_sa = rust_sa.clone();
15750        merge_unique_lms_suffixes_32s_omp(&mut rust_t, &mut rust_sa, n, m, 1);
15751        unsafe {
15752            probe_libsais16_merge_unique_lms_suffixes_32s_omp(
15753                c_t.as_mut_ptr(),
15754                c_sa.as_mut_ptr(),
15755                n,
15756                m,
15757                1,
15758            );
15759        }
15760        assert_eq!(rust_t, c_t);
15761        assert_eq!(rust_sa, c_sa);
15762
15763        let mut rust_sa = vec![0, 41, 1, 0, 55, 66, 77, 2, 6, 21, 22, 23];
15764        let mut c_sa = rust_sa.clone();
15765        merge_nonunique_lms_suffixes_32s_omp(&mut rust_sa, n, m, f, 1);
15766        unsafe {
15767            probe_libsais16_merge_nonunique_lms_suffixes_32s_omp(c_sa.as_mut_ptr(), n, m, f, 1);
15768        }
15769        assert_eq!(rust_sa, c_sa);
15770
15771        let mut rust_t = vec![0; n as usize];
15772        rust_t[1] = SAINT_MIN | 11;
15773        rust_t[5] = SAINT_MIN | 22;
15774        let mut rust_sa = vec![0; n as usize];
15775        rust_sa[1] = 41;
15776        rust_sa[7..12].copy_from_slice(&[2, 6, 21, 22, 23]);
15777        let mut c_t = rust_t.clone();
15778        let mut c_sa = rust_sa.clone();
15779        merge_compacted_lms_suffixes_32s_omp(&mut rust_t, &mut rust_sa, n, m, f, 1);
15780        unsafe {
15781            probe_libsais16_merge_compacted_lms_suffixes_32s_omp(
15782                c_t.as_mut_ptr(),
15783                c_sa.as_mut_ptr(),
15784                n,
15785                m,
15786                f,
15787                1,
15788            );
15789        }
15790        assert_eq!(rust_t, c_t);
15791        assert_eq!(rust_sa, c_sa);
15792    }
15793
15794    #[test]
15795    fn libsais16_merge_lms_omp_wrappers_use_block_partition() {
15796        let n = 65_536usize;
15797        let m = 10_000usize;
15798        let mut scalar_t = vec![0; n];
15799        for i in (0..n).step_by(17) {
15800            scalar_t[i] = SAINT_MIN | (i as SaSint + 1);
15801        }
15802        let unique_count = scalar_t.iter().filter(|&&value| value < 0).count();
15803        let mut scalar_sa = vec![0; n];
15804        let source = n - m - 1;
15805        for i in 0..=unique_count {
15806            scalar_sa[source + i] = ((i * 13 + 7) % n) as SaSint;
15807        }
15808        let mut threaded_t = scalar_t.clone();
15809        let mut threaded_sa = scalar_sa.clone();
15810
15811        merge_unique_lms_suffixes_32s_omp(
15812            &mut scalar_t,
15813            &mut scalar_sa,
15814            n as SaSint,
15815            m as SaSint,
15816            1,
15817        );
15818        merge_unique_lms_suffixes_32s_omp(
15819            &mut threaded_t,
15820            &mut threaded_sa,
15821            n as SaSint,
15822            m as SaSint,
15823            4,
15824        );
15825        assert_eq!(threaded_t, scalar_t);
15826        assert_eq!(threaded_sa, scalar_sa);
15827
15828        let n = 131_072usize;
15829        let m = 65_536usize;
15830        let f = 100usize;
15831        let mut scalar_sa = vec![1; n];
15832        for i in (0..m).step_by(9) {
15833            scalar_sa[i] = 0;
15834        }
15835        let zero_count = scalar_sa[..m].iter().filter(|&&value| value == 0).count();
15836        let source = n - m - 1 + f;
15837        for i in 0..=zero_count {
15838            scalar_sa[source + i] = 2_000_000 + i as SaSint;
15839        }
15840        let mut threaded_sa = scalar_sa.clone();
15841
15842        merge_nonunique_lms_suffixes_32s_omp(
15843            &mut scalar_sa,
15844            n as SaSint,
15845            m as SaSint,
15846            f as SaSint,
15847            1,
15848        );
15849        merge_nonunique_lms_suffixes_32s_omp(
15850            &mut threaded_sa,
15851            n as SaSint,
15852            m as SaSint,
15853            f as SaSint,
15854            4,
15855        );
15856        assert_eq!(threaded_sa, scalar_sa);
15857    }
15858
15859    #[test]
15860    fn libsais16_radix_sort_lms_suffixes_32s_match_c() {
15861        let t = vec![0, 1, 2, 3, 1, 2, 3, 0];
15862        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15863        let mut c_sa = rust_sa.clone();
15864        let mut rust_bucket = vec![0, 6, 7, 8];
15865        let mut c_bucket = rust_bucket.clone();
15866        radix_sort_lms_suffixes_32s_6k(&t, &mut rust_sa, &mut rust_bucket, 5, 3);
15867        unsafe {
15868            probe_libsais16_radix_sort_lms_suffixes_32s_6k(
15869                t.as_ptr(),
15870                c_sa.as_mut_ptr(),
15871                c_bucket.as_mut_ptr(),
15872                5,
15873                3,
15874            );
15875        }
15876        assert_eq!(rust_sa, c_sa);
15877        assert_eq!(rust_bucket, c_bucket);
15878
15879        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15880        let mut c_sa = rust_sa.clone();
15881        let mut rust_bucket = vec![0, 0, 6, 0, 7, 0, 8, 0];
15882        let mut c_bucket = rust_bucket.clone();
15883        radix_sort_lms_suffixes_32s_2k(&t, &mut rust_sa, &mut rust_bucket, 5, 3);
15884        unsafe {
15885            probe_libsais16_radix_sort_lms_suffixes_32s_2k(
15886                t.as_ptr(),
15887                c_sa.as_mut_ptr(),
15888                c_bucket.as_mut_ptr(),
15889                5,
15890                3,
15891            );
15892        }
15893        assert_eq!(rust_sa, c_sa);
15894        assert_eq!(rust_bucket, c_bucket);
15895
15896        let mut cache = vec![ThreadCache::default(); 8];
15897        let sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15898        radix_sort_lms_suffixes_32s_block_gather(&t, &sa, &mut cache, 5, 3);
15899        assert_eq!(cache[5].index, 1);
15900        assert_eq!(cache[5].symbol, 1);
15901        assert_eq!(cache[6].index, 2);
15902        assert_eq!(cache[6].symbol, 2);
15903        assert_eq!(cache[7].index, 3);
15904        assert_eq!(cache[7].symbol, 3);
15905
15906        let mut bucket = vec![0, 6, 7, 8];
15907        radix_sort_lms_suffixes_32s_6k_block_sort(&mut bucket, &mut cache, 5, 3);
15908        assert_eq!(bucket, vec![0, 5, 6, 7]);
15909        assert_eq!(cache[5].symbol, 5);
15910        assert_eq!(cache[6].symbol, 6);
15911        assert_eq!(cache[7].symbol, 7);
15912
15913        let mut cache = vec![ThreadCache::default(); 8];
15914        radix_sort_lms_suffixes_32s_block_gather(&t, &sa, &mut cache, 5, 3);
15915        let mut bucket = vec![0, 0, 6, 0, 7, 0, 8, 0];
15916        radix_sort_lms_suffixes_32s_2k_block_sort(&mut bucket, &mut cache, 5, 3);
15917        assert_eq!(bucket, vec![0, 0, 5, 0, 6, 0, 7, 0]);
15918        assert_eq!(cache[5].symbol, 5);
15919        assert_eq!(cache[6].symbol, 6);
15920        assert_eq!(cache[7].symbol, 7);
15921
15922        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15923        let mut c_sa = rust_sa.clone();
15924        let mut rust_bucket = vec![0, 6, 7, 8];
15925        let mut c_bucket = rust_bucket.clone();
15926        radix_sort_lms_suffixes_32s_6k_omp(&t, &mut rust_sa, 8, 4, &mut rust_bucket, 1);
15927        unsafe {
15928            probe_libsais16_radix_sort_lms_suffixes_32s_6k_omp(
15929                t.as_ptr(),
15930                c_sa.as_mut_ptr(),
15931                8,
15932                4,
15933                c_bucket.as_mut_ptr(),
15934                1,
15935            );
15936        }
15937        assert_eq!(rust_sa, c_sa);
15938        assert_eq!(rust_bucket, c_bucket);
15939
15940        let mut rust_sa = vec![0, 0, 0, 0, 0, 1, 2, 3];
15941        let mut c_sa = rust_sa.clone();
15942        let mut rust_bucket = vec![0, 0, 6, 0, 7, 0, 8, 0];
15943        let mut c_bucket = rust_bucket.clone();
15944        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut rust_sa, 8, 4, &mut rust_bucket, 1);
15945        unsafe {
15946            probe_libsais16_radix_sort_lms_suffixes_32s_2k_omp(
15947                t.as_ptr(),
15948                c_sa.as_mut_ptr(),
15949                8,
15950                4,
15951                c_bucket.as_mut_ptr(),
15952                1,
15953            );
15954        }
15955        assert_eq!(rust_sa, c_sa);
15956        assert_eq!(rust_bucket, c_bucket);
15957
15958        let t = vec![2, 1, 3, 1, 0];
15959        let mut rust_sa = vec![0; t.len()];
15960        let mut c_sa = rust_sa.clone();
15961        let mut rust_bucket = vec![0, 2, 4, 5];
15962        let mut c_bucket = rust_bucket.clone();
15963        let rust_m =
15964            radix_sort_lms_suffixes_32s_1k(&t, &mut rust_sa, t.len() as SaSint, &mut rust_bucket);
15965        let c_m = unsafe {
15966            probe_libsais16_radix_sort_lms_suffixes_32s_1k(
15967                t.as_ptr(),
15968                c_sa.as_mut_ptr(),
15969                t.len() as SaSint,
15970                c_bucket.as_mut_ptr(),
15971            )
15972        };
15973        assert_eq!(rust_m, c_m);
15974        assert_eq!(rust_sa, c_sa);
15975        assert_eq!(rust_bucket, c_bucket);
15976    }
15977
15978    #[test]
15979    fn libsais16_radix_sort_set_markers_32s_match_c() {
15980        let mut rust_sa = vec![0; 8];
15981        let mut c_sa = rust_sa.clone();
15982        let mut induction_bucket = vec![1, 3, 5, 7];
15983        radix_sort_set_markers_32s_6k(&mut rust_sa, &induction_bucket, 0, 4);
15984        unsafe {
15985            probe_libsais16_radix_sort_set_markers_32s_6k(
15986                c_sa.as_mut_ptr(),
15987                induction_bucket.as_mut_ptr(),
15988                0,
15989                4,
15990            );
15991        }
15992        assert_eq!(rust_sa, c_sa);
15993
15994        let mut rust_sa = vec![0; 8];
15995        let mut c_sa = rust_sa.clone();
15996        radix_sort_set_markers_32s_6k_omp(&mut rust_sa, 5, &induction_bucket, 1);
15997        unsafe {
15998            probe_libsais16_radix_sort_set_markers_32s_6k_omp(
15999                c_sa.as_mut_ptr(),
16000                5,
16001                induction_bucket.as_mut_ptr(),
16002                1,
16003            );
16004        }
16005        assert_eq!(rust_sa, c_sa);
16006
16007        let mut rust_sa = vec![0; 8];
16008        let mut c_sa = rust_sa.clone();
16009        let mut induction_bucket = vec![1, 0, 3, 0, 5, 0, 7, 0];
16010        radix_sort_set_markers_32s_4k(&mut rust_sa, &induction_bucket, 0, 4);
16011        unsafe {
16012            probe_libsais16_radix_sort_set_markers_32s_4k(
16013                c_sa.as_mut_ptr(),
16014                induction_bucket.as_mut_ptr(),
16015                0,
16016                4,
16017            );
16018        }
16019        assert_eq!(rust_sa, c_sa);
16020
16021        let mut rust_sa = vec![0; 8];
16022        let mut c_sa = rust_sa.clone();
16023        radix_sort_set_markers_32s_4k_omp(&mut rust_sa, 5, &induction_bucket, 1);
16024        unsafe {
16025            probe_libsais16_radix_sort_set_markers_32s_4k_omp(
16026                c_sa.as_mut_ptr(),
16027                5,
16028                induction_bucket.as_mut_ptr(),
16029                1,
16030            );
16031        }
16032        assert_eq!(rust_sa, c_sa);
16033    }
16034
16035    #[test]
16036    fn libsais16_radix_sort_set_markers_32s_omp_partitions_large_inputs() {
16037        let k = 65_600usize;
16038        let induction_bucket_6k: Vec<SaSint> = (0..k).map(|i| i as SaSint).collect();
16039        let mut single = vec![0; k];
16040        let mut threaded = vec![0; k];
16041        radix_sort_set_markers_32s_6k_omp(&mut single, k as SaSint, &induction_bucket_6k, 1);
16042        radix_sort_set_markers_32s_6k_omp(&mut threaded, k as SaSint, &induction_bucket_6k, 4);
16043        assert_eq!(threaded, single);
16044
16045        let mut induction_bucket_4k = vec![0; 2 * k];
16046        for i in 0..k {
16047            induction_bucket_4k[buckets_index2(i, 0)] = i as SaSint;
16048        }
16049        let mut single = vec![0; k];
16050        let mut threaded = vec![0; k];
16051        radix_sort_set_markers_32s_4k_omp(&mut single, k as SaSint, &induction_bucket_4k, 1);
16052        radix_sort_set_markers_32s_4k_omp(&mut threaded, k as SaSint, &induction_bucket_4k, 4);
16053        assert_eq!(threaded, single);
16054    }
16055
16056    #[test]
16057    fn libsais16_partial_sorting_32s_helpers_match_c() {
16058        let k = 3;
16059        let mut rust_sa = vec![0, SAINT_MIN, 2, SAINT_MIN, 4, SAINT_MIN];
16060        let mut c_sa = rust_sa.clone();
16061        let mut buckets = vec![0; 6 * k as usize];
16062        buckets[buckets_index4(1, 0)] = 3;
16063        buckets[buckets_index4(2, 0)] = 6;
16064        buckets[4 * k as usize + buckets_index2(0, 0)] = 0;
16065        buckets[4 * k as usize + buckets_index2(1, 0)] = 1;
16066        partial_sorting_shift_markers_32s_6k_omp(&mut rust_sa, k, &buckets, 1);
16067        unsafe {
16068            probe_libsais16_partial_sorting_shift_markers_32s_6k_omp(
16069                c_sa.as_mut_ptr(),
16070                k,
16071                buckets.as_ptr(),
16072                1,
16073            );
16074        }
16075        assert_eq!(rust_sa, c_sa);
16076
16077        let mut rust_sa = vec![
16078            1 | SUFFIX_GROUP_MARKER,
16079            2,
16080            3 | SUFFIX_GROUP_MARKER,
16081            4 | SUFFIX_GROUP_MARKER,
16082            5,
16083            6,
16084        ];
16085        let mut c_sa = rust_sa.clone();
16086        partial_sorting_shift_markers_32s_4k(&mut rust_sa, 6);
16087        unsafe { probe_libsais16_partial_sorting_shift_markers_32s_4k(c_sa.as_mut_ptr(), 6) };
16088        assert_eq!(rust_sa, c_sa);
16089
16090        let mut rust_buckets = vec![0; 6 * k as usize];
16091        for (i, value) in rust_buckets[4 * k as usize..].iter_mut().enumerate() {
16092            *value = 100 + i as SaSint;
16093        }
16094        let mut c_buckets = rust_buckets.clone();
16095        partial_sorting_shift_buckets_32s_6k(k, &mut rust_buckets);
16096        unsafe { probe_libsais16_partial_sorting_shift_buckets_32s_6k(k, c_buckets.as_mut_ptr()) };
16097        assert_eq!(rust_buckets, c_buckets);
16098
16099        let mut rust_sa = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
16100        let mut c_sa = rust_sa.clone();
16101        let rust_l = partial_sorting_gather_lms_suffixes_32s_4k(&mut rust_sa, 0, 4);
16102        let c_l = unsafe {
16103            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k(c_sa.as_mut_ptr(), 0, 4)
16104        };
16105        assert_eq!(rust_l, c_l);
16106        assert_eq!(rust_sa, c_sa);
16107
16108        let mut rust_sa = vec![1, -3, 5, -7];
16109        let mut c_sa = rust_sa.clone();
16110        let rust_l = partial_sorting_gather_lms_suffixes_32s_1k(&mut rust_sa, 0, 4);
16111        let c_l = unsafe {
16112            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k(c_sa.as_mut_ptr(), 0, 4)
16113        };
16114        assert_eq!(rust_l, c_l);
16115        assert_eq!(rust_sa, c_sa);
16116
16117        let mut rust_state = alloc_thread_state(1).unwrap();
16118        let mut rust_sa = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
16119        let mut c_sa = rust_sa.clone();
16120        partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut rust_sa, 4, 1, &mut rust_state);
16121        unsafe {
16122            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k_omp(c_sa.as_mut_ptr(), 4, 1);
16123        }
16124        assert_eq!(rust_sa, c_sa);
16125
16126        let mut rust_state = alloc_thread_state(1).unwrap();
16127        let mut rust_sa = vec![1, -3, 5, -7];
16128        let mut c_sa = rust_sa.clone();
16129        partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut rust_sa, 4, 1, &mut rust_state);
16130        unsafe {
16131            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k_omp(c_sa.as_mut_ptr(), 4, 1);
16132        }
16133        assert_eq!(rust_sa, c_sa);
16134    }
16135
16136    #[test]
16137    fn libsais16_partial_sorting_gather_lms_suffixes_32s_omp_uses_block_partition() {
16138        let n = 65_536usize;
16139        let mut base_4k = vec![0; n];
16140        let mut base_1k = vec![0; n];
16141        for i in 0..n {
16142            let value = (i as SaSint + 1) & SAINT_MAX;
16143            base_4k[i] = if i % 7 == 0 {
16144                value | SAINT_MIN | SUFFIX_GROUP_MARKER
16145            } else if i % 11 == 0 {
16146                value | SUFFIX_GROUP_MARKER
16147            } else {
16148                value
16149            };
16150            base_1k[i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
16151        }
16152        let lms_count = base_1k.iter().filter(|&&v| v < 0).count();
16153
16154        let mut scalar = base_4k.clone();
16155        let mut threaded = base_4k;
16156        let mut scalar_state = alloc_thread_state(1).unwrap();
16157        let mut threaded_state = alloc_thread_state(4).unwrap();
16158        partial_sorting_gather_lms_suffixes_32s_4k_omp(
16159            &mut scalar,
16160            n as SaSint,
16161            1,
16162            &mut scalar_state,
16163        );
16164        partial_sorting_gather_lms_suffixes_32s_4k_omp(
16165            &mut threaded,
16166            n as SaSint,
16167            4,
16168            &mut threaded_state,
16169        );
16170        assert_eq!(&threaded[..lms_count], &scalar[..lms_count]);
16171
16172        let mut scalar = base_1k.clone();
16173        let mut threaded = base_1k;
16174        partial_sorting_gather_lms_suffixes_32s_1k_omp(
16175            &mut scalar,
16176            n as SaSint,
16177            1,
16178            &mut scalar_state,
16179        );
16180        partial_sorting_gather_lms_suffixes_32s_1k_omp(
16181            &mut threaded,
16182            n as SaSint,
16183            4,
16184            &mut threaded_state,
16185        );
16186        assert_eq!(&threaded[..lms_count], &scalar[..lms_count]);
16187    }
16188
16189    #[test]
16190    fn libsais16_partial_sorting_32s_block_helpers_behave_like_upstream_shapes() {
16191        let t = vec![0, 1, 2, 1, 0];
16192        let k = 3;
16193
16194        let mut sa = vec![0, 4 | SAINT_MIN, 0];
16195        let mut cache = vec![ThreadCache::default(); sa.len()];
16196        partial_sorting_scan_right_to_left_32s_6k_block_gather(&t, &mut sa, &mut cache, 1, 1);
16197        assert_eq!(cache[1].index, 4 | SAINT_MIN);
16198        assert_eq!(cache[1].symbol, buckets_index4(1, 1) as SaSint);
16199
16200        let mut sa = vec![0, 4 | SUFFIX_GROUP_MARKER, 0];
16201        let mut cache = vec![ThreadCache::default(); sa.len()];
16202        partial_sorting_scan_right_to_left_32s_4k_block_gather(&t, &mut sa, &mut cache, 1, 1);
16203        assert_eq!(sa[1], 0);
16204        assert_eq!(cache[1].index, 4 | SUFFIX_GROUP_MARKER);
16205        assert_eq!(cache[1].symbol, buckets_index2(1, 1) as SaSint);
16206
16207        let mut sa = vec![0, 4, 0];
16208        let mut cache = vec![ThreadCache::default(); sa.len()];
16209        partial_sorting_scan_right_to_left_32s_1k_block_gather(&t, &mut sa, &mut cache, 1, 1);
16210        assert_eq!(sa[1], 0);
16211        assert_eq!(cache[1].index, 3 | SAINT_MIN);
16212        assert_eq!(cache[1].symbol, 1);
16213
16214        let mut sa = vec![4 | SAINT_MIN, 0, 0];
16215        let mut cache = vec![ThreadCache::default(); sa.len()];
16216        partial_sorting_scan_left_to_right_32s_6k_block_gather(&t, &mut sa, &mut cache, 0, 1);
16217        assert_eq!(cache[0].index, 4 | SAINT_MIN);
16218        assert_eq!(cache[0].symbol, buckets_index4(1, 1) as SaSint);
16219
16220        let mut sa = vec![4 | SUFFIX_GROUP_MARKER, 0, 0];
16221        let mut cache = vec![ThreadCache::default(); sa.len()];
16222        partial_sorting_scan_left_to_right_32s_4k_block_gather(&t, &mut sa, &mut cache, 0, 1);
16223        assert_eq!(sa[0], 0);
16224        assert_eq!(cache[0].index, 4 | SUFFIX_GROUP_MARKER);
16225        assert_eq!(cache[0].symbol, buckets_index2(1, 0) as SaSint);
16226
16227        let mut sa = vec![4, 0, 0];
16228        let mut cache = vec![ThreadCache::default(); sa.len()];
16229        partial_sorting_scan_left_to_right_32s_1k_block_gather(&t, &mut sa, &mut cache, 0, 1);
16230        assert_eq!(sa[0], 0);
16231        assert_eq!(cache[0].index, 3);
16232        assert_eq!(cache[0].symbol, 1);
16233
16234        let mut cache = vec![ThreadCache::default(); 3];
16235        cache[1].index = 4 | SAINT_MIN;
16236        cache[1].symbol = buckets_index4(1, 1) as SaSint;
16237        let mut buckets = vec![0; 4 * k];
16238        buckets[buckets_index4(1, 1)] = 2;
16239        let d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
16240            &t,
16241            &mut buckets,
16242            0,
16243            &mut cache,
16244            1,
16245            1,
16246        );
16247        assert_eq!(d, 1);
16248        assert_eq!(cache[1].index, 3 | SAINT_MIN);
16249        assert_eq!(buckets[buckets_index4(1, 1)], 1);
16250        assert_eq!(buckets[buckets_index4(1, 1) + 2], 1);
16251
16252        let mut cache = vec![ThreadCache::default(); 3];
16253        cache[0].index = 4 | SAINT_MIN;
16254        cache[0].symbol = buckets_index4(1, 1) as SaSint;
16255        let mut buckets = vec![0; 4 * k];
16256        buckets[buckets_index4(1, 1)] = 1;
16257        let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
16258            &t,
16259            &mut buckets,
16260            0,
16261            &mut cache,
16262            0,
16263            1,
16264        );
16265        assert_eq!(d, 1);
16266        assert_eq!(cache[0].index, 3 | SAINT_MIN);
16267        assert_eq!(buckets[buckets_index4(1, 1)], 2);
16268        assert_eq!(buckets[buckets_index4(1, 1) + 2], 1);
16269
16270        let mut cache = vec![ThreadCache::default(); 3];
16271        cache[1].index = 4 | SUFFIX_GROUP_MARKER;
16272        cache[1].symbol = buckets_index2(1, 1) as SaSint;
16273        let mut buckets = vec![0; 4 * k];
16274        buckets[3 * k + 1] = 2;
16275        let d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
16276            &t,
16277            k as SaSint,
16278            &mut buckets,
16279            0,
16280            &mut cache,
16281            1,
16282            1,
16283        );
16284        assert_eq!(d, 1);
16285        assert_eq!(cache[1].symbol, 1);
16286        assert_eq!(buckets[3 * k + 1], 1);
16287
16288        let mut cache = vec![ThreadCache::default(); 3];
16289        cache[0].index = 4 | SUFFIX_GROUP_MARKER;
16290        cache[0].symbol = buckets_index2(1, 0) as SaSint;
16291        let mut buckets = vec![0; 4 * k];
16292        buckets[2 * k + 1] = 1;
16293        let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
16294            &t,
16295            k as SaSint,
16296            &mut buckets,
16297            0,
16298            &mut cache,
16299            0,
16300            1,
16301        );
16302        assert_eq!(d, 1);
16303        assert_eq!(cache[0].symbol, 1);
16304        assert_eq!(buckets[2 * k + 1], 2);
16305
16306        let mut cache = vec![ThreadCache::default(); 3];
16307        cache[1].index = 4;
16308        cache[1].symbol = 1;
16309        let mut buckets = vec![0; k];
16310        buckets[1] = 2;
16311        partial_sorting_scan_right_to_left_32s_1k_block_sort(&t, &mut buckets, &mut cache, 1, 1);
16312        assert_eq!(cache[1].symbol, 1);
16313        assert_eq!(buckets[1], 1);
16314
16315        let mut cache = vec![ThreadCache::default(); 3];
16316        cache[0].index = 4;
16317        cache[0].symbol = 1;
16318        let mut buckets = vec![0; k];
16319        buckets[1] = 1;
16320        partial_sorting_scan_left_to_right_32s_1k_block_sort(&t, &mut buckets, &mut cache, 0, 1);
16321        assert_eq!(cache[0].symbol, 1);
16322        assert_eq!(buckets[1], 2);
16323    }
16324
16325    #[test]
16326    fn libsais16_partial_sorting_scan_32s_match_c() {
        // Parity test for the 32-bit partial sorting scans. Each section below
        // builds one set of inputs, clones it, runs the Rust function on one copy
        // and the matching `probe_libsais16_*` function on the other, and asserts
        // that every output (return value where there is one, SA, buckets) is equal.
        // NOTE(review): the probes are presumably shims over the upstream C code
        // (per the `c_` naming and the test name) - confirm against their declaration.
        //
        // SAFETY (applies to every `unsafe` block in this test): all pointers come
        // from `Vec`s declared in this function that stay alive across the call.
        // That the probe stays within those lengths is assumed, not shown here.
        //
        // `t` and `k` are shared by all sections; the SA/bucket vectors are
        // re-declared (shadowed) per section so no mutable state carries over.
16327        let t = vec![0, 1, 2, 1, 3, 0];
16328        let k = 4;
16329
        // left_to_right_32s_6k: buckets hold 6 * k entries, seeded through
        // `buckets_index4`. Return value, SA and buckets are all compared.
16330        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16331        let mut c_sa = rust_sa.clone();
16332        let mut rust_buckets = vec![0; 6 * k as usize];
16333        rust_buckets[buckets_index4(2, 0)] = 4;
16334        rust_buckets[buckets_index4(1, 1)] = 5;
16335        let mut c_buckets = rust_buckets.clone();
16336        let rust_d =
16337            partial_sorting_scan_left_to_right_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16338        let c_d = unsafe {
16339            probe_libsais16_partial_sorting_scan_left_to_right_32s_6k(
16340                t.as_ptr(),
16341                c_sa.as_mut_ptr(),
16342                c_buckets.as_mut_ptr(),
16343                0,
16344                0,
16345                2,
16346            )
16347        };
16348        assert_eq!(rust_d, c_d);
16349        assert_eq!(rust_sa, c_sa);
16350        assert_eq!(rust_buckets, c_buckets);
16351
        // left_to_right_32s_4k: same SA seed; buckets hold 4 * k entries with the
        // two seeds written at offset 2 * k. This variant also receives `k`.
16352        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16353        let mut c_sa = rust_sa.clone();
16354        let mut rust_buckets = vec![0; 4 * k as usize];
16355        rust_buckets[2 * k as usize + 2] = 4;
16356        rust_buckets[2 * k as usize + 1] = 5;
16357        let mut c_buckets = rust_buckets.clone();
16358        let rust_d = partial_sorting_scan_left_to_right_32s_4k(
16359            &t,
16360            &mut rust_sa,
16361            k,
16362            &mut rust_buckets,
16363            0,
16364            0,
16365            2,
16366        );
16367        let c_d = unsafe {
16368            probe_libsais16_partial_sorting_scan_left_to_right_32s_4k(
16369                t.as_ptr(),
16370                c_sa.as_mut_ptr(),
16371                k,
16372                c_buckets.as_mut_ptr(),
16373                0,
16374                0,
16375                2,
16376            )
16377        };
16378        assert_eq!(rust_d, c_d);
16379        assert_eq!(rust_sa, c_sa);
16380        assert_eq!(rust_buckets, c_buckets);
16381
        // left_to_right_32s_1k: a four-entry bucket vector; no return value is
        // captured, so only SA and buckets are compared.
16382        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16383        let mut c_sa = rust_sa.clone();
16384        let mut rust_buckets = vec![0, 5, 4, 0];
16385        let mut c_buckets = rust_buckets.clone();
16386        partial_sorting_scan_left_to_right_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16387        unsafe {
16388            probe_libsais16_partial_sorting_scan_left_to_right_32s_1k(
16389                t.as_ptr(),
16390                c_sa.as_mut_ptr(),
16391                c_buckets.as_mut_ptr(),
16392                0,
16393                2,
16394            );
16395        }
16396        assert_eq!(rust_sa, c_sa);
16397        assert_eq!(rust_buckets, c_buckets);
16398
        // right_to_left_32s_6k: same shape as the first section, with bucket
        // seeds 7 and 6 instead of 4 and 5.
16399        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16400        let mut c_sa = rust_sa.clone();
16401        let mut rust_buckets = vec![0; 6 * k as usize];
16402        rust_buckets[buckets_index4(2, 0)] = 7;
16403        rust_buckets[buckets_index4(1, 1)] = 6;
16404        let mut c_buckets = rust_buckets.clone();
16405        let rust_d =
16406            partial_sorting_scan_right_to_left_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16407        let c_d = unsafe {
16408            probe_libsais16_partial_sorting_scan_right_to_left_32s_6k(
16409                t.as_ptr(),
16410                c_sa.as_mut_ptr(),
16411                c_buckets.as_mut_ptr(),
16412                0,
16413                0,
16414                2,
16415            )
16416        };
16417        assert_eq!(rust_d, c_d);
16418        assert_eq!(rust_sa, c_sa);
16419        assert_eq!(rust_buckets, c_buckets);
16420
        // right_to_left_32s_4k: 4 * k buckets; here the seeds sit at offset 3 * k
        // (the left-to-right 4k section above used 2 * k).
16421        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16422        let mut c_sa = rust_sa.clone();
16423        let mut rust_buckets = vec![0; 4 * k as usize];
16424        rust_buckets[3 * k as usize + 2] = 7;
16425        rust_buckets[3 * k as usize + 1] = 6;
16426        let mut c_buckets = rust_buckets.clone();
16427        let rust_d = partial_sorting_scan_right_to_left_32s_4k(
16428            &t,
16429            &mut rust_sa,
16430            k,
16431            &mut rust_buckets,
16432            0,
16433            0,
16434            2,
16435        );
16436        let c_d = unsafe {
16437            probe_libsais16_partial_sorting_scan_right_to_left_32s_4k(
16438                t.as_ptr(),
16439                c_sa.as_mut_ptr(),
16440                k,
16441                c_buckets.as_mut_ptr(),
16442                0,
16443                0,
16444                2,
16445            )
16446        };
16447        assert_eq!(rust_d, c_d);
16448        assert_eq!(rust_sa, c_sa);
16449        assert_eq!(rust_buckets, c_buckets);
16450
        // right_to_left_32s_1k: four-entry buckets; only SA and buckets compared.
16451        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16452        let mut c_sa = rust_sa.clone();
16453        let mut rust_buckets = vec![0, 6, 7, 0];
16454        let mut c_buckets = rust_buckets.clone();
16455        partial_sorting_scan_right_to_left_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16456        unsafe {
16457            probe_libsais16_partial_sorting_scan_right_to_left_32s_1k(
16458                t.as_ptr(),
16459                c_sa.as_mut_ptr(),
16460                c_buckets.as_mut_ptr(),
16461                0,
16462                2,
16463            );
16464        }
16465        assert_eq!(rust_sa, c_sa);
16466        assert_eq!(rust_buckets, c_buckets);
16467
        // From here on the `_omp` entry points are exercised. The Rust side takes
        // a thread state from `alloc_thread_state(1)`; the probe takes no state
        // argument. The SA seed differs from the sections above (7 at index 6) and
        // a third bucket seed is added.
16468        let mut state = alloc_thread_state(1).unwrap();
16469        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16470        let mut c_sa = rust_sa.clone();
16471        let mut rust_buckets = vec![0; 6 * k as usize];
16472        rust_buckets[buckets_index4(2, 0)] = 4;
16473        rust_buckets[buckets_index4(1, 1)] = 5;
16474        rust_buckets[buckets_index4(3, 0)] = 6;
16475        let mut c_buckets = rust_buckets.clone();
16476        let rust_d = partial_sorting_scan_left_to_right_32s_6k_omp(
16477            &t,
16478            &mut rust_sa,
16479            5,
16480            &mut rust_buckets,
16481            2,
16482            0,
16483            1,
16484            &mut state,
16485        );
16486        let c_d = unsafe {
16487            probe_libsais16_partial_sorting_scan_left_to_right_32s_6k_omp(
16488                t.as_ptr(),
16489                c_sa.as_mut_ptr(),
16490                5,
16491                c_buckets.as_mut_ptr(),
16492                2,
16493                0,
16494                1,
16495            )
16496        };
16497        assert_eq!(rust_d, c_d);
16498        assert_eq!(rust_sa, c_sa);
16499        assert_eq!(rust_buckets, c_buckets);
16500
        // left_to_right_32s_4k_omp: 4 * k buckets, three seeds at offset 2 * k.
16501        let mut state = alloc_thread_state(1).unwrap();
16502        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16503        let mut c_sa = rust_sa.clone();
16504        let mut rust_buckets = vec![0; 4 * k as usize];
16505        rust_buckets[2 * k as usize + 2] = 4;
16506        rust_buckets[2 * k as usize + 1] = 5;
16507        rust_buckets[2 * k as usize + 3] = 6;
16508        let mut c_buckets = rust_buckets.clone();
16509        let rust_d = partial_sorting_scan_left_to_right_32s_4k_omp(
16510            &t,
16511            &mut rust_sa,
16512            5,
16513            k,
16514            &mut rust_buckets,
16515            0,
16516            1,
16517            &mut state,
16518        );
16519        let c_d = unsafe {
16520            probe_libsais16_partial_sorting_scan_left_to_right_32s_4k_omp(
16521                t.as_ptr(),
16522                c_sa.as_mut_ptr(),
16523                5,
16524                k,
16525                c_buckets.as_mut_ptr(),
16526                0,
16527                1,
16528            )
16529        };
16530        assert_eq!(rust_d, c_d);
16531        assert_eq!(rust_sa, c_sa);
16532        assert_eq!(rust_buckets, c_buckets);
16533
        // left_to_right_32s_1k_omp: four-entry buckets; no return value compared.
16534        let mut state = alloc_thread_state(1).unwrap();
16535        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16536        let mut c_sa = rust_sa.clone();
16537        let mut rust_buckets = vec![0, 5, 4, 6];
16538        let mut c_buckets = rust_buckets.clone();
16539        partial_sorting_scan_left_to_right_32s_1k_omp(
16540            &t,
16541            &mut rust_sa,
16542            5,
16543            &mut rust_buckets,
16544            1,
16545            &mut state,
16546        );
16547        unsafe {
16548            probe_libsais16_partial_sorting_scan_left_to_right_32s_1k_omp(
16549                t.as_ptr(),
16550                c_sa.as_mut_ptr(),
16551                5,
16552                c_buckets.as_mut_ptr(),
16553                1,
16554            );
16555        }
16556        assert_eq!(rust_sa, c_sa);
16557        assert_eq!(rust_buckets, c_buckets);
16558
        // right_to_left_32s_6k_omp: note the SA seed here starts with two zeros
        // (`[0, 0, 3, 4, ...]`), unlike every other section in this test.
16559        let mut state = alloc_thread_state(1).unwrap();
16560        let mut rust_sa = vec![0, 0, 3, 4, 9, 9, 9, 9];
16561        let mut c_sa = rust_sa.clone();
16562        let mut rust_buckets = vec![0; 6 * k as usize];
16563        rust_buckets[buckets_index4(2, 0)] = 7;
16564        rust_buckets[buckets_index4(1, 1)] = 6;
16565        let mut c_buckets = rust_buckets.clone();
16566        let rust_d = partial_sorting_scan_right_to_left_32s_6k_omp(
16567            &t,
16568            &mut rust_sa,
16569            5,
16570            &mut rust_buckets,
16571            1,
16572            1,
16573            0,
16574            1,
16575            &mut state,
16576        );
16577        let c_d = unsafe {
16578            probe_libsais16_partial_sorting_scan_right_to_left_32s_6k_omp(
16579                t.as_ptr(),
16580                c_sa.as_mut_ptr(),
16581                5,
16582                c_buckets.as_mut_ptr(),
16583                1,
16584                1,
16585                0,
16586                1,
16587            )
16588        };
16589        assert_eq!(rust_d, c_d);
16590        assert_eq!(rust_sa, c_sa);
16591        assert_eq!(rust_buckets, c_buckets);
16592
        // right_to_left_32s_4k_omp: 4 * k buckets, seeds at offset 3 * k.
16593        let mut state = alloc_thread_state(1).unwrap();
16594        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16595        let mut c_sa = rust_sa.clone();
16596        let mut rust_buckets = vec![0; 4 * k as usize];
16597        rust_buckets[3 * k as usize + 2] = 7;
16598        rust_buckets[3 * k as usize + 1] = 6;
16599        let mut c_buckets = rust_buckets.clone();
16600        let rust_d = partial_sorting_scan_right_to_left_32s_4k_omp(
16601            &t,
16602            &mut rust_sa,
16603            2,
16604            k,
16605            &mut rust_buckets,
16606            0,
16607            1,
16608            &mut state,
16609        );
16610        let c_d = unsafe {
16611            probe_libsais16_partial_sorting_scan_right_to_left_32s_4k_omp(
16612                t.as_ptr(),
16613                c_sa.as_mut_ptr(),
16614                2,
16615                k,
16616                c_buckets.as_mut_ptr(),
16617                0,
16618                1,
16619            )
16620        };
16621        assert_eq!(rust_d, c_d);
16622        assert_eq!(rust_sa, c_sa);
16623        assert_eq!(rust_buckets, c_buckets);
16624
        // right_to_left_32s_1k_omp: four-entry buckets; no return value compared.
16625        let mut state = alloc_thread_state(1).unwrap();
16626        let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16627        let mut c_sa = rust_sa.clone();
16628        let mut rust_buckets = vec![0, 6, 7, 0];
16629        let mut c_buckets = rust_buckets.clone();
16630        partial_sorting_scan_right_to_left_32s_1k_omp(
16631            &t,
16632            &mut rust_sa,
16633            2,
16634            &mut rust_buckets,
16635            1,
16636            &mut state,
16637        );
16638        unsafe {
16639            probe_libsais16_partial_sorting_scan_right_to_left_32s_1k_omp(
16640                t.as_ptr(),
16641                c_sa.as_mut_ptr(),
16642                2,
16643                c_buckets.as_mut_ptr(),
16644                1,
16645            );
16646        }
16647        assert_eq!(rust_sa, c_sa);
16648        assert_eq!(rust_buckets, c_buckets);
16649    }
16650
16651    #[test]
16652    fn libsais16_place_lms_suffixes_histogram_32s_match_c() {
        // Parity test for `place_lms_suffixes_histogram_32s_{2k,4k,6k}`. In each
        // section the same bucket vector is passed read-only to both sides
        // (`&buckets` to Rust, `buckets.as_ptr()` to the probe), so only the
        // resulting SA is compared.
        //
        // SAFETY (all `unsafe` blocks below): the pointers come from `Vec`s that
        // are alive across the call; that the probe stays within their lengths is
        // assumed, not shown here.
16653        let n = 12;
16654        let k = 4;
16655        let m = 4;
        // 2k variant: buckets hold 2 * k entries, seeded through `buckets_index2`.
16656        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16657        let mut c_sa = rust_sa.clone();
16658        let mut buckets = vec![0; 2 * k as usize];
16659        buckets[buckets_index2(1, 0)] = 7;
16660        buckets[buckets_index2(1, 1)] = 2;
16661        buckets[buckets_index2(2, 0)] = 10;
16662        buckets[buckets_index2(2, 1)] = 1;
16663        place_lms_suffixes_histogram_32s_2k(&mut rust_sa, n, k, m, &buckets);
16664        unsafe {
16665            probe_libsais16_place_lms_suffixes_histogram_32s_2k(
16666                c_sa.as_mut_ptr(),
16667                n,
16668                k,
16669                m,
16670                buckets.as_ptr(),
16671            );
16672        }
16673        assert_eq!(rust_sa, c_sa);
16674
        // 4k variant: the values 7 and 10 move to offset 3 * k; the values 2 and 1
        // stay at the `buckets_index2(_, 1)` slots.
16675        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16676        let mut c_sa = rust_sa.clone();
16677        let mut buckets = vec![0; 4 * k as usize];
16678        buckets[buckets_index2(1, 1)] = 2;
16679        buckets[buckets_index2(2, 1)] = 1;
16680        buckets[3 * k as usize + 1] = 7;
16681        buckets[3 * k as usize + 2] = 10;
16682        place_lms_suffixes_histogram_32s_4k(&mut rust_sa, n, k, m, &buckets);
16683        unsafe {
16684            probe_libsais16_place_lms_suffixes_histogram_32s_4k(
16685                c_sa.as_mut_ptr(),
16686                n,
16687                k,
16688                m,
16689                buckets.as_ptr(),
16690            );
16691        }
16692        assert_eq!(rust_sa, c_sa);
16693
        // 6k variant: the values 2 and 1 are addressed through `buckets_index4`;
        // the values 7 and 10 sit at offset 5 * k.
16694        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16695        let mut c_sa = rust_sa.clone();
16696        let mut buckets = vec![0; 6 * k as usize];
16697        buckets[buckets_index4(1, 1)] = 2;
16698        buckets[buckets_index4(2, 1)] = 1;
16699        buckets[5 * k as usize + 1] = 7;
16700        buckets[5 * k as usize + 2] = 10;
16701        place_lms_suffixes_histogram_32s_6k(&mut rust_sa, n, k, m, &buckets);
16702        unsafe {
16703            probe_libsais16_place_lms_suffixes_histogram_32s_6k(
16704                c_sa.as_mut_ptr(),
16705                n,
16706                k,
16707                m,
16708                buckets.as_ptr(),
16709            );
16710        }
16711        assert_eq!(rust_sa, c_sa);
16712    }
16713
16714    #[test]
16715    fn libsais16_count_gather_lms_suffixes_32s_match_c() {
        // Parity test for the 32-bit LMS gather / count helpers. Every section
        // runs the Rust function and the matching `probe_libsais16_*` function on
        // equal inputs and asserts equal outputs.
        //
        // SAFETY (all `unsafe` blocks below): the pointers come from `Vec`s that
        // are alive across the call; that the probe stays within their lengths is
        // assumed, not shown here.
16716        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16717        let n = t.len() as SaSint;
16718        let k = 4;
16719
        // gather_lms_suffixes_32s: compare the returned count and the SA.
16720        let mut rust_sa = vec![0; t.len()];
16721        let mut c_sa = rust_sa.clone();
16722        let rust_m = gather_lms_suffixes_32s(&t, &mut rust_sa, n);
16723        let c_m =
16724            unsafe { probe_libsais16_gather_lms_suffixes_32s(t.as_ptr(), c_sa.as_mut_ptr(), n) };
16725        assert_eq!(rust_m, c_m);
16726        assert_eq!(rust_sa, c_sa);
16727
        // Same text, but with `SAINT_MIN` OR-ed into elements 1 and 4.
        // NOTE(review): presumably this is how a "compacted" text marks symbols -
        // confirm against gather_compacted_lms_suffixes_32s.
16728        let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
16729        let mut rust_sa = vec![0; compact_t.len()];
16730        let mut c_sa = rust_sa.clone();
16731        let rust_m = gather_compacted_lms_suffixes_32s(&compact_t, &mut rust_sa, n);
16732        let c_m = unsafe {
16733            probe_libsais16_gather_compacted_lms_suffixes_32s(
16734                compact_t.as_ptr(),
16735                c_sa.as_mut_ptr(),
16736                n,
16737            )
16738        };
16739        assert_eq!(rust_m, c_m);
16740        assert_eq!(rust_sa, c_sa);
16741
        // count_lms_suffixes_32s_2k: both bucket vectors start filled with 99
        // rather than 0, so stale contents are part of the compared input.
16742        let mut rust_buckets = vec![99; 2 * k as usize];
16743        let mut c_buckets = rust_buckets.clone();
16744        count_lms_suffixes_32s_2k(&t, n, k, &mut rust_buckets);
16745        unsafe {
16746            probe_libsais16_count_lms_suffixes_32s_2k(t.as_ptr(), n, k, c_buckets.as_mut_ptr());
16747        }
16748        assert_eq!(rust_buckets, c_buckets);
16749
        // count_and_gather_lms_suffixes_32s_2k over the trailing arguments (0, n).
        // The Rust side takes the last argument as `isize`; the probe takes `n`.
16750        let mut rust_sa = vec![0; t.len()];
16751        let mut c_sa = rust_sa.clone();
16752        let mut rust_buckets = vec![0; 2 * k as usize];
16753        let mut c_buckets = rust_buckets.clone();
16754        let rust_m = count_and_gather_lms_suffixes_32s_2k(
16755            &t,
16756            &mut rust_sa,
16757            n,
16758            k,
16759            &mut rust_buckets,
16760            0,
16761            n as isize,
16762        );
16763        let c_m = unsafe {
16764            probe_libsais16_count_and_gather_lms_suffixes_32s_2k(
16765                t.as_ptr(),
16766                c_sa.as_mut_ptr(),
16767                n,
16768                k,
16769                c_buckets.as_mut_ptr(),
16770                0,
16771                n,
16772            )
16773        };
16774        assert_eq!(rust_m, c_m);
16775        assert_eq!(rust_sa, c_sa);
16776        assert_eq!(rust_buckets, c_buckets);
16777
        // Compacted counterpart of the previous section, run on `compact_t`.
16778        let mut rust_sa = vec![0; compact_t.len()];
16779        let mut c_sa = rust_sa.clone();
16780        let mut rust_buckets = vec![0; 2 * k as usize];
16781        let mut c_buckets = rust_buckets.clone();
16782        let rust_m = count_and_gather_compacted_lms_suffixes_32s_2k(
16783            &compact_t,
16784            &mut rust_sa,
16785            n,
16786            k,
16787            &mut rust_buckets,
16788            0,
16789            n as isize,
16790        );
16791        let c_m = unsafe {
16792            probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k(
16793                compact_t.as_ptr(),
16794                c_sa.as_mut_ptr(),
16795                n,
16796                k,
16797                c_buckets.as_mut_ptr(),
16798                0,
16799                n,
16800            )
16801        };
16802        assert_eq!(rust_m, c_m);
16803        assert_eq!(rust_sa, c_sa);
16804        assert_eq!(rust_buckets, c_buckets);
16805    }
16806
16807    #[test]
16808    fn libsais16_small_openmp_leaf_helpers_match_upstream_shapes() {
        // Pure-Rust checks of several small helpers against hard-coded expected
        // values; no probe function is called in this test.

        // The slice holds one `SAINT_MIN` and two zeros from index 1 onwards,
        // alongside positive entries; -1 at index 0 and -5 at index 6 are the
        // only other negatives.
16809        let sa = [-1, 0, 3, SAINT_MIN, 0, 7, -5];
16810        assert_eq!(count_negative_marked_suffixes(&sa, 1, 5), 1);
16811        assert_eq!(count_zero_marked_suffixes(&sa, 1, 5), 2);
16812
        // Four rows of stride 4. The expected values at 12..15 equal the
        // column-wise sums of all four rows (1+4+7+10, 2+5+8+11, 3+6+9+12).
16813        let mut buckets = vec![1, 2, 3, 0, 4, 5, 6, 0, 7, 8, 9, 0, 10, 11, 12, 0];
16814        accumulate_counts_s32_4(&mut buckets, 12, 3, 4);
16815        assert_eq!(&buckets[12..15], &[22, 26, 30]);
16816
        // Ten rows `[b, b + 1, b + 2, 0]` for b in 0..10. The expected values at
        // 36..39 (the last row) equal the column sums over all ten rows.
16817        let mut many = Vec::new();
16818        for bucket in 0..10 {
16819            many.extend([bucket, bucket + 1, bucket + 2, 0]);
16820        }
16821        accumulate_counts_s32(&mut many, 36, 3, 4, 10);
16822        assert_eq!(&many[36..39], &[45, 55, 65]);
16823
        // Three-symbol text with `SAINT_MIN` OR-ed into the middle element,
        // counted into a six-entry bucket vector.
16824        let t = [1, SAINT_MIN | 2, 0];
16825        let mut compacted_buckets = vec![0; 6];
16826        count_compacted_lms_suffixes_32s_2k(&t, t.len() as SaSint, 3, &mut compacted_buckets);
16827        assert_eq!(compacted_buckets, vec![1, 0, 1, 0, 0, 1]);
16828
16829        let unique_sa = [0, 2, 4, 6, 0, -10, 20, -30];
16830        assert_eq!(count_unique_suffixes(&unique_sa, 4, 0, 4), 2);
16831
        // Three stride cases: the first two expect a value larger than the
        // second argument (1024 for 1000, 1008 for 1001); the third expects the
        // second argument returned unchanged (1001).
        // NOTE(review): which argument drives the choice is not visible here -
        // confirm against get_bucket_stride.
16832        assert_eq!(get_bucket_stride(20_000, 1000, 4), 1024);
16833        assert_eq!(get_bucket_stride(3024, 1001, 4), 1008);
16834        assert_eq!(get_bucket_stride(3000, 1001, 4), 1001);
16835    }
16836
16837    #[test]
16838    fn libsais16_count_gather_lms_suffixes_32s_omp_wrappers_match_c() {
        // Parity test for the `_omp` wrappers of the 2k count-and-gather helpers.
        // The Rust side takes a thread state from `alloc_thread_state(1)`; the
        // probe takes no state argument.
        //
        // SAFETY (all `unsafe` blocks below): the pointers come from `Vec`s that
        // are alive across the call; that the probe stays within their lengths is
        // assumed, not shown here.
16839        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16840        let n = t.len() as SaSint;
16841        let k = 4;
        // Plain text: compare the returned count, the SA and the buckets.
16842        let mut rust_sa = vec![0; t.len()];
16843        let mut c_sa = rust_sa.clone();
16844        let mut rust_buckets = vec![0; 2 * k as usize];
16845        let mut c_buckets = rust_buckets.clone();
16846        let mut rust_state = alloc_thread_state(1).unwrap();
16847        let rust_m = count_and_gather_lms_suffixes_32s_2k_omp(
16848            &t,
16849            &mut rust_sa,
16850            n,
16851            k,
16852            &mut rust_buckets,
16853            0,
16854            1,
16855            &mut rust_state,
16856        );
16857        let c_m = unsafe {
16858            probe_libsais16_count_and_gather_lms_suffixes_32s_2k_omp(
16859                t.as_ptr(),
16860                c_sa.as_mut_ptr(),
16861                n,
16862                k,
16863                c_buckets.as_mut_ptr(),
16864                0,
16865                1,
16866            )
16867        };
16868        assert_eq!(rust_m, c_m);
16869        assert_eq!(rust_sa, c_sa);
16870        assert_eq!(rust_buckets, c_buckets);
16871
        // Text with `SAINT_MIN` OR-ed into elements 1 and 4. No return value is
        // captured from either side here, so only SA and buckets are compared.
16872        let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
16873        let mut rust_sa = vec![0; compact_t.len()];
16874        let mut c_sa = rust_sa.clone();
16875        let mut rust_buckets = vec![0; 2 * k as usize];
16876        let mut c_buckets = rust_buckets.clone();
16877        let mut rust_state = alloc_thread_state(1).unwrap();
16878        count_and_gather_compacted_lms_suffixes_32s_2k_omp(
16879            &compact_t,
16880            &mut rust_sa,
16881            n,
16882            k,
16883            &mut rust_buckets,
16884            0,
16885            1,
16886            &mut rust_state,
16887        );
16888        unsafe {
16889            probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
16890                compact_t.as_ptr(),
16891                c_sa.as_mut_ptr(),
16892                n,
16893                k,
16894                c_buckets.as_mut_ptr(),
16895                0,
16896                1,
16897            );
16898        }
16899        assert_eq!(rust_sa, c_sa);
16900        assert_eq!(rust_buckets, c_buckets);
16901    }
16902
16903    #[test]
16904    fn libsais16_count_gather_lms_suffixes_32s_4k_match_c() {
        // Parity test for the 4k count / count-and-gather helpers and for
        // `count_suffixes_32s`.
        //
        // SAFETY (all `unsafe` blocks below): the pointers come from `Vec`s that
        // are alive across the call; that the probe stays within their lengths is
        // assumed, not shown here.
16905        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16906        let n = t.len() as SaSint;
16907        let k = 4;
16908
        // The count-only Rust function is checked against the buckets produced by
        // the count-and-gather probe. The Rust buckets start filled with 77 while
        // the probe's start at 0; the probe's SA output goes to a scratch vector
        // that is never inspected.
16909        let mut rust_buckets = vec![77; 4 * k as usize];
16910        let mut c_buckets = vec![0; 4 * k as usize];
16911        let mut c_sa_for_count = vec![0; t.len()];
16912        count_lms_suffixes_32s_4k(&t, n, k, &mut rust_buckets);
16913        unsafe {
16914            probe_libsais16_count_and_gather_lms_suffixes_32s_4k(
16915                t.as_ptr(),
16916                c_sa_for_count.as_mut_ptr(),
16917                n,
16918                k,
16919                c_buckets.as_mut_ptr(),
16920                0,
16921                n,
16922            );
16923        }
16924        assert_eq!(rust_buckets, c_buckets);
16925
        // count_and_gather_lms_suffixes_32s_4k: compare count, SA and buckets.
        // The Rust side takes the last argument as `isize`; the probe takes `n`.
16926        let mut rust_sa = vec![0; t.len()];
16927        let mut c_sa = rust_sa.clone();
16928        let mut rust_buckets = vec![0; 4 * k as usize];
16929        let mut c_buckets = rust_buckets.clone();
16930        let rust_m = count_and_gather_lms_suffixes_32s_4k(
16931            &t,
16932            &mut rust_sa,
16933            n,
16934            k,
16935            &mut rust_buckets,
16936            0,
16937            n as isize,
16938        );
16939        let c_m = unsafe {
16940            probe_libsais16_count_and_gather_lms_suffixes_32s_4k(
16941                t.as_ptr(),
16942                c_sa.as_mut_ptr(),
16943                n,
16944                k,
16945                c_buckets.as_mut_ptr(),
16946                0,
16947                n,
16948            )
16949        };
16950        assert_eq!(rust_m, c_m);
16951        assert_eq!(rust_sa, c_sa);
16952        assert_eq!(rust_buckets, c_buckets);
16953
        // `_omp` wrapper of the same helper; the Rust side additionally takes a
        // thread state from `alloc_thread_state(1)`.
16954        let mut rust_sa = vec![0; t.len()];
16955        let mut c_sa = rust_sa.clone();
16956        let mut rust_buckets = vec![0; 4 * k as usize];
16957        let mut c_buckets = rust_buckets.clone();
16958        let mut rust_state = alloc_thread_state(1).unwrap();
16959        let rust_m = count_and_gather_lms_suffixes_32s_4k_omp(
16960            &t,
16961            &mut rust_sa,
16962            n,
16963            k,
16964            &mut rust_buckets,
16965            0,
16966            1,
16967            &mut rust_state,
16968        );
16969        let c_m = unsafe {
16970            probe_libsais16_count_and_gather_lms_suffixes_32s_4k_omp(
16971                t.as_ptr(),
16972                c_sa.as_mut_ptr(),
16973                n,
16974                k,
16975                c_buckets.as_mut_ptr(),
16976                0,
16977                1,
16978            )
16979        };
16980        assert_eq!(rust_m, c_m);
16981        assert_eq!(rust_sa, c_sa);
16982        assert_eq!(rust_buckets, c_buckets);
16983
        // count_suffixes_32s: both k-entry bucket vectors start filled with 91.
16984        let mut rust_buckets = vec![91; k as usize];
16985        let mut c_buckets = rust_buckets.clone();
16986        count_suffixes_32s(&t, n, k, &mut rust_buckets);
16987        unsafe {
16988            probe_libsais16_count_suffixes_32s(t.as_ptr(), n, k, c_buckets.as_mut_ptr());
16989        }
16990        assert_eq!(rust_buckets, c_buckets);
16991    }
16992
16993    #[test]
16994    fn libsais16_initialize_buckets_32s_match_c() {
        // Parity test for the 32-bit bucket initialisers. Each section clones a
        // fixed bucket vector, lets the Rust function mutate one copy and the
        // matching `probe_libsais16_*` function mutate the other, then asserts the
        // two copies are equal.
        //
        // SAFETY (all `unsafe` blocks below): the pointers come from `Vec`s that
        // are alive across the call; that the probe stays within their lengths is
        // assumed, not shown here.
16995        let k = 4;
16996
        // 24 entries = 6 * k.
16997        let base_6k = vec![
16998            1, 2, 0, 1, 0, 1, 2, 0, 3, 0, 1, 1, 2, 1, 0, 0, 9, 9, 9, 9, 8, 8, 8, 8,
16999        ];
17000        let mut rust = base_6k.clone();
17001        let mut c = base_6k.clone();
17002        initialize_buckets_start_and_end_32s_6k(k, &mut rust);
17003        unsafe { probe_libsais16_initialize_buckets_start_and_end_32s_6k(k, c.as_mut_ptr()) };
17004        assert_eq!(rust, c);
17005
        // 16 entries = 4 * k. `base_4k` is reused by the last section of this test.
17006        let base_4k = vec![1, 2, 0, 1, 3, 0, 2, 1, 9, 9, 9, 9, 8, 8, 8, 8];
17007        let mut rust = base_4k.clone();
17008        let mut c = base_4k.clone();
17009        initialize_buckets_start_and_end_32s_4k(k, &mut rust);
17010        unsafe { probe_libsais16_initialize_buckets_start_and_end_32s_4k(k, c.as_mut_ptr()) };
17011        assert_eq!(rust, c);
17012
        // 8 entries = 2 * k; used for both the end-only and start-and-end variants.
17013        let base_2k = vec![1, 2, 0, 1, 3, 0, 2, 1];
17014        let mut rust = base_2k.clone();
17015        let mut c = base_2k.clone();
17016        initialize_buckets_end_32s_2k(k, &mut rust);
17017        unsafe { probe_libsais16_initialize_buckets_end_32s_2k(k, c.as_mut_ptr()) };
17018        assert_eq!(rust, c);
17019
17020        let mut rust = base_2k.clone();
17021        let mut c = base_2k.clone();
17022        initialize_buckets_start_and_end_32s_2k(k, &mut rust);
17023        unsafe { probe_libsais16_initialize_buckets_start_and_end_32s_2k(k, c.as_mut_ptr()) };
17024        assert_eq!(rust, c);
17025
        // 4 entries = k; used for both the start and the end variants.
17026        let base_1k = vec![2, 1, 3, 2];
17027        let mut rust = base_1k.clone();
17028        let mut c = base_1k.clone();
17029        initialize_buckets_start_32s_1k(k, &mut rust);
17030        unsafe { probe_libsais16_initialize_buckets_start_32s_1k(k, c.as_mut_ptr()) };
17031        assert_eq!(rust, c);
17032
17033        let mut rust = base_1k.clone();
17034        let mut c = base_1k.clone();
17035        initialize_buckets_end_32s_1k(k, &mut rust);
17036        unsafe { probe_libsais16_initialize_buckets_end_32s_1k(k, c.as_mut_ptr()) };
17037        assert_eq!(rust, c);
17038
        // The remaining initialisers also take a text `t` and a trailing argument
        // of 4.
17039        let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17040        let mut rust = vec![1, 2, 0, 1, 3, 0, 2, 1];
17041        let mut c = rust.clone();
17042        initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(&t, k, &mut rust, 4);
17043        unsafe {
17044            probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
17045                t.as_ptr(),
17046                k,
17047                c.as_mut_ptr(),
17048                4,
17049            );
17050        }
17051        assert_eq!(rust, c);
17052
        // The 6k radix-sort initialiser returns a value, which is compared in
        // addition to the mutated buckets.
17053        let mut rust = vec![
17054            1, 2, 0, 1, 3, 0, 2, 1, 1, 0, 2, 0, 0, 1, 1, 0, 9, 9, 9, 9, 8, 8, 8, 8,
17055        ];
17056        let mut c = rust.clone();
17057        let rust_sum = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(&t, k, &mut rust, 4);
17058        let c_sum = unsafe {
17059            probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
17060                t.as_ptr(),
17061                k,
17062                c.as_mut_ptr(),
17063                4,
17064            )
17065        };
17066        assert_eq!(rust_sum, c_sum);
17067        assert_eq!(rust, c);
17068
        // `base_4k` is moved into `c` here because this is its final use.
17069        let mut rust = base_4k.clone();
17070        let mut c = base_4k;
17071        initialize_buckets_for_radix_and_partial_sorting_32s_4k(&t, k, &mut rust, 4);
17072        unsafe {
17073            probe_libsais16_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
17074                t.as_ptr(),
17075                k,
17076                c.as_mut_ptr(),
17077                4,
17078            );
17079        }
17080        assert_eq!(rust, c);
17081    }
17082
17083    #[test]
17084    fn libsais16_place_lms_suffixes_interval_32s_match_c() {
        // Parity test for `place_lms_suffixes_interval_32s_{4k,2k,1k}`: the Rust
        // function and the matching `probe_libsais16_*` function run on equal
        // inputs and the resulting SAs must be equal.
        //
        // SAFETY (all `unsafe` blocks below): the pointers come from `Vec`s that
        // are alive across the call; that the probe stays within their lengths is
        // assumed, not shown here.
17085        let n = 12;
17086        let k = 4;
17087        let m = 4;
17088
        // 4k variant: the bucket vector is shared read-only by both sides.
        // The fourth seed is addressed as the sum of two `buckets_index2` results.
17089        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
17090        let mut c_sa = rust_sa.clone();
17091        let mut buckets = vec![0; 4 * k as usize];
17092        buckets[buckets_index2(0, 1)] = 2;
17093        buckets[buckets_index2(1, 1)] = 2;
17094        buckets[buckets_index2(2, 1)] = 3;
17095        buckets[buckets_index2(2, 1) + buckets_index2(1, 0)] = 4;
17096        buckets[3 * k as usize + 1] = 7;
17097        buckets[3 * k as usize + 2] = 10;
17098        place_lms_suffixes_interval_32s_4k(&mut rust_sa, n, k, m, &buckets);
17099        unsafe {
17100            probe_libsais16_place_lms_suffixes_interval_32s_4k(
17101                c_sa.as_mut_ptr(),
17102                n,
17103                k,
17104                m,
17105                buckets.as_ptr(),
17106            );
17107        }
17108        assert_eq!(rust_sa, c_sa);
17109
        // 2k variant: 2 * k buckets, all seeded through `buckets_index2`.
17110        let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
17111        let mut c_sa = rust_sa.clone();
17112        let mut buckets = vec![0; 2 * k as usize];
17113        buckets[buckets_index2(1, 0)] = 7;
17114        buckets[buckets_index2(0, 1)] = 1;
17115        buckets[buckets_index2(1, 1)] = 1;
17116        buckets[buckets_index2(2, 0)] = 10;
17117        buckets[buckets_index2(2, 1)] = 2;
17118        buckets[buckets_index2(3, 1)] = 3;
17119        place_lms_suffixes_interval_32s_2k(&mut rust_sa, n, k, m, &buckets);
17120        unsafe {
17121            probe_libsais16_place_lms_suffixes_interval_32s_2k(
17122                c_sa.as_mut_ptr(),
17123                n,
17124                k,
17125                m,
17126                buckets.as_ptr(),
17127            );
17128        }
17129        assert_eq!(rust_sa, c_sa);
17130
        // 1k variant: takes the text `t` instead of `n`. The Rust side borrows
        // its buckets immutably while the probe receives a mutable pointer to a
        // clone, so the final assertion checks that the probe left its copy
        // equal to the untouched Rust one.
17131        let t = vec![0, 1, 2, 1, 2, 3, 1, 3, 0, 0, 0, 0];
17132        let mut rust_sa = vec![1, 3, 4, 7, 9, 9, 9, 9, 9, 9, 9, 9];
17133        let mut c_sa = rust_sa.clone();
17134        let rust_buckets = vec![0, 3, 6, 10];
17135        let mut c_buckets = rust_buckets.clone();
17136        place_lms_suffixes_interval_32s_1k(&t, &mut rust_sa, k, m, &rust_buckets);
17137        unsafe {
17138            probe_libsais16_place_lms_suffixes_interval_32s_1k(
17139                t.as_ptr(),
17140                c_sa.as_mut_ptr(),
17141                k,
17142                m,
17143                c_buckets.as_mut_ptr(),
17144            );
17145        }
17146        assert_eq!(rust_sa, c_sa);
17147        assert_eq!(rust_buckets, c_buckets);
17148    }
17149
17150    #[test]
17151    fn libsais16_renumber_and_mark_distinct_lms_suffixes_32s_1k_matches_c() {
        // Parity test for `renumber_and_mark_distinct_lms_suffixes_32s_1k_omp`:
        // the returned value, the text and the SA must match the probe's.
17152        let rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17153        let n = rust_t.len() as SaSint;
        // `m` is taken from a gather pass over a scratch SA that is not used again.
17154        let mut probe_sa = vec![0; rust_t.len()];
17155        let m = gather_lms_suffixes_32s(&rust_t, &mut probe_sa, n);
        // Both sides start from an all-zero SA of the text's length.
17156        let mut rust_sa = vec![0; rust_t.len()];
17157        let mut c_t = rust_t.clone();
17158        let mut c_sa = rust_sa.clone();
17159
        // The Rust side borrows the text immutably; the probe gets a mutable
        // pointer to its own clone.
17160        let rust_name =
17161            renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(&rust_t, &mut rust_sa, n, m, 1);
        // SAFETY: both pointers come from `Vec`s that are alive across the call;
        // that the probe stays within their lengths is assumed, not shown here.
17162        let c_name = unsafe {
17163            probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
17164                c_t.as_mut_ptr(),
17165                c_sa.as_mut_ptr(),
17166                n,
17167                m,
17168                1,
17169            )
17170        };
17171        assert_eq!(rust_name, c_name);
        // `rust_t` is an immutable binding, so this checks that the probe left
        // its copy of the text equal to the original.
17172        assert_eq!(rust_t, c_t);
17173        assert_eq!(rust_sa, c_sa);
17174    }
17175
17176    #[test]
17177    fn libsais16_reconstruct_compacted_lms_suffixes_32s_match_c() {
17178        let n = 8;
17179        let k = 4;
17180        let fs = 0;
17181        let f = 0;
17182        let mut m_probe_sa = vec![0; n as usize];
17183        let m = gather_lms_suffixes_32s(&[2, 1, 3, 1, 2, 0, 1, 0], &mut m_probe_sa, n);
17184
17185        let mut rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17186        let mut c_t = rust_t.clone();
17187        let mut rust_sa = vec![0; n as usize];
17188        let mut c_sa = rust_sa.clone();
17189        let mut rust_buckets = vec![0; 2 * k as usize];
17190        let mut c_buckets = rust_buckets.clone();
17191        let mut rust_thread_state = alloc_thread_state(1).unwrap();
17192        reconstruct_compacted_lms_suffixes_32s_2k_omp(
17193            &mut rust_t,
17194            &mut rust_sa,
17195            n,
17196            k,
17197            m,
17198            fs,
17199            f,
17200            &mut rust_buckets,
17201            0,
17202            1,
17203            &mut rust_thread_state,
17204        );
17205        unsafe {
17206            probe_libsais16_reconstruct_compacted_lms_suffixes_32s_2k_omp(
17207                c_t.as_mut_ptr(),
17208                c_sa.as_mut_ptr(),
17209                n,
17210                k,
17211                m,
17212                fs,
17213                f,
17214                c_buckets.as_mut_ptr(),
17215                0,
17216                1,
17217            );
17218        }
17219        assert_eq!(rust_t, c_t);
17220        assert_eq!(rust_sa, c_sa);
17221        assert_eq!(rust_buckets, c_buckets);
17222
17223        let mut rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17224        let mut c_t = rust_t.clone();
17225        let mut rust_sa = vec![0; n as usize];
17226        let mut c_sa = rust_sa.clone();
17227        reconstruct_compacted_lms_suffixes_32s_1k_omp(&mut rust_t, &mut rust_sa, n, m, fs, f, 1);
17228        unsafe {
17229            probe_libsais16_reconstruct_compacted_lms_suffixes_32s_1k_omp(
17230                c_t.as_mut_ptr(),
17231                c_sa.as_mut_ptr(),
17232                n,
17233                m,
17234                fs,
17235                f,
17236                1,
17237            );
17238        }
17239        assert_eq!(rust_t, c_t);
17240        assert_eq!(rust_sa, c_sa);
17241    }
17242
17243    #[test]
17244    fn libsais16_partial_omp_wrappers_match_c() {
17245        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17246        let mut c_sa = rust_sa.clone();
17247        let mut c_buckets = rust_buckets.clone();
17248
17249        let rust_d = partial_sorting_scan_left_to_right_16u_omp(
17250            &text,
17251            &mut rust_sa,
17252            text.len() as SaSint,
17253            8,
17254            &mut rust_buckets,
17255            5,
17256            3,
17257            1,
17258        );
17259        let c_d = unsafe {
17260            probe_libsais16_partial_sorting_scan_left_to_right_16u_omp(
17261                text.as_ptr(),
17262                c_sa.as_mut_ptr(),
17263                text.len() as SaSint,
17264                8,
17265                c_buckets.as_mut_ptr(),
17266                5,
17267                3,
17268                1,
17269            )
17270        };
17271        assert_eq!(rust_d, c_d);
17272        assert_eq!(rust_sa, c_sa);
17273        assert_eq!(rust_buckets, c_buckets);
17274
17275        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17276        rust_sa[6..10].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 9 | SAINT_MIN]);
17277        let mut c_sa = rust_sa.clone();
17278        let mut c_buckets = rust_buckets.clone();
17279        partial_sorting_scan_right_to_left_16u_omp(
17280            &text,
17281            &mut rust_sa,
17282            text.len() as SaSint,
17283            8,
17284            &mut rust_buckets,
17285            0,
17286            5,
17287            3,
17288            1,
17289        );
17290        unsafe {
17291            probe_libsais16_partial_sorting_scan_right_to_left_16u_omp(
17292                text.as_ptr(),
17293                c_sa.as_mut_ptr(),
17294                text.len() as SaSint,
17295                8,
17296                c_buckets.as_mut_ptr(),
17297                0,
17298                5,
17299                3,
17300                1,
17301            );
17302        }
17303        assert_eq!(rust_sa, c_sa);
17304        assert_eq!(rust_buckets, c_buckets);
17305
17306        let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17307        rust_sa[6..10].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 9 | SAINT_MIN]);
17308        let mut c_sa = rust_sa.clone();
17309        let mut c_buckets = rust_buckets.clone();
17310        partial_gsa_scan_right_to_left_16u_omp(
17311            &text,
17312            &mut rust_sa,
17313            text.len() as SaSint,
17314            8,
17315            &mut rust_buckets,
17316            0,
17317            5,
17318            3,
17319            1,
17320        );
17321        unsafe {
17322            probe_libsais16_partial_gsa_scan_right_to_left_16u_omp(
17323                text.as_ptr(),
17324                c_sa.as_mut_ptr(),
17325                text.len() as SaSint,
17326                8,
17327                c_buckets.as_mut_ptr(),
17328                0,
17329                5,
17330                3,
17331                1,
17332            );
17333        }
17334        assert_eq!(rust_sa, c_sa);
17335        assert_eq!(rust_buckets, c_buckets);
17336    }
17337
17338    #[test]
17339    fn libsais16_final_omp_wrappers_match_c() {
17340        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17341        let mut c_sa = rust_sa.clone();
17342        let mut c_bucket = rust_bucket.clone();
17343        final_bwt_scan_left_to_right_16u_omp(
17344            &text,
17345            &mut rust_sa,
17346            text.len() as SaSint,
17347            8,
17348            &mut rust_bucket,
17349            1,
17350        );
17351        unsafe {
17352            probe_libsais16_final_bwt_scan_left_to_right_16u_omp(
17353                text.as_ptr(),
17354                c_sa.as_mut_ptr(),
17355                text.len() as SaSint,
17356                8,
17357                c_bucket.as_mut_ptr(),
17358                1,
17359            );
17360        }
17361        assert_eq!(rust_sa, c_sa);
17362        assert_eq!(rust_bucket, c_bucket);
17363
17364        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17365        let mut c_sa = rust_sa.clone();
17366        let mut c_bucket = rust_bucket.clone();
17367        let mut rust_i = vec![-1; 8];
17368        let mut c_i = rust_i.clone();
17369        final_bwt_aux_scan_left_to_right_16u_omp(
17370            &text,
17371            &mut rust_sa,
17372            text.len() as SaSint,
17373            8,
17374            1,
17375            &mut rust_i,
17376            &mut rust_bucket,
17377            1,
17378        );
17379        unsafe {
17380            probe_libsais16_final_bwt_aux_scan_left_to_right_16u_omp(
17381                text.as_ptr(),
17382                c_sa.as_mut_ptr(),
17383                text.len() as SaSint,
17384                8,
17385                1,
17386                c_i.as_mut_ptr(),
17387                c_bucket.as_mut_ptr(),
17388                1,
17389            );
17390        }
17391        assert_eq!(rust_sa, c_sa);
17392        assert_eq!(rust_bucket, c_bucket);
17393        assert_eq!(rust_i, c_i);
17394
17395        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17396        let mut c_sa = rust_sa.clone();
17397        let mut c_bucket = rust_bucket.clone();
17398        final_sorting_scan_left_to_right_16u_omp(
17399            &text,
17400            &mut rust_sa,
17401            text.len() as SaSint,
17402            8,
17403            &mut rust_bucket,
17404            1,
17405        );
17406        unsafe {
17407            probe_libsais16_final_sorting_scan_left_to_right_16u_omp(
17408                text.as_ptr(),
17409                c_sa.as_mut_ptr(),
17410                text.len() as SaSint,
17411                8,
17412                c_bucket.as_mut_ptr(),
17413                1,
17414            );
17415        }
17416        assert_eq!(rust_sa, c_sa);
17417        assert_eq!(rust_bucket, c_bucket);
17418
17419        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17420        let mut c_sa = rust_sa.clone();
17421        let mut c_bucket = rust_bucket.clone();
17422        let rust_index = final_bwt_scan_right_to_left_16u_omp(
17423            &text,
17424            &mut rust_sa,
17425            text.len() as SaSint,
17426            8,
17427            &mut rust_bucket,
17428            1,
17429        );
17430        let c_index = unsafe {
17431            probe_libsais16_final_bwt_scan_right_to_left_16u_omp(
17432                text.as_ptr(),
17433                c_sa.as_mut_ptr(),
17434                text.len() as SaSint,
17435                8,
17436                c_bucket.as_mut_ptr(),
17437                1,
17438            )
17439        };
17440        assert_eq!(rust_index, c_index);
17441        assert_eq!(rust_sa, c_sa);
17442        assert_eq!(rust_bucket, c_bucket);
17443
17444        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17445        let mut c_sa = rust_sa.clone();
17446        let mut c_bucket = rust_bucket.clone();
17447        let mut rust_i = vec![-1; 8];
17448        let mut c_i = rust_i.clone();
17449        final_bwt_aux_scan_right_to_left_16u_omp(
17450            &text,
17451            &mut rust_sa,
17452            text.len() as SaSint,
17453            8,
17454            1,
17455            &mut rust_i,
17456            &mut rust_bucket,
17457            1,
17458        );
17459        unsafe {
17460            probe_libsais16_final_bwt_aux_scan_right_to_left_16u_omp(
17461                text.as_ptr(),
17462                c_sa.as_mut_ptr(),
17463                text.len() as SaSint,
17464                8,
17465                1,
17466                c_i.as_mut_ptr(),
17467                c_bucket.as_mut_ptr(),
17468                1,
17469            );
17470        }
17471        assert_eq!(rust_sa, c_sa);
17472        assert_eq!(rust_bucket, c_bucket);
17473        assert_eq!(rust_i, c_i);
17474
17475        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17476        let mut c_sa = rust_sa.clone();
17477        let mut c_bucket = rust_bucket.clone();
17478        final_sorting_scan_right_to_left_16u_omp(&text, &mut rust_sa, 0, 6, 8, &mut rust_bucket, 1);
17479        unsafe {
17480            probe_libsais16_final_sorting_scan_right_to_left_16u_omp(
17481                text.as_ptr(),
17482                c_sa.as_mut_ptr(),
17483                0,
17484                6,
17485                8,
17486                c_bucket.as_mut_ptr(),
17487                1,
17488            );
17489        }
17490        assert_eq!(rust_sa, c_sa);
17491        assert_eq!(rust_bucket, c_bucket);
17492
17493        let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17494        let mut c_sa = rust_sa.clone();
17495        let mut c_bucket = rust_bucket.clone();
17496        final_gsa_scan_right_to_left_16u_omp(&text, &mut rust_sa, 0, 6, 8, &mut rust_bucket, 1);
17497        unsafe {
17498            probe_libsais16_final_gsa_scan_right_to_left_16u_omp(
17499                text.as_ptr(),
17500                c_sa.as_mut_ptr(),
17501                0,
17502                6,
17503                8,
17504                c_bucket.as_mut_ptr(),
17505                1,
17506            );
17507        }
17508        assert_eq!(rust_sa, c_sa);
17509        assert_eq!(rust_bucket, c_bucket);
17510    }
17511
17512    #[test]
17513    fn libsais16_matches_bruteforce() {
17514        let t = [3, 1, 4, 1, 5, 9, 0, 2];
17515        let mut sa = vec![0; t.len()];
17516        let mut freq = vec![0; ALPHABET_SIZE];
17517        assert_eq!(libsais16(&t, &mut sa, 0, Some(&mut freq)), 0);
17518        assert_eq!(sa, brute_sa(&t));
17519        assert_eq!(freq[1], 2);
17520        assert_eq!(freq[9], 1);
17521    }
17522
17523    #[test]
17524    fn libsais16_bwt_round_trips() {
17525        let t = [2, 1, 3, 1, 2, 4, 1, 0];
17526        let mut bwt = vec![0; t.len()];
17527        let mut work = vec![0; t.len()];
17528        let primary = libsais16_bwt(&t, &mut bwt, &mut work, 0, None);
17529        assert!(primary > 0);
17530
17531        let mut restored = vec![0; t.len()];
17532        assert_eq!(
17533            libsais16_unbwt(&bwt, &mut restored, &mut work, None, primary),
17534            0
17535        );
17536        assert_eq!(restored, t);
17537    }
17538
17539    #[test]
17540    fn libsais16_plcp_lcp_are_consistent() {
17541        let t = [2, 1, 2, 1, 0];
17542        let sa = brute_sa(&t);
17543        let mut plcp = vec![0; t.len()];
17544        let mut lcp = vec![0; t.len()];
17545        assert_eq!(libsais16_plcp(&t, &sa, &mut plcp), 0);
17546        assert_eq!(libsais16_lcp(&plcp, &sa, &mut lcp), 0);
17547        assert_eq!(lcp[0], 0);
17548
17549        let mut named_plcp = vec![0; t.len()];
17550        assert_eq!(
17551            compute_phi_omp(&sa, &mut named_plcp, t.len() as SaSint, 1),
17552            0
17553        );
17554        assert_eq!(
17555            compute_plcp_omp(&t, &mut named_plcp, t.len() as SaSint, 1),
17556            0
17557        );
17558        assert_eq!(named_plcp, plcp);
17559
17560        let mut named_lcp = vec![0; t.len()];
17561        assert_eq!(
17562            compute_lcp_omp(&named_plcp, &sa, &mut named_lcp, t.len() as SaSint, 1),
17563            0
17564        );
17565        assert_eq!(named_lcp, lcp);
17566
17567        let mut gsa_plcp = vec![0; t.len()];
17568        let mut named_gsa_plcp = vec![0; t.len()];
17569        assert_eq!(libsais16_plcp_gsa(&t, &sa, &mut gsa_plcp), 0);
17570        assert_eq!(
17571            compute_phi_omp(&sa, &mut named_gsa_plcp, t.len() as SaSint, 1),
17572            0
17573        );
17574        assert_eq!(
17575            compute_plcp_gsa_omp(&t, &mut named_gsa_plcp, t.len() as SaSint, 1),
17576            0
17577        );
17578        assert_eq!(named_gsa_plcp, gsa_plcp);
17579    }
17580
17581    #[test]
17582    fn libsais16_bwt_copy_16u_omp_uses_block_partition_for_large_inputs() {
17583        let n = 65_600usize;
17584        let a: Vec<SaSint> = (0..n).map(|i| (i * 17) as SaSint).collect();
17585        let mut threaded = vec![0; n];
17586        let mut sequential = vec![0; n];
17587
17588        bwt_copy_16u_omp(&mut threaded, &a, n as SaSint, 4);
17589        bwt_copy_16u(&mut sequential, &a, n as SaSint);
17590
17591        assert_eq!(threaded, sequential);
17592    }
17593
17594    #[test]
17595    fn libsais16_plcp_lcp_omp_wrappers_match_single_thread_on_large_inputs() {
17596        let n = 65_600usize;
17597        let text: Vec<u16> = (0..n).map(|i| 1 + (i % 251) as u16).collect();
17598        let sa: Vec<SaSint> = (0..n as SaSint).collect();
17599
17600        let mut plcp_single = vec![0; n];
17601        let mut plcp_threaded = vec![0; n];
17602        assert_eq!(compute_phi_omp(&sa, &mut plcp_single, n as SaSint, 1), 0);
17603        assert_eq!(compute_phi_omp(&sa, &mut plcp_threaded, n as SaSint, 4), 0);
17604        assert_eq!(plcp_threaded, plcp_single);
17605
17606        assert_eq!(compute_plcp_omp(&text, &mut plcp_single, n as SaSint, 1), 0);
17607        assert_eq!(
17608            compute_plcp_omp(&text, &mut plcp_threaded, n as SaSint, 4),
17609            0
17610        );
17611        assert_eq!(plcp_threaded, plcp_single);
17612
17613        let mut lcp_single = vec![0; n];
17614        let mut lcp_threaded = vec![0; n];
17615        assert_eq!(
17616            compute_lcp_omp(&plcp_single, &sa, &mut lcp_single, n as SaSint, 1),
17617            0
17618        );
17619        assert_eq!(
17620            compute_lcp_omp(&plcp_threaded, &sa, &mut lcp_threaded, n as SaSint, 4),
17621            0
17622        );
17623        assert_eq!(lcp_threaded, lcp_single);
17624    }
17625
17626    #[test]
17627    fn libsais16_context_allocates_upstream_shaped_buffers() {
17628        let ctx = create_ctx().unwrap();
17629        assert_eq!(ctx.threads, 1);
17630        assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
17631        assert!(ctx.thread_state.is_none());
17632
17633        let ctx = create_ctx_omp(2).unwrap();
17634        assert_eq!(ctx.threads, 2);
17635        assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
17636        let thread_state = ctx.thread_state.as_ref().unwrap();
17637        assert_eq!(thread_state.len(), 2);
17638        assert_eq!(thread_state[0].buckets.len(), 4 * ALPHABET_SIZE);
17639        assert_eq!(thread_state[0].cache_entries, PER_THREAD_CACHE_SIZE);
17640
17641        let ctx = create_ctx_omp(0).unwrap();
17642        assert_eq!(ctx.threads, 1);
17643        assert!(ctx.thread_state.is_none());
17644    }
17645
17646    #[test]
17647    fn libsais16_unbwt_context_allocates_upstream_shaped_buffers() {
17648        let ctx = unbwt_create_ctx().unwrap();
17649        assert_eq!(ctx.threads, 1);
17650        assert_eq!(ctx.bucket2.len(), ALPHABET_SIZE);
17651        assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));
17652        assert!(ctx.buckets.is_none());
17653
17654        let ctx = unbwt_create_ctx_omp(3).unwrap();
17655        assert_eq!(ctx.threads, 3);
17656        assert_eq!(ctx.bucket2.len(), ALPHABET_SIZE);
17657        assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));
17658        assert_eq!(ctx.buckets.as_ref().unwrap().len(), 3 * ALPHABET_SIZE);
17659    }
17660
17661    #[test]
17662    fn libsais16_named_unbwt_helpers_follow_decode_shapes() {
17663        let t = [0, 1, 2];
17664        let mut p = vec![usize::MAX; 4];
17665        let mut bucket2 = vec![0; ALPHABET_SIZE];
17666        bucket2[0] = 1;
17667        bucket2[1] = 2;
17668        bucket2[2] = 3;
17669        unbwt_calculate_P(&t, &mut p, &mut bucket2, 2, 1, 3);
17670        assert_eq!(p[2], 1);
17671        assert_eq!(p[3], 3);
17672
17673        let p = [1usize, 2, 0];
17674        let mut bucket2 = vec![3; ALPHABET_SIZE];
17675        bucket2[0] = 1;
17676        bucket2[1] = 2;
17677        bucket2[2] = 3;
17678        let fastbits = vec![0; 3];
17679
17680        let mut u = vec![99; 3];
17681        let mut i0 = 0;
17682        unbwt_decode_1(&mut u, &p, &bucket2, &fastbits, 0, &mut i0, 3);
17683        assert_eq!(u, vec![0, 1, 2]);
17684        assert_eq!(i0, 0);
17685
17686        let mut u = vec![99; 6];
17687        let (mut i0, mut i1) = (0, 1);
17688        unbwt_decode_2(&mut u, &p, &bucket2, &fastbits, 0, 3, &mut i0, &mut i1, 2);
17689        assert_eq!(&u[..2], &[0, 1]);
17690        assert_eq!(&u[3..5], &[1, 2]);
17691        assert_eq!((i0, i1), (2, 0));
17692
17693        let mut u = vec![99; 8];
17694        let mut cursors = [0; 8];
17695        unbwt_decode_8(&mut u, &p, &bucket2, &fastbits, 0, 1, &mut cursors, 1);
17696        assert_eq!(u, vec![0; 8]);
17697        assert_eq!(cursors, [1; 8]);
17698    }
17699
17700    #[test]
17701    fn libsais16_unbwt_init_parallel_uses_block_partition() {
17702        let n = 70_003usize;
17703        let t: Vec<u16> = (0..n)
17704            .map(|i| ((i.wrapping_mul(37).wrapping_add(i >> 3)) % 251) as u16)
17705            .collect();
17706        let i = [12_345];
17707
17708        let mut single_p = vec![0; n + 1];
17709        let mut threaded_p = vec![0; n + 1];
17710        let mut single_bucket2 = vec![0; ALPHABET_SIZE];
17711        let mut threaded_bucket2 = vec![0; ALPHABET_SIZE];
17712        let mut single_fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
17713        let mut threaded_fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
17714        let mut buckets = vec![0; 4 * ALPHABET_SIZE];
17715
17716        unbwt_init_single(
17717            &t,
17718            &mut single_p,
17719            None,
17720            &i,
17721            &mut single_bucket2,
17722            &mut single_fastbits,
17723        );
17724        unbwt_init_parallel(
17725            &t,
17726            &mut threaded_p,
17727            None,
17728            &i,
17729            &mut threaded_bucket2,
17730            &mut threaded_fastbits,
17731            &mut buckets,
17732            4,
17733        );
17734
17735        assert_eq!(threaded_p, single_p);
17736        assert_eq!(threaded_bucket2, single_bucket2);
17737        assert_eq!(threaded_fastbits, single_fastbits);
17738    }
17739
17740    fn assert_libsais16_matches_c(text: &[u16]) {
17741        let mut rust_sa = vec![0; text.len()];
17742        let mut c_sa = vec![0; text.len()];
17743
17744        let rust_rc = libsais16(text, &mut rust_sa, 0, None);
17745        let c_rc = unsafe {
17746            probe_public_libsais16(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17747        };
17748
17749        assert_eq!(rust_rc, c_rc);
17750        assert_eq!(rust_sa, c_sa);
17751    }
17752
17753    fn assert_libsais16_gsa_matches_c(text: &[u16]) {
17754        let mut rust_sa = vec![0; text.len()];
17755        let mut c_sa = vec![0; text.len()];
17756
17757        let rust_rc = libsais16_gsa(text, &mut rust_sa, 0, None);
17758        let c_rc = unsafe {
17759            probe_public_libsais16_gsa(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17760        };
17761
17762        assert_eq!(rust_rc, c_rc);
17763        assert_eq!(rust_sa, c_sa);
17764    }
17765
17766    fn assert_libsais16_int_matches_c(text: &[SaSint], k: SaSint) {
17767        let mut rust_t = text.to_vec();
17768        let mut c_t = text.to_vec();
17769        let mut rust_sa = vec![0; text.len()];
17770        let mut c_sa = vec![0; text.len()];
17771
17772        let rust_rc = libsais16_int(&mut rust_t, &mut rust_sa, k, 0);
17773        let c_rc = unsafe {
17774            probe_public_libsais16_int(
17775                c_t.as_mut_ptr(),
17776                c_sa.as_mut_ptr(),
17777                c_t.len() as SaSint,
17778                k,
17779                0,
17780            )
17781        };
17782
17783        assert_eq!(rust_rc, c_rc);
17784        assert_eq!(rust_t, c_t);
17785        assert_eq!(rust_sa, c_sa);
17786    }
17787
17788    fn assert_libsais16_bwt_matches_c(text: &[u16]) {
17789        let mut rust_u = vec![0; text.len()];
17790        let mut rust_a = vec![0; text.len()];
17791        let mut c_u = vec![0; text.len()];
17792        let mut c_a = vec![0; text.len()];
17793
17794        let rust_rc = libsais16_bwt(text, &mut rust_u, &mut rust_a, 0, None);
17795        let c_rc = unsafe {
17796            probe_public_libsais16_bwt(
17797                text.as_ptr(),
17798                c_u.as_mut_ptr(),
17799                c_a.as_mut_ptr(),
17800                text.len() as SaSint,
17801                0,
17802            )
17803        };
17804
17805        assert_eq!(rust_rc, c_rc);
17806        assert_eq!(rust_u, c_u);
17807    }
17808
17809    fn assert_libsais16_bwt_aux_matches_c(text: &[u16], r: SaSint) {
17810        let aux_len = if text.is_empty() {
17811            0
17812        } else {
17813            (text.len() - 1) / r as usize + 1
17814        };
17815        let mut rust_u = vec![0; text.len()];
17816        let mut rust_a = vec![0; text.len()];
17817        let mut rust_i = vec![0; aux_len];
17818        let mut c_u = vec![0; text.len()];
17819        let mut c_a = vec![0; text.len()];
17820        let mut c_i = vec![0; aux_len];
17821
17822        let rust_rc = libsais16_bwt_aux(text, &mut rust_u, &mut rust_a, 0, None, r, &mut rust_i);
17823        let c_rc = unsafe {
17824            probe_public_libsais16_bwt_aux(
17825                text.as_ptr(),
17826                c_u.as_mut_ptr(),
17827                c_a.as_mut_ptr(),
17828                text.len() as SaSint,
17829                0,
17830                r,
17831                c_i.as_mut_ptr(),
17832            )
17833        };
17834
17835        assert_eq!(rust_rc, c_rc);
17836        assert_eq!(rust_u, c_u);
17837        assert_eq!(rust_i, c_i);
17838    }
17839
17840    fn assert_libsais16_freq_outputs_match_c(text: &[u16], gsa_text: &[u16]) {
17841        let mut rust_sa = vec![0; text.len()];
17842        let mut c_sa = vec![0; text.len()];
17843        let mut rust_freq = vec![-1; ALPHABET_SIZE];
17844        let mut c_freq = vec![-1; ALPHABET_SIZE];
17845
17846        let rust_rc = libsais16(text, &mut rust_sa, 0, Some(&mut rust_freq));
17847        let c_rc = unsafe {
17848            probe_public_libsais16_freq(
17849                text.as_ptr(),
17850                c_sa.as_mut_ptr(),
17851                text.len() as SaSint,
17852                0,
17853                c_freq.as_mut_ptr(),
17854            )
17855        };
17856        assert_eq!(rust_rc, c_rc);
17857        assert_eq!(rust_sa, c_sa);
17858        assert_eq!(rust_freq, c_freq);
17859
17860        let mut rust_gsa = vec![0; gsa_text.len()];
17861        let mut c_gsa = vec![0; gsa_text.len()];
17862        rust_freq.fill(-1);
17863        c_freq.fill(-1);
17864        let rust_rc = libsais16_gsa(gsa_text, &mut rust_gsa, 0, Some(&mut rust_freq));
17865        let c_rc = unsafe {
17866            probe_public_libsais16_gsa_freq(
17867                gsa_text.as_ptr(),
17868                c_gsa.as_mut_ptr(),
17869                gsa_text.len() as SaSint,
17870                0,
17871                c_freq.as_mut_ptr(),
17872            )
17873        };
17874        assert_eq!(rust_rc, c_rc);
17875        assert_eq!(rust_gsa, c_gsa);
17876        assert_eq!(rust_freq, c_freq);
17877
17878        let mut rust_u = vec![0; text.len()];
17879        let mut rust_a = vec![0; text.len()];
17880        let mut c_u = vec![0; text.len()];
17881        let mut c_a = vec![0; text.len()];
17882        rust_freq.fill(-1);
17883        c_freq.fill(-1);
17884        let rust_rc = libsais16_bwt(text, &mut rust_u, &mut rust_a, 0, Some(&mut rust_freq));
17885        let c_rc = unsafe {
17886            probe_public_libsais16_bwt_freq(
17887                text.as_ptr(),
17888                c_u.as_mut_ptr(),
17889                c_a.as_mut_ptr(),
17890                text.len() as SaSint,
17891                0,
17892                c_freq.as_mut_ptr(),
17893            )
17894        };
17895        assert_eq!(rust_rc, c_rc);
17896        assert_eq!(rust_u, c_u);
17897        assert_eq!(rust_freq, c_freq);
17898
17899        let r = 4;
17900        let aux_len = (text.len() - 1) / r as usize + 1;
17901        let mut rust_i = vec![0; aux_len];
17902        let mut c_i = vec![0; aux_len];
17903        rust_freq.fill(-1);
17904        c_freq.fill(-1);
17905        let rust_rc = libsais16_bwt_aux(
17906            text,
17907            &mut rust_u,
17908            &mut rust_a,
17909            0,
17910            Some(&mut rust_freq),
17911            r,
17912            &mut rust_i,
17913        );
17914        let c_rc = unsafe {
17915            probe_public_libsais16_bwt_aux_freq(
17916                text.as_ptr(),
17917                c_u.as_mut_ptr(),
17918                c_a.as_mut_ptr(),
17919                text.len() as SaSint,
17920                0,
17921                c_freq.as_mut_ptr(),
17922                r,
17923                c_i.as_mut_ptr(),
17924            )
17925        };
17926        assert_eq!(rust_rc, c_rc);
17927        assert_eq!(rust_u, c_u);
17928        assert_eq!(rust_i, c_i);
17929        assert_eq!(rust_freq, c_freq);
17930    }
17931
17932    fn assert_libsais16_unbwt_matches_c(text: &[u16]) {
17933        let mut bwt = vec![0; text.len()];
17934        let mut work = vec![0; text.len()];
17935        let primary = libsais16_bwt(text, &mut bwt, &mut work, 0, None);
17936        assert!(primary >= 0);
17937
17938        let mut rust_u = vec![0; text.len()];
17939        let mut rust_a = vec![0; text.len() + 1];
17940        let mut c_u = vec![0; text.len()];
17941        let mut c_a = vec![0; text.len() + 1];
17942
17943        let rust_rc = libsais16_unbwt(&bwt, &mut rust_u, &mut rust_a, None, primary);
17944        let c_rc = unsafe {
17945            probe_public_libsais16_unbwt(
17946                bwt.as_ptr(),
17947                c_u.as_mut_ptr(),
17948                c_a.as_mut_ptr(),
17949                bwt.len() as SaSint,
17950                primary,
17951            )
17952        };
17953
17954        assert_eq!(rust_rc, c_rc);
17955        assert_eq!(rust_u, c_u);
17956        assert_eq!(rust_u, text);
17957    }
17958
17959    fn assert_libsais16_unbwt_aux_matches_c(text: &[u16], r: SaSint) {
17960        let mut bwt = vec![0; text.len()];
17961        let mut work = vec![0; text.len()];
17962        let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
17963        let bwt_rc = libsais16_bwt_aux(text, &mut bwt, &mut work, 0, None, r, &mut aux);
17964        assert_eq!(bwt_rc, 0);
17965
17966        let mut rust_u = vec![0; text.len()];
17967        let mut rust_a = vec![0; text.len() + 1];
17968        let mut c_u = vec![0; text.len()];
17969        let mut c_a = vec![0; text.len() + 1];
17970
17971        let rust_rc = libsais16_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, None, r, &aux);
17972        let c_rc = unsafe {
17973            probe_public_libsais16_unbwt_aux(
17974                bwt.as_ptr(),
17975                c_u.as_mut_ptr(),
17976                c_a.as_mut_ptr(),
17977                bwt.len() as SaSint,
17978                r,
17979                aux.as_ptr(),
17980            )
17981        };
17982
17983        assert_eq!(rust_rc, c_rc);
17984        assert_eq!(rust_u, c_u);
17985        assert_eq!(rust_u, text);
17986    }
17987
17988    fn assert_libsais16_unbwt_freq_matches_c(text: &[u16]) {
17989        let mut freq = vec![0; ALPHABET_SIZE];
17990        let mut bwt = vec![0; text.len()];
17991        let mut work = vec![0; text.len()];
17992        let primary = libsais16_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
17993        assert!(primary >= 0);
17994
17995        let mut rust_u = vec![0; text.len()];
17996        let mut rust_a = vec![0; text.len() + 1];
17997        let mut c_u = vec![0; text.len()];
17998        let mut c_a = vec![0; text.len() + 1];
17999
18000        let rust_rc = libsais16_unbwt(&bwt, &mut rust_u, &mut rust_a, Some(&freq), primary);
18001        let c_rc = unsafe {
18002            probe_public_libsais16_unbwt_freq(
18003                bwt.as_ptr(),
18004                c_u.as_mut_ptr(),
18005                c_a.as_mut_ptr(),
18006                bwt.len() as SaSint,
18007                freq.as_ptr(),
18008                primary,
18009            )
18010        };
18011        assert_eq!(rust_rc, c_rc);
18012        assert_eq!(rust_u, c_u);
18013        assert_eq!(rust_u, text);
18014
18015        let r = 4;
18016        let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
18017        let bwt_rc = libsais16_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), r, &mut aux);
18018        assert_eq!(bwt_rc, 0);
18019
18020        rust_u.fill(0);
18021        rust_a.fill(0);
18022        c_u.fill(0);
18023        c_a.fill(0);
18024        let rust_rc = libsais16_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, Some(&freq), r, &aux);
18025        let c_rc = unsafe {
18026            probe_public_libsais16_unbwt_aux_freq(
18027                bwt.as_ptr(),
18028                c_u.as_mut_ptr(),
18029                c_a.as_mut_ptr(),
18030                bwt.len() as SaSint,
18031                freq.as_ptr(),
18032                r,
18033                aux.as_ptr(),
18034            )
18035        };
18036        assert_eq!(rust_rc, c_rc);
18037        assert_eq!(rust_u, c_u);
18038        assert_eq!(rust_u, text);
18039    }
18040
18041    fn assert_libsais16_plcp_lcp_matches_c(text: &[u16]) {
18042        let mut sa = vec![0; text.len()];
18043        assert_eq!(libsais16(text, &mut sa, 0, None), 0);
18044
18045        let mut rust_plcp = vec![0; text.len()];
18046        let mut c_plcp = vec![0; text.len()];
18047        let rust_rc = libsais16_plcp(text, &sa, &mut rust_plcp);
18048        let c_rc = unsafe {
18049            probe_public_libsais16_plcp(
18050                text.as_ptr(),
18051                sa.as_ptr(),
18052                c_plcp.as_mut_ptr(),
18053                text.len() as SaSint,
18054            )
18055        };
18056        assert_eq!(rust_rc, c_rc);
18057        assert_eq!(rust_plcp, c_plcp);
18058
18059        let mut rust_lcp = vec![0; text.len()];
18060        let mut c_lcp = vec![0; text.len()];
18061        let rust_rc = libsais16_lcp(&rust_plcp, &sa, &mut rust_lcp);
18062        let c_rc = unsafe {
18063            probe_public_libsais16_lcp(
18064                c_plcp.as_ptr(),
18065                sa.as_ptr(),
18066                c_lcp.as_mut_ptr(),
18067                text.len() as SaSint,
18068            )
18069        };
18070        assert_eq!(rust_rc, c_rc);
18071        assert_eq!(rust_lcp, c_lcp);
18072    }
18073
18074    fn assert_libsais16_plcp_gsa_matches_c(text: &[u16]) {
18075        let mut sa = vec![0; text.len()];
18076        assert_eq!(libsais16_gsa(text, &mut sa, 0, None), 0);
18077
18078        let mut rust_plcp = vec![0; text.len()];
18079        let mut c_plcp = vec![0; text.len()];
18080        let rust_rc = libsais16_plcp_gsa(text, &sa, &mut rust_plcp);
18081        let c_rc = unsafe {
18082            probe_public_libsais16_plcp_gsa(
18083                text.as_ptr(),
18084                sa.as_ptr(),
18085                c_plcp.as_mut_ptr(),
18086                text.len() as SaSint,
18087            )
18088        };
18089        assert_eq!(rust_rc, c_rc);
18090        assert_eq!(rust_plcp, c_plcp);
18091    }
18092
18093    #[test]
18094    fn public_libsais16_matches_upstream_c() {
18095        for text in [
18096            [].as_slice(),
18097            &[1][..],
18098            &[2, 1, 3, 1, 2, 0],
18099            &[2, 1, 3, 1, 2, 4, 1, 0],
18100            &[65_535, 1, 65_534, 1, 0],
18101            &[7, 7, 7, 7, 7, 0],
18102        ] {
18103            assert_libsais16_matches_c(text);
18104        }
18105    }
18106
18107    #[test]
18108    fn public_libsais16_bwt_matches_upstream_c() {
18109        for text in [
18110            [].as_slice(),
18111            &[1][..],
18112            &[2, 1, 3, 1, 2, 0],
18113            &[2, 1, 3, 1, 2, 4, 1, 0],
18114            &[65_535, 1, 65_534, 1, 0],
18115            &[7, 7, 7, 7, 7, 0],
18116        ] {
18117            assert_libsais16_bwt_matches_c(text);
18118        }
18119    }
18120
18121    #[test]
18122    fn public_libsais16_gsa_matches_upstream_c() {
18123        for text in [&[0][..], &[2, 1, 0], &[2, 1, 0, 3, 1, 0], &[7, 7, 0, 7, 0]] {
18124            assert_libsais16_gsa_matches_c(text);
18125        }
18126    }
18127
18128    #[test]
18129    fn public_libsais16_int_matches_upstream_c() {
18130        for (text, k) in [
18131            (&[][..], 0),
18132            (&[0][..], 1),
18133            (&[1, 2, 1, 0][..], 3),
18134            (&[2, 1, 2, 1, 0][..], 3),
18135            (&[3, 3, 3, 2, 1, 0][..], 4),
18136        ] {
18137            assert_libsais16_int_matches_c(text, k);
18138        }
18139    }
18140
18141    #[test]
18142    fn public_libsais16_plcp_lcp_matches_upstream_c() {
18143        for text in [
18144            &[2, 1, 3, 1, 2, 0][..],
18145            &[2, 1, 3, 1, 2, 4, 1, 0],
18146            &[65_535, 1, 65_534, 1, 0],
18147            &[7, 7, 7, 7, 7, 0],
18148        ] {
18149            assert_libsais16_plcp_lcp_matches_c(text);
18150        }
18151    }
18152
18153    #[test]
18154    fn public_libsais16_plcp_gsa_matches_upstream_c() {
18155        for text in [&[0][..], &[2, 1, 0], &[2, 1, 0, 3, 1, 0], &[7, 7, 0, 7, 0]] {
18156            assert_libsais16_plcp_gsa_matches_c(text);
18157        }
18158    }
18159
18160    #[test]
18161    fn public_libsais16_bwt_aux_matches_upstream_c() {
18162        for text in [
18163            &[2, 1, 3, 1, 2, 0][..],
18164            &[2, 1, 3, 1, 2, 4, 1, 0],
18165            &[65_535, 1, 65_534, 1, 0],
18166            &[7, 7, 7, 7, 7, 0],
18167        ] {
18168            assert_libsais16_bwt_aux_matches_c(text, 4);
18169        }
18170    }
18171
18172    #[test]
18173    fn public_libsais16_frequency_outputs_match_upstream_c() {
18174        assert_libsais16_freq_outputs_match_c(&[65_535, 1, 2, 1, 0], &[65_535, 1, 0, 2, 1, 0]);
18175    }
18176
18177    #[test]
18178    fn public_libsais16_unbwt_with_frequency_matches_upstream_c() {
18179        assert_libsais16_unbwt_freq_matches_c(&[65_535, 1, 2, 1, 0]);
18180    }
18181
18182    #[test]
18183    fn public_libsais16_unbwt_matches_upstream_c() {
18184        for text in [
18185            &[1][..],
18186            &[2, 1, 3, 1, 2, 0],
18187            &[2, 1, 3, 1, 2, 4, 1, 0],
18188            &[65_535, 1, 65_534, 1, 0],
18189            &[7, 7, 7, 7, 7, 0],
18190        ] {
18191            assert_libsais16_unbwt_matches_c(text);
18192        }
18193    }
18194
18195    #[test]
18196    fn public_libsais16_unbwt_aux_matches_upstream_c() {
18197        for text in [
18198            &[2, 1, 3, 1, 2, 0][..],
18199            &[2, 1, 3, 1, 2, 4, 1, 0],
18200            &[65_535, 1, 65_534, 1, 0],
18201            &[7, 7, 7, 7, 7, 0],
18202        ] {
18203            assert_libsais16_unbwt_aux_matches_c(text, 4);
18204        }
18205    }
18206
18207    #[test]
18208    fn public_libsais16_unbwt_aux_exercises_decode_dispatch_cases() {
18209        for len in [2usize, 5, 9, 13, 17, 21, 25, 29, 33, 37] {
18210            let text = (0..len)
18211                .map(|i| ((i * 37 + 11) % 65_535 + 1) as u16)
18212                .collect::<Vec<_>>();
18213            assert_libsais16_unbwt_aux_matches_c(&text, 4);
18214        }
18215    }
18216
18217    #[test]
18218    fn libsais16_lcp_helpers_reject_invalid_suffix_entries() {
18219        let text = [2, 1, 2, 1, 0];
18220        let mut plcp = vec![0; text.len()];
18221        let mut lcp = vec![0; text.len()];
18222
18223        assert_eq!(libsais16_plcp(&text, &[0, 1, -1, 3, 4], &mut plcp), -1);
18224        assert_eq!(libsais16_plcp(&text, &[0, 1, 2, 3, 5], &mut plcp), -1);
18225        assert_eq!(libsais16_lcp(&plcp, &[0, 1, -1, 3, 4], &mut lcp), -1);
18226        assert_eq!(libsais16_lcp(&plcp, &[0, 1, 2, 3, 5], &mut lcp), -1);
18227    }
18228
18229    #[test]
18230    fn libsais16_rejects_invalid_public_arguments() {
18231        let text = [2, 1, 3, 1, 2, 0];
18232        let int_text = [1, 2, 1, 0];
18233        let mut int_text_for_short_sa = int_text.to_vec();
18234        let mut int_text_for_negative_fs = int_text.to_vec();
18235        let mut sa = vec![0; text.len() - 1];
18236        let mut int_sa = vec![0; int_text.len() - 1];
18237        let mut full_int_sa = vec![0; int_text.len()];
18238        let mut freq = vec![0; ALPHABET_SIZE - 1];
18239        let mut u = vec![0; text.len() - 1];
18240        let mut a = vec![0; text.len() - 1];
18241        let mut full_u = vec![0; text.len()];
18242        let mut full_a = vec![0; text.len()];
18243        let mut aux = vec![0; 1];
18244
18245        assert_eq!(libsais16(&text, &mut sa, 0, None), -1);
18246        assert_eq!(libsais16(&text, &mut full_a, 0, Some(&mut freq)), -1);
18247        assert_eq!(libsais16_gsa(&[1, 2, 3], &mut full_a[..3], 0, None), -1);
18248        assert_eq!(
18249            libsais16_int(&mut int_text_for_short_sa, &mut int_sa, 3, 0),
18250            -1
18251        );
18252        assert_eq!(
18253            libsais16_int(&mut int_text_for_negative_fs, &mut full_int_sa, 3, -1),
18254            -1
18255        );
18256        assert_eq!(libsais16_bwt(&text, &mut u, &mut full_a, 0, None), -1);
18257        assert_eq!(libsais16_bwt(&text, &mut full_u, &mut a, 0, None), -1);
18258        assert_eq!(
18259            libsais16_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 0, &mut aux),
18260            -1
18261        );
18262        assert_eq!(
18263            libsais16_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 3, &mut aux),
18264            -1
18265        );
18266        assert_eq!(
18267            libsais16_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 4, &mut aux),
18268            -1
18269        );
18270        assert_eq!(create_ctx_omp(-1), None);
18271        assert_eq!(unbwt_create_ctx_omp(-1), None);
18272    }
18273
18274    #[test]
18275    fn libsais16_unbwt_rejects_invalid_public_arguments() {
18276        let text = [2, 1, 3, 1, 2, 0];
18277        let mut bwt = vec![0; text.len()];
18278        let mut work = vec![0; text.len()];
18279        let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18280
18281        let mut short_u = vec![0; text.len() - 1];
18282        let mut short_a = vec![0; text.len() - 1];
18283        let mut full_u = vec![0; text.len()];
18284        let mut full_a = vec![0; text.len()];
18285        let short_freq = vec![0; ALPHABET_SIZE - 1];
18286        let short_aux = vec![primary];
18287        let bad_aux = vec![0, 0];
18288        let good_aux = vec![primary, 4];
18289
18290        assert_eq!(
18291            libsais16_unbwt(&bwt, &mut short_u, &mut full_a, None, primary),
18292            -1
18293        );
18294        assert_eq!(
18295            libsais16_unbwt(&bwt, &mut full_u, &mut short_a, None, primary),
18296            -1
18297        );
18298        assert_eq!(
18299            libsais16_unbwt(&bwt, &mut full_u, &mut full_a, Some(&short_freq), primary),
18300            -1
18301        );
18302        assert_eq!(libsais16_unbwt(&bwt, &mut full_u, &mut full_a, None, 0), -1);
18303        assert_eq!(
18304            libsais16_unbwt(
18305                &bwt,
18306                &mut full_u,
18307                &mut full_a,
18308                None,
18309                text.len() as SaSint + 1
18310            ),
18311            -1
18312        );
18313        assert_eq!(
18314            libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 0, &good_aux),
18315            -1
18316        );
18317        assert_eq!(
18318            libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
18319            -1
18320        );
18321        assert_eq!(
18322            libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &short_aux),
18323            -1
18324        );
18325        assert_eq!(
18326            libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
18327            -1
18328        );
18329    }
18330
18331    #[test]
18332    fn libsais16_ctx_rejects_invalid_public_arguments() {
18333        let text = [2, 1, 3, 1, 2, 0];
18334        let mut ctx = create_ctx().unwrap();
18335        let mut sa = vec![0; text.len() - 1];
18336        let mut freq = vec![0; ALPHABET_SIZE - 1];
18337        let mut u = vec![0; text.len() - 1];
18338        let mut a = vec![0; text.len() - 1];
18339        let mut full_u = vec![0; text.len()];
18340        let mut full_a = vec![0; text.len()];
18341        let mut aux = vec![0; 1];
18342
18343        assert_eq!(libsais16_ctx(&mut ctx, &text, &mut sa, 0, None), -1);
18344        assert_eq!(
18345            libsais16_ctx(&mut ctx, &text, &mut full_a, 0, Some(&mut freq)),
18346            -1
18347        );
18348        assert_eq!(
18349            libsais16_gsa_ctx(&mut ctx, &[1, 2, 3], &mut full_a[..3], 0, None),
18350            -1
18351        );
18352        assert_eq!(
18353            libsais16_bwt_ctx(&mut ctx, &text, &mut u, &mut full_a, 0, None),
18354            -1
18355        );
18356        assert_eq!(
18357            libsais16_bwt_ctx(&mut ctx, &text, &mut full_u, &mut a, 0, None),
18358            -1
18359        );
18360        assert_eq!(
18361            libsais16_bwt_aux_ctx(
18362                &mut ctx,
18363                &text,
18364                &mut full_u,
18365                &mut full_a,
18366                0,
18367                None,
18368                0,
18369                &mut aux
18370            ),
18371            -1
18372        );
18373        assert_eq!(
18374            libsais16_bwt_aux_ctx(
18375                &mut ctx,
18376                &text,
18377                &mut full_u,
18378                &mut full_a,
18379                0,
18380                None,
18381                3,
18382                &mut aux
18383            ),
18384            -1
18385        );
18386        assert_eq!(
18387            libsais16_bwt_aux_ctx(
18388                &mut ctx,
18389                &text,
18390                &mut full_u,
18391                &mut full_a,
18392                0,
18393                None,
18394                4,
18395                &mut aux
18396            ),
18397            -1
18398        );
18399
18400        let mut default_ctx = Context::default();
18401        assert_eq!(
18402            libsais16_ctx(&mut default_ctx, &text, &mut full_a, 0, None),
18403            -2
18404        );
18405
18406        let mut bad_bucket_ctx = create_ctx().unwrap();
18407        bad_bucket_ctx.buckets.clear();
18408        assert_eq!(
18409            libsais16_ctx(&mut bad_bucket_ctx, &text, &mut full_a, 0, None),
18410            -2
18411        );
18412
18413        let mut short_thread_state_ctx = create_ctx_omp(2).unwrap();
18414        short_thread_state_ctx
18415            .thread_state
18416            .as_mut()
18417            .unwrap()
18418            .truncate(1);
18419        assert_eq!(
18420            libsais16_ctx(&mut short_thread_state_ctx, &text, &mut full_a, 0, None),
18421            -2
18422        );
18423    }
18424
18425    #[test]
18426    fn libsais16_unbwt_ctx_rejects_invalid_public_arguments() {
18427        let text = [2, 1, 3, 1, 2, 0];
18428        let mut bwt = vec![0; text.len()];
18429        let mut work = vec![0; text.len()];
18430        let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18431        let mut ctx = unbwt_create_ctx().unwrap();
18432
18433        let mut short_u = vec![0; text.len() - 1];
18434        let mut short_a = vec![0; text.len() - 1];
18435        let mut full_u = vec![0; text.len()];
18436        let mut full_a = vec![0; text.len()];
18437        let short_freq = vec![0; ALPHABET_SIZE - 1];
18438        let short_aux = vec![primary];
18439        let bad_aux = vec![0, 0];
18440        let good_aux = vec![primary, 4];
18441
18442        assert_eq!(
18443            libsais16_unbwt_ctx(&mut ctx, &bwt, &mut short_u, &mut full_a, None, primary),
18444            -1
18445        );
18446        assert_eq!(
18447            libsais16_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut short_a, None, primary),
18448            -1
18449        );
18450        assert_eq!(
18451            libsais16_unbwt_ctx(
18452                &mut ctx,
18453                &bwt,
18454                &mut full_u,
18455                &mut full_a,
18456                Some(&short_freq),
18457                primary
18458            ),
18459            -1
18460        );
18461        assert_eq!(
18462            libsais16_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0),
18463            -1
18464        );
18465        assert_eq!(
18466            libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0, &good_aux),
18467            -1
18468        );
18469        assert_eq!(
18470            libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
18471            -1
18472        );
18473        assert_eq!(
18474            libsais16_unbwt_aux_ctx(
18475                &mut ctx,
18476                &bwt,
18477                &mut full_u,
18478                &mut full_a,
18479                None,
18480                4,
18481                &short_aux
18482            ),
18483            -1
18484        );
18485        assert_eq!(
18486            libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
18487            -1
18488        );
18489    }
18490
18491    #[test]
18492    fn libsais16_context_wrappers_match_direct_calls() {
18493        let text = [2, 1, 3, 1, 2, 0];
18494        let mut ctx = create_ctx().unwrap();
18495
18496        let mut direct_sa = vec![0; text.len()];
18497        let mut ctx_sa = vec![0; text.len()];
18498        assert_eq!(libsais16(&text, &mut direct_sa, 0, None), 0);
18499        assert_eq!(libsais16_ctx(&mut ctx, &text, &mut ctx_sa, 0, None), 0);
18500        assert_eq!(ctx_sa, direct_sa);
18501
18502        let mut direct_bwt = vec![0; text.len()];
18503        let mut direct_work = vec![0; text.len()];
18504        let mut ctx_bwt = vec![0; text.len()];
18505        let mut ctx_work = vec![0; text.len()];
18506        assert_eq!(
18507            libsais16_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None),
18508            libsais16_bwt_ctx(&mut ctx, &text, &mut ctx_bwt, &mut ctx_work, 0, None)
18509        );
18510        assert_eq!(ctx_bwt, direct_bwt);
18511
18512        let mut direct_aux = vec![0; 2];
18513        let mut ctx_aux = vec![0; 2];
18514        assert_eq!(
18515            libsais16_bwt_aux(
18516                &text,
18517                &mut direct_bwt,
18518                &mut direct_work,
18519                0,
18520                None,
18521                4,
18522                &mut direct_aux
18523            ),
18524            libsais16_bwt_aux_ctx(
18525                &mut ctx,
18526                &text,
18527                &mut ctx_bwt,
18528                &mut ctx_work,
18529                0,
18530                None,
18531                4,
18532                &mut ctx_aux
18533            )
18534        );
18535        assert_eq!(ctx_bwt, direct_bwt);
18536        assert_eq!(ctx_aux, direct_aux);
18537    }
18538
18539    #[test]
18540    fn libsais16_unbwt_context_wrappers_match_direct_calls() {
18541        let text = [2, 1, 3, 1, 2, 0];
18542        let mut bwt = vec![0; text.len()];
18543        let mut work = vec![0; text.len()];
18544        let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18545
18546        let mut ctx = unbwt_create_ctx().unwrap();
18547        let mut direct = vec![0; text.len()];
18548        let mut direct_work = vec![0; text.len()];
18549        let mut via_ctx = vec![0; text.len()];
18550        let mut ctx_work = vec![0; text.len()];
18551
18552        assert_eq!(
18553            libsais16_unbwt(&bwt, &mut direct, &mut direct_work, None, primary),
18554            0
18555        );
18556        assert_eq!(
18557            libsais16_unbwt_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, primary),
18558            0
18559        );
18560        assert_eq!(via_ctx, direct);
18561
18562        let mut aux = vec![0; 2];
18563        assert_eq!(
18564            libsais16_bwt_aux(&text, &mut bwt, &mut work, 0, None, 4, &mut aux),
18565            0
18566        );
18567        assert_eq!(
18568            libsais16_unbwt_aux(&bwt, &mut direct, &mut direct_work, None, 4, &aux),
18569            0
18570        );
18571        assert_eq!(
18572            libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, 4, &aux),
18573            0
18574        );
18575        assert_eq!(via_ctx, direct);
18576    }
18577
18578    #[test]
18579    fn libsais16_ctx_frequency_wrappers_match_direct_calls() {
18580        let text = [2, 1, 3, 1, 2, 0];
18581        let gsa_text = [2, 1, 0, 3, 1, 0];
18582        let mut ctx = create_ctx().unwrap();
18583
18584        let mut direct_sa = vec![0; text.len()];
18585        let mut ctx_sa = vec![0; text.len()];
18586        let mut direct_freq = vec![-1; ALPHABET_SIZE];
18587        let mut ctx_freq = vec![-1; ALPHABET_SIZE];
18588        assert_eq!(
18589            libsais16(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
18590            0
18591        );
18592        assert_eq!(
18593            libsais16_ctx(&mut ctx, &text, &mut ctx_sa, 0, Some(&mut ctx_freq)),
18594            0
18595        );
18596        assert_eq!(ctx_sa, direct_sa);
18597        assert_eq!(ctx_freq, direct_freq);
18598
18599        let mut direct_gsa = vec![0; gsa_text.len()];
18600        let mut ctx_gsa = vec![0; gsa_text.len()];
18601        direct_freq.fill(-1);
18602        ctx_freq.fill(-1);
18603        assert_eq!(
18604            libsais16_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
18605            0
18606        );
18607        assert_eq!(
18608            libsais16_gsa_ctx(&mut ctx, &gsa_text, &mut ctx_gsa, 0, Some(&mut ctx_freq)),
18609            0
18610        );
18611        assert_eq!(ctx_gsa, direct_gsa);
18612        assert_eq!(ctx_freq, direct_freq);
18613
18614        let mut direct_bwt = vec![0; text.len()];
18615        let mut direct_work = vec![0; text.len()];
18616        let mut ctx_bwt = vec![0; text.len()];
18617        let mut ctx_work = vec![0; text.len()];
18618        direct_freq.fill(-1);
18619        ctx_freq.fill(-1);
18620        assert_eq!(
18621            libsais16_bwt(
18622                &text,
18623                &mut direct_bwt,
18624                &mut direct_work,
18625                0,
18626                Some(&mut direct_freq)
18627            ),
18628            libsais16_bwt_ctx(
18629                &mut ctx,
18630                &text,
18631                &mut ctx_bwt,
18632                &mut ctx_work,
18633                0,
18634                Some(&mut ctx_freq)
18635            )
18636        );
18637        assert_eq!(ctx_bwt, direct_bwt);
18638        assert_eq!(ctx_freq, direct_freq);
18639
18640        let mut direct_aux = vec![0; 2];
18641        let mut ctx_aux = vec![0; 2];
18642        direct_freq.fill(-1);
18643        ctx_freq.fill(-1);
18644        assert_eq!(
18645            libsais16_bwt_aux(
18646                &text,
18647                &mut direct_bwt,
18648                &mut direct_work,
18649                0,
18650                Some(&mut direct_freq),
18651                4,
18652                &mut direct_aux
18653            ),
18654            libsais16_bwt_aux_ctx(
18655                &mut ctx,
18656                &text,
18657                &mut ctx_bwt,
18658                &mut ctx_work,
18659                0,
18660                Some(&mut ctx_freq),
18661                4,
18662                &mut ctx_aux
18663            )
18664        );
18665        assert_eq!(ctx_bwt, direct_bwt);
18666        assert_eq!(ctx_aux, direct_aux);
18667        assert_eq!(ctx_freq, direct_freq);
18668    }
18669
18670    #[test]
18671    fn libsais16_unbwt_ctx_frequency_wrappers_match_direct_calls() {
18672        let text = [2, 1, 3, 1, 2, 0];
18673        let mut freq = vec![0; ALPHABET_SIZE];
18674        let mut bwt = vec![0; text.len()];
18675        let mut work = vec![0; text.len()];
18676        let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, Some(&mut freq));
18677        assert!(primary >= 0);
18678
18679        let mut ctx = unbwt_create_ctx().unwrap();
18680        let mut direct = vec![0; text.len()];
18681        let mut direct_work = vec![0; text.len() + 1];
18682        let mut via_ctx = vec![0; text.len()];
18683        let mut ctx_work = vec![0; text.len() + 1];
18684        assert_eq!(
18685            libsais16_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
18686            libsais16_unbwt_ctx(
18687                &mut ctx,
18688                &bwt,
18689                &mut via_ctx,
18690                &mut ctx_work,
18691                Some(&freq),
18692                primary
18693            )
18694        );
18695        assert_eq!(via_ctx, direct);
18696        assert_eq!(via_ctx, text);
18697
18698        let mut aux = vec![0; (text.len() - 1) / 4 + 1];
18699        assert_eq!(
18700            libsais16_bwt_aux(&text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
18701            0
18702        );
18703        direct.fill(0);
18704        direct_work.fill(0);
18705        via_ctx.fill(0);
18706        ctx_work.fill(0);
18707        assert_eq!(
18708            libsais16_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
18709            libsais16_unbwt_aux_ctx(
18710                &mut ctx,
18711                &bwt,
18712                &mut via_ctx,
18713                &mut ctx_work,
18714                Some(&freq),
18715                4,
18716                &aux
18717            )
18718        );
18719        assert_eq!(via_ctx, direct);
18720        assert_eq!(via_ctx, text);
18721    }
18722
18723    #[test]
18724    fn libsais16_omp_wrappers_match_direct_calls_and_reject_negative_threads() {
18725        let text = [2, 1, 3, 1, 2, 0];
18726        let gsa_text = [2, 1, 0, 3, 1, 0];
18727        let mut direct_sa = vec![0; text.len()];
18728        let mut omp_sa = vec![0; text.len()];
18729        assert_eq!(libsais16(&text, &mut direct_sa, 0, None), 0);
18730        assert_eq!(libsais16_omp(&text, &mut omp_sa, 0, None, 2), 0);
18731        assert_eq!(omp_sa, direct_sa);
18732        assert_eq!(libsais16_omp(&text, &mut omp_sa, 0, None, -1), -1);
18733
18734        let mut direct_gsa = vec![0; gsa_text.len()];
18735        let mut omp_gsa = vec![0; gsa_text.len()];
18736        assert_eq!(libsais16_gsa(&gsa_text, &mut direct_gsa, 0, None), 0);
18737        assert_eq!(libsais16_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, 2), 0);
18738        assert_eq!(omp_gsa, direct_gsa);
18739        assert_eq!(libsais16_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, -1), -1);
18740
18741        let int_text = [1, 2, 1, 0];
18742        let mut direct_int_text = int_text.to_vec();
18743        let mut omp_int_text = int_text.to_vec();
18744        let mut direct_int_sa = vec![0; int_text.len()];
18745        let mut omp_int_sa = vec![0; int_text.len()];
18746        assert_eq!(
18747            libsais16_int(&mut direct_int_text, &mut direct_int_sa, 3, 0),
18748            0
18749        );
18750        assert_eq!(
18751            libsais16_int_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, 2),
18752            0
18753        );
18754        assert_eq!(omp_int_text, direct_int_text);
18755        assert_eq!(omp_int_sa, direct_int_sa);
18756        assert_eq!(
18757            libsais16_int_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, -1),
18758            -1
18759        );
18760
18761        let mut direct_bwt = vec![0; text.len()];
18762        let mut direct_work = vec![0; text.len()];
18763        let mut omp_bwt = vec![0; text.len()];
18764        let mut omp_work = vec![0; text.len()];
18765        assert_eq!(
18766            libsais16_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None),
18767            libsais16_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, 2)
18768        );
18769        assert_eq!(omp_bwt, direct_bwt);
18770        assert_eq!(
18771            libsais16_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, -1),
18772            -1
18773        );
18774
18775        let mut direct_aux = vec![0; 2];
18776        let mut omp_aux = vec![0; 2];
18777        assert_eq!(
18778            libsais16_bwt_aux(
18779                &text,
18780                &mut direct_bwt,
18781                &mut direct_work,
18782                0,
18783                None,
18784                4,
18785                &mut direct_aux
18786            ),
18787            libsais16_bwt_aux_omp(
18788                &text,
18789                &mut omp_bwt,
18790                &mut omp_work,
18791                0,
18792                None,
18793                4,
18794                &mut omp_aux,
18795                2
18796            )
18797        );
18798        assert_eq!(omp_bwt, direct_bwt);
18799        assert_eq!(omp_aux, direct_aux);
18800        assert_eq!(
18801            libsais16_bwt_aux_omp(
18802                &text,
18803                &mut omp_bwt,
18804                &mut omp_work,
18805                0,
18806                None,
18807                4,
18808                &mut omp_aux,
18809                -1
18810            ),
18811            -1
18812        );
18813    }
18814
18815    #[test]
18816    fn libsais16_omp_frequency_wrappers_match_direct_calls() {
18817        let text = [2, 1, 3, 1, 2, 0];
18818        let gsa_text = [2, 1, 0, 3, 1, 0];
18819        let mut direct_sa = vec![0; text.len()];
18820        let mut omp_sa = vec![0; text.len()];
18821        let mut direct_freq = vec![-1; ALPHABET_SIZE];
18822        let mut omp_freq = vec![-1; ALPHABET_SIZE];
18823        assert_eq!(
18824            libsais16(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
18825            0
18826        );
18827        assert_eq!(
18828            libsais16_omp(&text, &mut omp_sa, 0, Some(&mut omp_freq), 2),
18829            0
18830        );
18831        assert_eq!(omp_sa, direct_sa);
18832        assert_eq!(omp_freq, direct_freq);
18833
18834        let mut direct_gsa = vec![0; gsa_text.len()];
18835        let mut omp_gsa = vec![0; gsa_text.len()];
18836        direct_freq.fill(-1);
18837        omp_freq.fill(-1);
18838        assert_eq!(
18839            libsais16_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
18840            0
18841        );
18842        assert_eq!(
18843            libsais16_gsa_omp(&gsa_text, &mut omp_gsa, 0, Some(&mut omp_freq), 2),
18844            0
18845        );
18846        assert_eq!(omp_gsa, direct_gsa);
18847        assert_eq!(omp_freq, direct_freq);
18848
18849        let mut direct_bwt = vec![0; text.len()];
18850        let mut direct_work = vec![0; text.len()];
18851        let mut omp_bwt = vec![0; text.len()];
18852        let mut omp_work = vec![0; text.len()];
18853        direct_freq.fill(-1);
18854        omp_freq.fill(-1);
18855        assert_eq!(
18856            libsais16_bwt(
18857                &text,
18858                &mut direct_bwt,
18859                &mut direct_work,
18860                0,
18861                Some(&mut direct_freq)
18862            ),
18863            libsais16_bwt_omp(
18864                &text,
18865                &mut omp_bwt,
18866                &mut omp_work,
18867                0,
18868                Some(&mut omp_freq),
18869                2
18870            )
18871        );
18872        assert_eq!(omp_bwt, direct_bwt);
18873        assert_eq!(omp_freq, direct_freq);
18874
18875        let mut direct_aux = vec![0; 2];
18876        let mut omp_aux = vec![0; 2];
18877        direct_freq.fill(-1);
18878        omp_freq.fill(-1);
18879        assert_eq!(
18880            libsais16_bwt_aux(
18881                &text,
18882                &mut direct_bwt,
18883                &mut direct_work,
18884                0,
18885                Some(&mut direct_freq),
18886                4,
18887                &mut direct_aux
18888            ),
18889            libsais16_bwt_aux_omp(
18890                &text,
18891                &mut omp_bwt,
18892                &mut omp_work,
18893                0,
18894                Some(&mut omp_freq),
18895                4,
18896                &mut omp_aux,
18897                2
18898            )
18899        );
18900        assert_eq!(omp_bwt, direct_bwt);
18901        assert_eq!(omp_aux, direct_aux);
18902        assert_eq!(omp_freq, direct_freq);
18903    }
18904
18905    #[test]
18906    fn libsais16_unbwt_omp_frequency_wrappers_match_direct_calls() {
18907        let text = [2, 1, 3, 1, 2, 0];
18908        let mut freq = vec![0; ALPHABET_SIZE];
18909        let mut bwt = vec![0; text.len()];
18910        let mut work = vec![0; text.len()];
18911        let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, Some(&mut freq));
18912        assert!(primary >= 0);
18913
18914        let mut direct = vec![0; text.len()];
18915        let mut direct_work = vec![0; text.len() + 1];
18916        let mut omp = vec![0; text.len()];
18917        let mut omp_work = vec![0; text.len() + 1];
18918        assert_eq!(
18919            libsais16_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
18920            libsais16_unbwt_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), primary, 2)
18921        );
18922        assert_eq!(omp, direct);
18923        assert_eq!(omp, text);
18924
18925        let mut aux = vec![0; (text.len() - 1) / 4 + 1];
18926        assert_eq!(
18927            libsais16_bwt_aux(&text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
18928            0
18929        );
18930        direct.fill(0);
18931        direct_work.fill(0);
18932        omp.fill(0);
18933        omp_work.fill(0);
18934        assert_eq!(
18935            libsais16_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
18936            libsais16_unbwt_aux_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), 4, &aux, 2)
18937        );
18938        assert_eq!(omp, direct);
18939        assert_eq!(omp, text);
18940    }
18941
18942    #[test]
18943    fn libsais16_lcp_and_unbwt_omp_wrappers_match_direct_calls() {
18944        let text = [2, 1, 3, 1, 2, 0];
18945        let mut sa = vec![0; text.len()];
18946        assert_eq!(libsais16(&text, &mut sa, 0, None), 0);
18947
18948        let mut direct_plcp = vec![0; text.len()];
18949        let mut omp_plcp = vec![0; text.len()];
18950        assert_eq!(libsais16_plcp(&text, &sa, &mut direct_plcp), 0);
18951        assert_eq!(libsais16_plcp_omp(&text, &sa, &mut omp_plcp, 2), 0);
18952        assert_eq!(omp_plcp, direct_plcp);
18953        assert_eq!(libsais16_plcp_omp(&text, &sa, &mut omp_plcp, -1), -1);
18954
18955        let gsa_text = [2, 1, 0, 1, 2, 0];
18956        let mut gsa = vec![0; gsa_text.len()];
18957        assert_eq!(libsais16_gsa(&gsa_text, &mut gsa, 0, None), 0);
18958        let mut direct_gsa_plcp = vec![0; gsa_text.len()];
18959        let mut omp_gsa_plcp = vec![0; gsa_text.len()];
18960        assert_eq!(libsais16_plcp_gsa(&gsa_text, &gsa, &mut direct_gsa_plcp), 0);
18961        assert_eq!(
18962            libsais16_plcp_gsa_omp(&gsa_text, &gsa, &mut omp_gsa_plcp, 2),
18963            0
18964        );
18965        assert_eq!(omp_gsa_plcp, direct_gsa_plcp);
18966        assert_eq!(
18967            libsais16_plcp_gsa_omp(&gsa_text, &gsa, &mut omp_gsa_plcp, -1),
18968            -1
18969        );
18970
18971        let mut direct_lcp = vec![0; text.len()];
18972        let mut omp_lcp = vec![0; text.len()];
18973        assert_eq!(libsais16_lcp(&direct_plcp, &sa, &mut direct_lcp), 0);
18974        assert_eq!(libsais16_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, 2), 0);
18975        assert_eq!(omp_lcp, direct_lcp);
18976        assert_eq!(libsais16_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, -1), -1);
18977
18978        let mut bwt = vec![0; text.len()];
18979        let mut work = vec![0; text.len()];
18980        let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18981        let mut direct = vec![0; text.len()];
18982        let mut omp = vec![0; text.len()];
18983        let mut direct_work = vec![0; text.len()];
18984        let mut omp_work = vec![0; text.len()];
18985        assert_eq!(
18986            libsais16_unbwt(&bwt, &mut direct, &mut direct_work, None, primary),
18987            0
18988        );
18989        assert_eq!(
18990            libsais16_unbwt_omp(&bwt, &mut omp, &mut omp_work, None, primary, 2),
18991            0
18992        );
18993        assert_eq!(omp, direct);
18994        assert_eq!(
18995            libsais16_unbwt_omp(&bwt, &mut omp, &mut omp_work, None, primary, -1),
18996            -1
18997        );
18998    }
18999}