1use std::mem;
8
/// Signed 32-bit integer used for suffix-array entries, counts and thread numbers.
pub type SaSint = i32;
/// Unsigned 32-bit counterpart of [`SaSint`].
pub type SaUint = u32;

/// Number of distinct `u16` symbol values (2^16).
pub const ALPHABET_SIZE: usize = 1 << u16::BITS;
/// Largest [`SaSint`]; also used as a mask that clears the sign bit.
const SAINT_MAX: SaSint = i32::MAX;
/// Smallest [`SaSint`], i.e. only the sign bit set.
const SAINT_MIN: SaSint = i32::MIN;
/// Width of [`SaSint`] in bits.
const SAINT_BIT: u32 = SaSint::BITS;
/// Bit position derived from the integer width (`SAINT_BIT - 1`).
const SUFFIX_GROUP_BIT: u32 = SAINT_BIT - 1;
/// Single-bit mask one position below `SUFFIX_GROUP_BIT` (bit 30).
const SUFFIX_GROUP_MARKER: SaSint = 1 << (SUFFIX_GROUP_BIT - 1);
// NOTE(review): the two flag bits below are not referenced in the visible part
// of the file; their names suggest BWT / generalized-suffix-array modes.
const LIBSAIS_FLAGS_BWT: SaSint = 1 << 0;
const LIBSAIS_FLAGS_GSA: SaSint = 1 << 1;
/// Free-space figure assumed when a caller reports that local buckets are in use.
const LIBSAIS_LOCAL_BUFFER_SIZE: usize = 2_000;
/// The `fastbits` table of an [`UnbwtContext`] holds `1 + 2^UNBWT_FASTBITS` entries.
const UNBWT_FASTBITS: usize = 17;
/// Number of [`ThreadCache`] entries allocated for every worker thread.
const PER_THREAD_CACHE_SIZE: usize = 2_097_184;
23
/// One staged suffix placement: a destination slot paired with the value to store.
///
/// `#[repr(C)]` pins the field order and layout of the pair.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct ThreadCache {
    /// Position in the suffix array that `index` is written to when the entry
    /// is placed. Compaction drops entries whose `symbol` is negative.
    symbol: SaSint,
    /// Value stored into the suffix array at position `symbol`.
    index: SaSint,
}
30
/// Scratch state owned by one worker of the multi-threaded routines.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ThreadState {
    /// End (exclusive) of the text block this worker processed; the
    /// `*_fs_omp` routines set it to `block_start + block_size`.
    position: SaSint,
    // NOTE(review): `m` and `last_lms_suffix` are not read or written in the
    // visible part of the file; confirm their meaning against their users.
    m: SaSint,
    last_lms_suffix: SaSint,
    /// Number of LMS suffixes the worker gathered for its block.
    count: SaSint,
    /// Per-thread bucket table, allocated with `4 * ALPHABET_SIZE` entries.
    buckets: Vec<SaSint>,
    /// Per-thread placement cache, allocated with `PER_THREAD_CACHE_SIZE` entries.
    cache: Vec<ThreadCache>,
    /// Capacity of `cache`, stored alongside it at allocation time.
    cache_entries: usize,
}
41
/// Reusable working memory for suffix-array construction.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Context {
    /// Shared bucket table, allocated with `8 * ALPHABET_SIZE` entries.
    buckets: Vec<SaSint>,
    /// Per-thread scratch state; only allocated when `threads > 1`.
    thread_state: Option<Vec<ThreadState>>,
    /// Thread count the context was created for.
    threads: SaSint,
}
48
/// Reusable working memory for the inverse-BWT routines.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct UnbwtContext {
    /// Table allocated with `ALPHABET_SIZE` entries.
    bucket2: Vec<usize>,
    /// Lookup table allocated with `1 + 2^UNBWT_FASTBITS` entries.
    fastbits: Vec<u16>,
    /// Per-thread tables (`threads * ALPHABET_SIZE` entries); only allocated
    /// when `threads > 1`.
    buckets: Option<Vec<usize>>,
    /// Thread count the context was created for.
    threads: SaSint,
}
56
57pub fn create_ctx() -> Option<Context> {
63 create_ctx_main(1)
64}
65
66pub fn create_ctx_omp(threads: SaSint) -> Option<Context> {
74 if threads < 0 {
75 None
76 } else {
77 create_ctx_main(normalize_threads(threads))
78 }
79}
80
81pub fn free_ctx(_ctx: Context) {}
83
84pub fn unbwt_create_ctx() -> Option<UnbwtContext> {
90 unbwt_create_ctx_main(1)
91}
92
93pub fn unbwt_create_ctx_omp(threads: SaSint) -> Option<UnbwtContext> {
101 if threads < 0 {
102 None
103 } else {
104 unbwt_create_ctx_main(normalize_threads(threads))
105 }
106}
107
108pub fn unbwt_free_ctx(_ctx: UnbwtContext) {}
110
111fn normalize_threads(threads: SaSint) -> SaSint {
112 if threads > 0 {
113 threads
114 } else {
115 1
116 }
117}
118
/// Rounds `value` up to the next multiple of `alignment`.
///
/// The bit trick only yields a multiple when `alignment` is a power of two.
fn align_up(value: usize, alignment: usize) -> usize {
    let mask = alignment - 1;
    (value + mask) & !mask
}
122
123fn alloc_thread_state(threads: SaSint) -> Option<Vec<ThreadState>> {
124 let threads = usize::try_from(threads).ok()?;
125 let mut thread_state = Vec::with_capacity(threads);
126 for _ in 0..threads {
127 thread_state.push(ThreadState {
128 position: 0,
129 m: 0,
130 last_lms_suffix: 0,
131 count: 0,
132 buckets: vec![0; 4 * ALPHABET_SIZE],
133 cache: vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE],
134 cache_entries: PER_THREAD_CACHE_SIZE,
135 });
136 }
137 Some(thread_state)
138}
139
140fn create_ctx_main(threads: SaSint) -> Option<Context> {
141 let buckets = vec![0; 8 * ALPHABET_SIZE];
142 let thread_state = if threads > 1 {
143 Some(alloc_thread_state(threads)?)
144 } else {
145 None
146 };
147
148 Some(Context {
149 buckets,
150 thread_state,
151 threads,
152 })
153}
154
155fn unbwt_create_ctx_main(threads: SaSint) -> Option<UnbwtContext> {
156 let bucket2 = vec![0; ALPHABET_SIZE];
157 let fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
158 let buckets = if threads > 1 {
159 Some(vec![0; usize::try_from(threads).ok()? * ALPHABET_SIZE])
160 } else {
161 None
162 };
163
164 Some(UnbwtContext {
165 bucket2,
166 fastbits,
167 buckets,
168 threads,
169 })
170}
171
172fn fill_freq(t: &[u16], freq: Option<&mut [SaSint]>) {
173 if let Some(freq) = freq {
174 freq[..ALPHABET_SIZE].fill(0);
175 for &symbol in t {
176 freq[symbol as usize] += 1;
177 }
178 }
179}
180
/// Index of sub-slot `s` of symbol `c` in a table with four slots per symbol.
#[allow(dead_code)]
fn buckets_index4(c: usize, s: usize) -> usize {
    let symbol_base = c << 2;
    symbol_base + s
}
185
/// Index of sub-slot `s` of symbol `c` in a table with two slots per symbol.
#[allow(dead_code)]
fn buckets_index2(c: usize, s: usize) -> usize {
    let symbol_base = c << 1;
    symbol_base + s
}
190
191#[allow(dead_code)]
192fn place_cached_suffixes(
193 sa: &mut [SaSint],
194 cache: &[ThreadCache],
195 block_start: SaSint,
196 block_size: SaSint,
197) {
198 let start = usize::try_from(block_start).expect("block_start must be non-negative");
199 let len = usize::try_from(block_size).expect("block_size must be non-negative");
200 let entries = if cache.len() >= start + len {
201 &cache[start..start + len]
202 } else {
203 &cache[..len]
204 };
205
206 for entry in entries {
207 sa[entry.symbol as usize] = entry.index;
208 }
209}
210
211#[allow(dead_code)]
212fn compact_and_place_cached_suffixes(
213 sa: &mut [SaSint],
214 cache: &mut [ThreadCache],
215 block_start: SaSint,
216 block_size: SaSint,
217) {
218 let start = usize::try_from(block_start).expect("block_start must be non-negative");
219 let len = usize::try_from(block_size).expect("block_size must be non-negative");
220 let read_start = if cache.len() >= start + len { start } else { 0 };
221 let read_end = read_start + len;
222
223 let mut write = read_start;
224 for read in read_start..read_end {
225 let entry = cache[read];
226 if entry.symbol >= 0 {
227 cache[write] = entry;
228 write += 1;
229 }
230 }
231 place_cached_suffixes(sa, cache, block_start, (write - read_start) as SaSint);
232}
233
234#[allow(dead_code)]
235fn count_negative_marked_suffixes(
236 sa: &[SaSint],
237 block_start: SaSint,
238 block_size: SaSint,
239) -> SaSint {
240 let start = block_start as usize;
241 let end = start + block_size as usize;
242 sa[start..end].iter().filter(|&&value| value < 0).count() as SaSint
243}
244
245#[allow(dead_code)]
246fn count_zero_marked_suffixes(sa: &[SaSint], block_start: SaSint, block_size: SaSint) -> SaSint {
247 let start = block_start as usize;
248 let end = start + block_size as usize;
249 sa[start..end].iter().filter(|&&value| value == 0).count() as SaSint
250}
251
252#[allow(dead_code)]
253fn accumulate_counts_s32_n(
254 buckets: &mut [SaSint],
255 bucket00: usize,
256 bucket_size: usize,
257 bucket_stride: usize,
258 num_buckets: usize,
259) {
260 for s in 0..bucket_size {
261 let mut sum = buckets[bucket00 + s];
262 for bucket in 1..num_buckets {
263 sum += buckets[bucket00 - bucket * bucket_stride + s];
264 }
265 buckets[bucket00 + s] = sum;
266 }
267}
268
269#[allow(dead_code)]
270fn accumulate_counts_s32_2(
271 buckets: &mut [SaSint],
272 bucket00: usize,
273 bucket_size: usize,
274 bucket_stride: usize,
275) {
276 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 2);
277}
278
279#[allow(dead_code)]
280fn accumulate_counts_s32_3(
281 buckets: &mut [SaSint],
282 bucket00: usize,
283 bucket_size: usize,
284 bucket_stride: usize,
285) {
286 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 3);
287}
288
289#[allow(dead_code)]
290fn accumulate_counts_s32_4(
291 buckets: &mut [SaSint],
292 bucket00: usize,
293 bucket_size: usize,
294 bucket_stride: usize,
295) {
296 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 4);
297}
298
299#[allow(dead_code)]
300fn accumulate_counts_s32_5(
301 buckets: &mut [SaSint],
302 bucket00: usize,
303 bucket_size: usize,
304 bucket_stride: usize,
305) {
306 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 5);
307}
308
309#[allow(dead_code)]
310fn accumulate_counts_s32_6(
311 buckets: &mut [SaSint],
312 bucket00: usize,
313 bucket_size: usize,
314 bucket_stride: usize,
315) {
316 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 6);
317}
318
319#[allow(dead_code)]
320fn accumulate_counts_s32_7(
321 buckets: &mut [SaSint],
322 bucket00: usize,
323 bucket_size: usize,
324 bucket_stride: usize,
325) {
326 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 7);
327}
328
329#[allow(dead_code)]
330fn accumulate_counts_s32_8(
331 buckets: &mut [SaSint],
332 bucket00: usize,
333 bucket_size: usize,
334 bucket_stride: usize,
335) {
336 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 8);
337}
338
339#[allow(dead_code)]
340fn accumulate_counts_s32_9(
341 buckets: &mut [SaSint],
342 bucket00: usize,
343 bucket_size: usize,
344 bucket_stride: usize,
345) {
346 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 9);
347}
348
349#[allow(dead_code)]
350fn accumulate_counts_s32(
351 buckets: &mut [SaSint],
352 bucket00: usize,
353 bucket_size: usize,
354 bucket_stride: usize,
355 mut num_buckets: usize,
356) {
357 while num_buckets >= 9 {
358 accumulate_counts_s32_9(
359 buckets,
360 bucket00 - (num_buckets - 9) * bucket_stride,
361 bucket_size,
362 bucket_stride,
363 );
364 num_buckets -= 8;
365 }
366
367 match num_buckets {
368 2 => accumulate_counts_s32_2(buckets, bucket00, bucket_size, bucket_stride),
369 3 => accumulate_counts_s32_3(buckets, bucket00, bucket_size, bucket_stride),
370 4 => accumulate_counts_s32_4(buckets, bucket00, bucket_size, bucket_stride),
371 5 => accumulate_counts_s32_5(buckets, bucket00, bucket_size, bucket_stride),
372 6 => accumulate_counts_s32_6(buckets, bucket00, bucket_size, bucket_stride),
373 7 => accumulate_counts_s32_7(buckets, bucket00, bucket_size, bucket_stride),
374 8 => accumulate_counts_s32_8(buckets, bucket00, bucket_size, bucket_stride),
375 _ => {}
376 }
377}
378
379#[allow(dead_code)]
380fn flip_suffix_markers_omp(sa: &mut [SaSint], l: SaSint, threads: SaSint) {
381 let len = usize::try_from(l).expect("l must be non-negative");
382 let omp_num_threads = if threads > 1 && l >= 65_536 {
383 usize::try_from(threads).expect("threads must be non-negative")
384 } else {
385 1
386 };
387 let omp_block_stride = (len / omp_num_threads) & !15usize;
388 for omp_thread_num in 0..omp_num_threads {
389 let omp_block_start = omp_thread_num * omp_block_stride;
390 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
391 omp_block_stride
392 } else {
393 len - omp_block_start
394 };
395 for value in &mut sa[omp_block_start..omp_block_start + omp_block_size] {
396 *value ^= SAINT_MIN;
397 }
398 }
399}
400
401#[allow(dead_code)]
402fn gather_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
403 let mut i = n - 2;
404 let mut m = n - 1;
405 let mut f0 = 1usize;
406 let mut f1: usize;
407 let mut c0 = t[(n - 1) as usize] as isize;
408 let mut c1: isize;
409
410 while i >= 3 {
411 c1 = t[i as usize] as isize;
412 f1 = usize::from(c1 > c0 - f0 as isize);
413 sa[m as usize] = i + 1;
414 m -= (f1 & !f0) as SaSint;
415
416 c0 = t[(i - 1) as usize] as isize;
417 f0 = usize::from(c0 > c1 - f1 as isize);
418 sa[m as usize] = i;
419 m -= (f0 & !f1) as SaSint;
420
421 c1 = t[(i - 2) as usize] as isize;
422 f1 = usize::from(c1 > c0 - f0 as isize);
423 sa[m as usize] = i - 1;
424 m -= (f1 & !f0) as SaSint;
425
426 c0 = t[(i - 3) as usize] as isize;
427 f0 = usize::from(c0 > c1 - f1 as isize);
428 sa[m as usize] = i - 2;
429 m -= (f0 & !f1) as SaSint;
430
431 i -= 4;
432 }
433
434 while i >= 0 {
435 c1 = c0;
436 c0 = t[i as usize] as isize;
437 f1 = f0;
438 f0 = usize::from(c0 > c1 - f1 as isize);
439 sa[m as usize] = i + 1;
440 m -= (f0 & !f1) as SaSint;
441 i -= 1;
442 }
443
444 n - 1 - m
445}
446
447#[allow(dead_code)]
448fn gather_compacted_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
449 let mut i = n - 2;
450 let mut m = n - 1;
451 let mut f0 = 1usize;
452 let mut f1: usize;
453 let mut c0 = t[(n - 1) as usize] as isize;
454 let mut c1: isize;
455
456 while i >= 3 {
457 c1 = t[i as usize] as isize;
458 f1 = usize::from(c1 > c0 - f0 as isize);
459 sa[m as usize] = i + 1;
460 m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
461
462 c0 = t[(i - 1) as usize] as isize;
463 f0 = usize::from(c0 > c1 - f1 as isize);
464 sa[m as usize] = i;
465 m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
466
467 c1 = t[(i - 2) as usize] as isize;
468 f1 = usize::from(c1 > c0 - f0 as isize);
469 sa[m as usize] = i - 1;
470 m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
471
472 c0 = t[(i - 3) as usize] as isize;
473 f0 = usize::from(c0 > c1 - f1 as isize);
474 sa[m as usize] = i - 2;
475 m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
476
477 i -= 4;
478 }
479
480 while i >= 0 {
481 c1 = c0;
482 c0 = t[i as usize] as isize;
483 f1 = f0;
484 f0 = usize::from(c0 > c1 - f1 as isize);
485 sa[m as usize] = i + 1;
486 m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
487 i -= 1;
488 }
489
490 n - 1 - m
491}
492
493#[allow(dead_code)]
494fn count_lms_suffixes_32s_4k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
495 buckets[..4 * k as usize].fill(0);
496 let mut i = n - 2;
497 let mut f0 = 1usize;
498 let mut f1: usize;
499 let mut c0 = t[(n - 1) as usize] as isize;
500 let mut c1: isize;
501
502 while i >= 3 {
503 c1 = t[i as usize] as isize;
504 f1 = usize::from(c1 > c0 - f0 as isize);
505 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
506
507 c0 = t[(i - 1) as usize] as isize;
508 f0 = usize::from(c0 > c1 - f1 as isize);
509 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
510
511 c1 = t[(i - 2) as usize] as isize;
512 f1 = usize::from(c1 > c0 - f0 as isize);
513 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
514
515 c0 = t[(i - 3) as usize] as isize;
516 f0 = usize::from(c0 > c1 - f1 as isize);
517 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
518
519 i -= 4;
520 }
521
522 while i >= 0 {
523 c1 = c0;
524 c0 = t[i as usize] as isize;
525 f1 = f0;
526 f0 = usize::from(c0 > c1 - f1 as isize);
527 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
528 i -= 1;
529 }
530
531 buckets[buckets_index4(c0 as usize, f0 + f0)] += 1;
532}
533
534#[allow(dead_code)]
535fn count_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
536 buckets[..2 * k as usize].fill(0);
537 let mut i = n - 2;
538 let mut f0 = 1usize;
539 let mut f1: usize;
540 let mut c0 = t[(n - 1) as usize] as isize;
541 let mut c1: isize;
542
543 while i >= 3 {
544 c1 = t[i as usize] as isize;
545 f1 = usize::from(c1 > c0 - f0 as isize);
546 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
547
548 c0 = t[(i - 1) as usize] as isize;
549 f0 = usize::from(c0 > c1 - f1 as isize);
550 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
551
552 c1 = t[(i - 2) as usize] as isize;
553 f1 = usize::from(c1 > c0 - f0 as isize);
554 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
555
556 c0 = t[(i - 3) as usize] as isize;
557 f0 = usize::from(c0 > c1 - f1 as isize);
558 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
559
560 i -= 4;
561 }
562
563 while i >= 0 {
564 c1 = c0;
565 c0 = t[i as usize] as isize;
566 f1 = f0;
567 f0 = usize::from(c0 > c1 - f1 as isize);
568 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
569 i -= 1;
570 }
571
572 buckets[buckets_index2(c0 as usize, 0)] += 1;
573}
574
575#[allow(dead_code)]
576fn count_compacted_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
577 buckets[..2 * k as usize].fill(0);
578 let mut i = n - 2;
579 let mut f0 = 1usize;
580 let mut f1: usize;
581 let mut c0 = t[(n - 1) as usize] as isize;
582 let mut c1: isize;
583
584 while i >= 3 {
585 c1 = t[i as usize] as isize;
586 f1 = usize::from(c1 > c0 - f0 as isize);
587 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
588
589 c0 = t[(i - 1) as usize] as isize;
590 f0 = usize::from(c0 > c1 - f1 as isize);
591 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
592
593 c1 = t[(i - 2) as usize] as isize;
594 f1 = usize::from(c1 > c0 - f0 as isize);
595 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
596
597 c0 = t[(i - 3) as usize] as isize;
598 f0 = usize::from(c0 > c1 - f1 as isize);
599 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
600
601 i -= 4;
602 }
603
604 while i >= 0 {
605 c1 = c0;
606 c0 = t[i as usize] as isize;
607 f1 = f0;
608 f0 = usize::from(c0 > c1 - f1 as isize);
609 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
610 i -= 1;
611 }
612
613 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, 0)] += 1;
614}
615
616#[allow(dead_code)]
617fn get_bucket_stride(free_space: SaSint, bucket_size: SaSint, num_buckets: SaSint) -> SaSint {
618 let bucket_size_1024 = (bucket_size + 1023) & !1023;
619 if free_space / (num_buckets - 1) >= bucket_size_1024 {
620 return bucket_size_1024;
621 }
622 let bucket_size_16 = (bucket_size + 15) & !15;
623 if free_space / (num_buckets - 1) >= bucket_size_16 {
624 return bucket_size_16;
625 }
626 bucket_size
627}
628
629#[allow(dead_code)]
630fn count_and_gather_lms_suffixes_32s_4k(
631 t: &[SaSint],
632 sa: &mut [SaSint],
633 n: SaSint,
634 k: SaSint,
635 buckets: &mut [SaSint],
636 omp_block_start: isize,
637 omp_block_size: isize,
638) -> SaSint {
639 buckets[..4 * k as usize].fill(0);
640 let mut m = omp_block_start + omp_block_size - 1;
641
642 if omp_block_size > 0 {
643 let mut j = m + 1;
644 let mut c0 = t[m as usize] as isize;
645 let mut c1 = -1isize;
646 while j < n as isize {
647 c1 = t[j as usize] as isize;
648 if c1 != c0 {
649 break;
650 }
651 j += 1;
652 }
653
654 let mut f0 = usize::from(c0 >= c1);
655 let mut f1: usize;
656 let mut i = m - 1;
657 j = omp_block_start + 64 + 3;
658 while i >= j {
659 c1 = t[i as usize] as isize;
660 f1 = usize::from(c1 > c0 - f0 as isize);
661 sa[m as usize] = (i + 1) as SaSint;
662 m -= (f1 & !f0) as isize;
663 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
664
665 c0 = t[(i - 1) as usize] as isize;
666 f0 = usize::from(c0 > c1 - f1 as isize);
667 sa[m as usize] = i as SaSint;
668 m -= (f0 & !f1) as isize;
669 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
670
671 c1 = t[(i - 2) as usize] as isize;
672 f1 = usize::from(c1 > c0 - f0 as isize);
673 sa[m as usize] = (i - 1) as SaSint;
674 m -= (f1 & !f0) as isize;
675 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
676
677 c0 = t[(i - 3) as usize] as isize;
678 f0 = usize::from(c0 > c1 - f1 as isize);
679 sa[m as usize] = (i - 2) as SaSint;
680 m -= (f0 & !f1) as isize;
681 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
682
683 i -= 4;
684 }
685
686 j -= 64 + 3;
687 while i >= j {
688 c1 = c0;
689 c0 = t[i as usize] as isize;
690 f1 = f0;
691 f0 = usize::from(c0 > c1 - f1 as isize);
692 sa[m as usize] = (i + 1) as SaSint;
693 m -= (f0 & !f1) as isize;
694 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
695 i -= 1;
696 }
697
698 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
699 f1 = usize::from(c1 > c0 - f0 as isize);
700 sa[m as usize] = (i + 1) as SaSint;
701 m -= (f1 & !f0) as isize;
702 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
703 }
704
705 (omp_block_start + omp_block_size - 1 - m) as SaSint
706}
707
708#[allow(dead_code)]
709fn count_and_gather_lms_suffixes_32s_2k(
710 t: &[SaSint],
711 sa: &mut [SaSint],
712 n: SaSint,
713 k: SaSint,
714 buckets: &mut [SaSint],
715 omp_block_start: isize,
716 omp_block_size: isize,
717) -> SaSint {
718 buckets[..2 * k as usize].fill(0);
719 let mut m = omp_block_start + omp_block_size - 1;
720
721 if omp_block_size > 0 {
722 let mut j = m + 1;
723 let mut c0 = t[m as usize] as isize;
724 let mut c1 = -1isize;
725 while j < n as isize {
726 c1 = t[j as usize] as isize;
727 if c1 != c0 {
728 break;
729 }
730 j += 1;
731 }
732
733 let mut f0 = usize::from(c0 >= c1);
734 let mut f1: usize;
735 let mut i = m - 1;
736 j = omp_block_start + 64 + 3;
737 while i >= j {
738 c1 = t[i as usize] as isize;
739 f1 = usize::from(c1 > c0 - f0 as isize);
740 sa[m as usize] = (i + 1) as SaSint;
741 m -= (f1 & !f0) as isize;
742 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
743
744 c0 = t[(i - 1) as usize] as isize;
745 f0 = usize::from(c0 > c1 - f1 as isize);
746 sa[m as usize] = i as SaSint;
747 m -= (f0 & !f1) as isize;
748 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
749
750 c1 = t[(i - 2) as usize] as isize;
751 f1 = usize::from(c1 > c0 - f0 as isize);
752 sa[m as usize] = (i - 1) as SaSint;
753 m -= (f1 & !f0) as isize;
754 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
755
756 c0 = t[(i - 3) as usize] as isize;
757 f0 = usize::from(c0 > c1 - f1 as isize);
758 sa[m as usize] = (i - 2) as SaSint;
759 m -= (f0 & !f1) as isize;
760 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
761
762 i -= 4;
763 }
764
765 j -= 64 + 3;
766 while i >= j {
767 c1 = c0;
768 c0 = t[i as usize] as isize;
769 f1 = f0;
770 f0 = usize::from(c0 > c1 - f1 as isize);
771 sa[m as usize] = (i + 1) as SaSint;
772 m -= (f0 & !f1) as isize;
773 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
774 i -= 1;
775 }
776
777 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
778 f1 = usize::from(c1 > c0 - f0 as isize);
779 sa[m as usize] = (i + 1) as SaSint;
780 m -= (f1 & !f0) as isize;
781 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
782 }
783
784 (omp_block_start + omp_block_size - 1 - m) as SaSint
785}
786
787#[allow(dead_code)]
788fn count_and_gather_compacted_lms_suffixes_32s_2k(
789 t: &[SaSint],
790 sa: &mut [SaSint],
791 n: SaSint,
792 k: SaSint,
793 buckets: &mut [SaSint],
794 omp_block_start: isize,
795 omp_block_size: isize,
796) -> SaSint {
797 buckets[..2 * k as usize].fill(0);
798 let mut m = omp_block_start + omp_block_size - 1;
799
800 if omp_block_size > 0 {
801 let mut j = m + 1;
802 let mut c0 = t[m as usize] as isize;
803 let mut c1 = -1isize;
804 while j < n as isize {
805 c1 = t[j as usize] as isize;
806 if c1 != c0 {
807 break;
808 }
809 j += 1;
810 }
811
812 let mut f0 = usize::from(c0 >= c1);
813 let mut f1: usize;
814 let mut i = m - 1;
815 j = omp_block_start + 64 + 3;
816 while i >= j {
817 c1 = t[i as usize] as isize;
818 f1 = usize::from(c1 > c0 - f0 as isize);
819 sa[m as usize] = (i + 1) as SaSint;
820 m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
821 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
822
823 c0 = t[(i - 1) as usize] as isize;
824 f0 = usize::from(c0 > c1 - f1 as isize);
825 sa[m as usize] = i as SaSint;
826 m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
827 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
828
829 c1 = t[(i - 2) as usize] as isize;
830 f1 = usize::from(c1 > c0 - f0 as isize);
831 sa[m as usize] = (i - 1) as SaSint;
832 m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
833 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
834
835 c0 = t[(i - 3) as usize] as isize;
836 f0 = usize::from(c0 > c1 - f1 as isize);
837 sa[m as usize] = (i - 2) as SaSint;
838 m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
839 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
840
841 i -= 4;
842 }
843
844 j -= 64 + 3;
845 while i >= j {
846 c1 = c0;
847 c0 = t[i as usize] as isize;
848 f1 = f0;
849 f0 = usize::from(c0 > c1 - f1 as isize);
850 sa[m as usize] = (i + 1) as SaSint;
851 m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
852 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
853 i -= 1;
854 }
855
856 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
857 f1 = usize::from(c1 > c0 - f0 as isize);
858 sa[m as usize] = (i + 1) as SaSint;
859 m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
860 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
861 }
862
863 (omp_block_start + omp_block_size - 1 - m) as SaSint
864}
865
866#[allow(dead_code)]
867fn count_and_gather_lms_suffixes_32s_4k_fs_omp(
868 t: &[SaSint],
869 sa: &mut [SaSint],
870 n: SaSint,
871 k: SaSint,
872 buckets: &mut [SaSint],
873 local_buckets: SaSint,
874 threads: SaSint,
875 thread_state: &mut [ThreadState],
876) -> SaSint {
877 if threads == 1 || n < 65_536 {
878 return count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize);
879 }
880
881 let thread_count = threads as usize;
882 let n_usize = n as usize;
883 let bucket_size = 4 * k as usize;
884 let block_stride = (n / threads) & !15;
885 let free_space = if local_buckets != 0 {
886 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
887 } else {
888 buckets.len() as SaSint
889 };
890 let bucket_stride = get_bucket_stride(free_space, 4 * k, threads) as usize;
891 let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
892 let mut workspace = vec![0; workspace_len];
893
894 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
895 let block_start = thread as SaSint * block_stride;
896 let block_size = if thread + 1 < thread_count {
897 block_stride
898 } else {
899 n - block_start
900 };
901 let workspace_end = workspace_len - thread * bucket_stride;
902 let workspace_start = workspace_end - bucket_size;
903 state.count = count_and_gather_lms_suffixes_32s_4k(
904 t,
905 sa,
906 n,
907 k,
908 &mut workspace[workspace_start..workspace_end],
909 block_start as isize,
910 block_size as isize,
911 );
912 state.position = block_start + block_size;
913 }
914
915 let mut m = 0usize;
916 for thread in (0..thread_count).rev() {
917 let count =
918 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
919 m += count;
920 if thread + 1 != thread_count && count > 0 {
921 let src_end = usize::try_from(thread_state[thread].position)
922 .expect("position must be non-negative");
923 let src_start = src_end - count;
924 let dst_start = n_usize - m;
925 sa.copy_within(src_start..src_end, dst_start);
926 }
927 }
928
929 let accumulation_threads = thread_count - 1;
930 let block_stride = (bucket_size / accumulation_threads) & !15usize;
931 for thread in 0..accumulation_threads {
932 let block_start = thread * block_stride;
933 let block_size = if thread + 1 < accumulation_threads {
934 block_stride
935 } else {
936 bucket_size - block_start
937 };
938 accumulate_counts_s32(
939 &mut workspace,
940 block_start,
941 block_size,
942 bucket_stride,
943 accumulation_threads + 1,
944 );
945 }
946
947 buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
948 m as SaSint
949}
950
951#[allow(dead_code)]
952fn count_and_gather_lms_suffixes_32s_2k_fs_omp(
953 t: &[SaSint],
954 sa: &mut [SaSint],
955 n: SaSint,
956 k: SaSint,
957 buckets: &mut [SaSint],
958 local_buckets: SaSint,
959 threads: SaSint,
960 thread_state: &mut [ThreadState],
961) -> SaSint {
962 if threads == 1 || n < 65_536 {
963 return count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
964 }
965
966 let thread_count = threads as usize;
967 let n_usize = n as usize;
968 let bucket_size = 2 * k as usize;
969 let block_stride = (n / threads) & !15;
970 let free_space = if local_buckets != 0 {
971 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
972 } else {
973 buckets.len() as SaSint
974 };
975 let bucket_stride = get_bucket_stride(free_space, 2 * k, threads) as usize;
976 let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
977 let mut workspace = vec![0; workspace_len];
978
979 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
980 let block_start = thread as SaSint * block_stride;
981 let block_size = if thread + 1 < thread_count {
982 block_stride
983 } else {
984 n - block_start
985 };
986 let workspace_end = workspace_len - thread * bucket_stride;
987 let workspace_start = workspace_end - bucket_size;
988 state.count = count_and_gather_lms_suffixes_32s_2k(
989 t,
990 sa,
991 n,
992 k,
993 &mut workspace[workspace_start..workspace_end],
994 block_start as isize,
995 block_size as isize,
996 );
997 state.position = block_start + block_size;
998 }
999
1000 let mut m = 0usize;
1001 for thread in (0..thread_count).rev() {
1002 let count =
1003 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1004 m += count;
1005 if thread + 1 != thread_count && count > 0 {
1006 let src_end = usize::try_from(thread_state[thread].position)
1007 .expect("position must be non-negative");
1008 let src_start = src_end - count;
1009 let dst_start = n_usize - m;
1010 sa.copy_within(src_start..src_end, dst_start);
1011 }
1012 }
1013
1014 let accumulation_threads = thread_count - 1;
1015 let block_stride = (bucket_size / accumulation_threads) & !15usize;
1016 for thread in 0..accumulation_threads {
1017 let block_start = thread * block_stride;
1018 let block_size = if thread + 1 < accumulation_threads {
1019 block_stride
1020 } else {
1021 bucket_size - block_start
1022 };
1023 accumulate_counts_s32(
1024 &mut workspace,
1025 block_start,
1026 block_size,
1027 bucket_stride,
1028 accumulation_threads + 1,
1029 );
1030 }
1031
1032 buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
1033 m as SaSint
1034}
1035
1036#[allow(dead_code)]
1037fn count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1038 t: &[SaSint],
1039 sa: &mut [SaSint],
1040 n: SaSint,
1041 k: SaSint,
1042 buckets: &mut [SaSint],
1043 _local_buckets: SaSint,
1044 threads: SaSint,
1045 thread_state: &mut [ThreadState],
1046) {
1047 if threads == 1 || n < 65_536 {
1048 count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
1049 return;
1050 }
1051
1052 let thread_count = threads as usize;
1053 let n_usize = n as usize;
1054 let bucket_size = 2 * k as usize;
1055 let block_stride = (n / threads) & !15;
1056 let mut workspaces = vec![vec![0; bucket_size]; thread_count];
1057 let mut gathered_runs = vec![Vec::<SaSint>::new(); thread_count];
1058
1059 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
1060 let block_start = thread as SaSint * block_stride;
1061 let block_size = if thread + 1 < thread_count {
1062 block_stride
1063 } else {
1064 n - block_start
1065 };
1066 let mut temp_sa = vec![0; n_usize + block_size as usize];
1067 state.count = count_and_gather_compacted_lms_suffixes_32s_2k(
1068 t,
1069 &mut temp_sa,
1070 n,
1071 k,
1072 &mut workspaces[thread],
1073 block_start as isize,
1074 block_size as isize,
1075 );
1076 state.position = block_start + block_size;
1077 let count = usize::try_from(state.count).expect("count must be non-negative");
1078 let src_end =
1079 n_usize + usize::try_from(state.position).expect("position must be non-negative");
1080 let src_start = src_end - count;
1081 gathered_runs[thread].extend_from_slice(&temp_sa[src_start..src_end]);
1082 }
1083
1084 let mut suffixes_before = 0usize;
1085 for thread in (0..thread_count).rev() {
1086 let count =
1087 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1088 suffixes_before += count;
1089 if count > 0 {
1090 let dst_start = n_usize - suffixes_before;
1091 let dst_end = dst_start + count;
1092 sa[dst_start..dst_end].copy_from_slice(&gathered_runs[thread]);
1093 }
1094 }
1095
1096 buckets.fill(0);
1097 for workspace in &workspaces {
1098 for (dst, src) in buckets.iter_mut().zip(workspace.iter()) {
1099 *dst += *src;
1100 }
1101 }
1102}
1103
1104#[allow(dead_code)]
1105fn count_and_gather_lms_suffixes_32s_4k_nofs_omp(
1106 t: &[SaSint],
1107 sa: &mut [SaSint],
1108 n: SaSint,
1109 k: SaSint,
1110 buckets: &mut [SaSint],
1111 threads: SaSint,
1112) -> SaSint {
1113 if threads > 1 && n >= 65_536 {
1114 count_lms_suffixes_32s_4k(t, n, k, buckets);
1115 gather_lms_suffixes_32s(t, sa, n)
1116 } else {
1117 count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize)
1118 }
1119}
1120
1121#[allow(dead_code)]
1122fn count_and_gather_lms_suffixes_32s_2k_nofs_omp(
1123 t: &[SaSint],
1124 sa: &mut [SaSint],
1125 n: SaSint,
1126 k: SaSint,
1127 buckets: &mut [SaSint],
1128 threads: SaSint,
1129) -> SaSint {
1130 if threads > 1 && n >= 65_536 {
1131 count_lms_suffixes_32s_2k(t, n, k, buckets);
1132 gather_lms_suffixes_32s(t, sa, n)
1133 } else {
1134 count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1135 }
1136}
1137
1138#[allow(dead_code)]
1139fn count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
1140 t: &[SaSint],
1141 sa: &mut [SaSint],
1142 n: SaSint,
1143 k: SaSint,
1144 buckets: &mut [SaSint],
1145 threads: SaSint,
1146) -> SaSint {
1147 if threads > 1 && n >= 65_536 {
1148 count_compacted_lms_suffixes_32s_2k(t, n, k, buckets);
1149 gather_compacted_lms_suffixes_32s(t, sa, n)
1150 } else {
1151 count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1152 }
1153}
1154
1155#[allow(dead_code)]
1156fn count_and_gather_lms_suffixes_32s_4k_omp(
1157 t: &[SaSint],
1158 sa: &mut [SaSint],
1159 n: SaSint,
1160 k: SaSint,
1161 buckets: &mut [SaSint],
1162 local_buckets: SaSint,
1163 threads: SaSint,
1164 thread_state: &mut [ThreadState],
1165) -> SaSint {
1166 let free_space = if local_buckets != 0 {
1167 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1168 } else {
1169 buckets.len() as SaSint
1170 };
1171 let mut max_threads = (free_space / (((4 * k) + 15) & !15)).min(threads);
1172
1173 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1174 let thread_cap = n / (16 * k);
1175 if max_threads > thread_cap {
1176 max_threads = thread_cap;
1177 }
1178 count_and_gather_lms_suffixes_32s_4k_fs_omp(
1179 t,
1180 sa,
1181 n,
1182 k,
1183 buckets,
1184 local_buckets,
1185 max_threads.max(2),
1186 thread_state,
1187 )
1188 } else if threads > 1 && n >= 65_536 {
1189 count_lms_suffixes_32s_4k(t, n, k, buckets);
1190 gather_lms_suffixes_32s(t, sa, n)
1191 } else {
1192 count_and_gather_lms_suffixes_32s_4k_nofs_omp(t, sa, n, k, buckets, threads)
1193 }
1194}
1195
1196#[allow(dead_code)]
1197fn count_and_gather_lms_suffixes_32s_2k_omp(
1198 t: &[SaSint],
1199 sa: &mut [SaSint],
1200 n: SaSint,
1201 k: SaSint,
1202 buckets: &mut [SaSint],
1203 local_buckets: SaSint,
1204 threads: SaSint,
1205 thread_state: &mut [ThreadState],
1206) -> SaSint {
1207 let free_space = if local_buckets != 0 {
1208 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1209 } else {
1210 buckets.len() as SaSint
1211 };
1212 let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1213
1214 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1215 let thread_cap = n / (8 * k);
1216 if max_threads > thread_cap {
1217 max_threads = thread_cap;
1218 }
1219 count_and_gather_lms_suffixes_32s_2k_fs_omp(
1220 t,
1221 sa,
1222 n,
1223 k,
1224 buckets,
1225 local_buckets,
1226 max_threads.max(2),
1227 thread_state,
1228 )
1229 } else if threads > 1 && n >= 65_536 {
1230 count_lms_suffixes_32s_2k(t, n, k, buckets);
1231 gather_lms_suffixes_32s(t, sa, n)
1232 } else {
1233 count_and_gather_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads)
1234 }
1235}
1236
1237#[allow(dead_code)]
1238fn count_suffixes_32s(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
1239 buckets[..k as usize].fill(0);
1240
1241 let mut i = 0usize;
1242 let mut j = (n as usize).saturating_sub(7);
1243 while i < j {
1244 buckets[t[i] as usize] += 1;
1245 buckets[t[i + 1] as usize] += 1;
1246 buckets[t[i + 2] as usize] += 1;
1247 buckets[t[i + 3] as usize] += 1;
1248 buckets[t[i + 4] as usize] += 1;
1249 buckets[t[i + 5] as usize] += 1;
1250 buckets[t[i + 6] as usize] += 1;
1251 buckets[t[i + 7] as usize] += 1;
1252 i += 8;
1253 }
1254
1255 j += 7;
1256 while i < j {
1257 buckets[t[i] as usize] += 1;
1258 i += 1;
1259 }
1260}
1261
1262#[allow(dead_code)]
1263fn initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
1264 let k = k as usize;
1265 let mut sum = 0;
1266 for j in 0..k {
1267 let i = buckets_index4(j, 0);
1268 buckets[4 * k + j] = sum;
1269 sum += buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1270 buckets[5 * k + j] = sum;
1271 }
1272}
1273
1274#[allow(dead_code)]
1275fn initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: &mut [SaSint]) {
1276 let k = k as usize;
1277 let mut sum = 0;
1278 for j in 0..k {
1279 let i = buckets_index2(j, 0);
1280 buckets[2 * k + j] = sum;
1281 sum += buckets[i] + buckets[i + 1];
1282 buckets[3 * k + j] = sum;
1283 }
1284}
1285
1286#[allow(dead_code)]
1287fn initialize_buckets_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1288 let mut sum0 = 0;
1289 for j in 0..k as usize {
1290 let i = buckets_index2(j, 0);
1291 sum0 += buckets[i] + buckets[i + 1];
1292 buckets[i] = sum0;
1293 }
1294}
1295
1296#[allow(dead_code)]
1297fn initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1298 let k = k as usize;
1299 for j in 0..k {
1300 let i = buckets_index2(j, 0);
1301 buckets[j] = buckets[i];
1302 }
1303 buckets[k] = 0;
1304 buckets.copy_within(0..k - 1, k + 1);
1305}
1306
1307#[allow(dead_code)]
1308fn initialize_buckets_start_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1309 let mut sum = 0;
1310 for bucket in buckets.iter_mut().take(k as usize) {
1311 let tmp = *bucket;
1312 *bucket = sum;
1313 sum += tmp;
1314 }
1315}
1316
1317#[allow(dead_code)]
1318fn initialize_buckets_end_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1319 let mut sum = 0;
1320 for bucket in buckets.iter_mut().take(k as usize) {
1321 sum += *bucket;
1322 *bucket = sum;
1323 }
1324}
1325
1326#[allow(dead_code)]
1327fn initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
1328 t: &[SaSint],
1329 k: SaSint,
1330 buckets: &mut [SaSint],
1331 first_lms_suffix: SaSint,
1332) {
1333 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1334 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1335
1336 let mut sum0 = 0;
1337 let mut sum1 = 0;
1338 for j in 0..k as usize {
1339 let i = buckets_index2(j, 0);
1340 sum0 += buckets[i] + buckets[i + 1];
1341 sum1 += buckets[i + 1];
1342 buckets[i] = sum0;
1343 buckets[i + 1] = sum1;
1344 }
1345}
1346
1347#[allow(dead_code)]
1348fn initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
1349 t: &[SaSint],
1350 k: SaSint,
1351 buckets: &mut [SaSint],
1352 mut first_lms_suffix: SaSint,
1353) -> SaSint {
1354 let mut f0 = 0usize;
1355 let mut c0 = t[first_lms_suffix as usize] as isize;
1356
1357 loop {
1358 first_lms_suffix -= 1;
1359 if first_lms_suffix < 0 {
1360 break;
1361 }
1362 let c1 = c0;
1363 c0 = t[first_lms_suffix as usize] as isize;
1364 let f1 = f0;
1365 f0 = usize::from(c0 > c1 - f1 as isize);
1366 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1367 }
1368 buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1369
1370 let mut sum = 0;
1371 for j in 0..k as usize {
1372 let i = buckets_index4(j, 0);
1373 sum += buckets[i + 1] + buckets[i + 3];
1374 buckets[4 * k as usize + j] = sum;
1375 }
1376 sum
1377}
1378
1379#[allow(dead_code)]
1380fn initialize_buckets_for_partial_sorting_32s_6k(
1381 t: &[SaSint],
1382 k: SaSint,
1383 buckets: &mut [SaSint],
1384 first_lms_suffix: SaSint,
1385 left_suffixes_count: SaSint,
1386) {
1387 let k = k as usize;
1388 let temp_offset = 4 * k;
1389 let first_symbol = t[first_lms_suffix as usize] as usize;
1390 let mut sum0 = left_suffixes_count + 1;
1391 let mut sum1 = 0;
1392 let mut sum2 = 0;
1393
1394 for j in 0..first_symbol {
1395 let i = buckets_index4(j, 0);
1396 let tj = buckets_index2(j, 0);
1397 let ss = buckets[i];
1398 let ls = buckets[i + 1];
1399 let sl = buckets[i + 2];
1400 let ll = buckets[i + 3];
1401
1402 buckets[i] = sum0;
1403 buckets[i + 1] = sum2;
1404 buckets[i + 2] = 0;
1405 buckets[i + 3] = 0;
1406
1407 sum0 += ss + sl;
1408 sum1 += ls;
1409 sum2 += ls + ll;
1410
1411 buckets[temp_offset + tj] = sum0;
1412 buckets[temp_offset + tj + 1] = sum1;
1413 }
1414
1415 sum1 += 1;
1416 for j in first_symbol..k {
1417 let i = buckets_index4(j, 0);
1418 let tj = buckets_index2(j, 0);
1419 let ss = buckets[i];
1420 let ls = buckets[i + 1];
1421 let sl = buckets[i + 2];
1422 let ll = buckets[i + 3];
1423
1424 buckets[i] = sum0;
1425 buckets[i + 1] = sum2;
1426 buckets[i + 2] = 0;
1427 buckets[i + 3] = 0;
1428
1429 sum0 += ss + sl;
1430 sum1 += ls;
1431 sum2 += ls + ll;
1432
1433 buckets[temp_offset + tj] = sum0;
1434 buckets[temp_offset + tj + 1] = sum1;
1435 }
1436}
1437
1438#[allow(dead_code)]
1439fn initialize_buckets_for_radix_and_partial_sorting_32s_4k(
1440 t: &[SaSint],
1441 k: SaSint,
1442 buckets: &mut [SaSint],
1443 first_lms_suffix: SaSint,
1444) {
1445 let k = k as usize;
1446 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1447 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1448
1449 let mut sum0 = 0;
1450 let mut sum1 = 0;
1451 for j in 0..k {
1452 let i = buckets_index2(j, 0);
1453 buckets[2 * k + j] = sum1;
1454 sum0 += buckets[i + 1];
1455 sum1 += buckets[i] + buckets[i + 1];
1456 buckets[i + 1] = sum0;
1457 buckets[3 * k + j] = sum1;
1458 }
1459}
1460
1461#[allow(dead_code)]
1462fn count_and_gather_compacted_lms_suffixes_32s_2k_omp(
1463 t: &[SaSint],
1464 sa: &mut [SaSint],
1465 n: SaSint,
1466 k: SaSint,
1467 buckets: &mut [SaSint],
1468 local_buckets: SaSint,
1469 threads: SaSint,
1470 thread_state: &mut [ThreadState],
1471) {
1472 let free_space = if local_buckets != 0 {
1473 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1474 } else {
1475 buckets.len() as SaSint
1476 };
1477 let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1478
1479 if local_buckets == 0 && max_threads > 1 && n >= 65_536 && n / k >= 2 {
1480 let thread_cap = n / (8 * k);
1481 if max_threads > thread_cap {
1482 max_threads = thread_cap;
1483 }
1484 count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1485 t,
1486 sa,
1487 n,
1488 k,
1489 buckets,
1490 local_buckets,
1491 max_threads.max(2),
1492 thread_state,
1493 );
1494 } else {
1495 count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1496 }
1497}
1498
1499#[allow(dead_code)]
1500fn gather_lms_suffixes_16u(
1501 t: &[u16],
1502 sa: &mut [SaSint],
1503 n: SaSint,
1504 mut m: SaSint,
1505 omp_block_start: SaSint,
1506 omp_block_size: SaSint,
1507) {
1508 if omp_block_size > 0 {
1509 let n = n as isize;
1510 let mut i: isize;
1511 let mut j = (omp_block_start + omp_block_size) as isize;
1512 let mut c0 = t[(omp_block_start + omp_block_size - 1) as usize] as isize;
1513 let mut c1 = -1isize;
1514
1515 while j < n {
1516 c1 = t[j as usize] as isize;
1517 if c1 != c0 {
1518 break;
1519 }
1520 j += 1;
1521 }
1522
1523 let mut f0 = usize::from(c0 >= c1);
1524 let mut f1: usize;
1525
1526 i = (omp_block_start + omp_block_size - 2) as isize;
1527 j = (omp_block_start + 3) as isize;
1528 while i >= j {
1529 c1 = t[i as usize] as isize;
1530 f1 = usize::from(c1 > c0 - f0 as isize);
1531 sa[m as usize] = (i + 1) as SaSint;
1532 m -= (f1 & (1 - f0)) as SaSint;
1533
1534 c0 = t[(i - 1) as usize] as isize;
1535 f0 = usize::from(c0 > c1 - f1 as isize);
1536 sa[m as usize] = i as SaSint;
1537 m -= (f0 & (1 - f1)) as SaSint;
1538
1539 c1 = t[(i - 2) as usize] as isize;
1540 f1 = usize::from(c1 > c0 - f0 as isize);
1541 sa[m as usize] = (i - 1) as SaSint;
1542 m -= (f1 & (1 - f0)) as SaSint;
1543
1544 c0 = t[(i - 3) as usize] as isize;
1545 f0 = usize::from(c0 > c1 - f1 as isize);
1546 sa[m as usize] = (i - 2) as SaSint;
1547 m -= (f0 & (1 - f1)) as SaSint;
1548
1549 i -= 4;
1550 }
1551
1552 j -= 3;
1553 while i >= j {
1554 c1 = c0;
1555 c0 = t[i as usize] as isize;
1556 f1 = f0;
1557 f0 = usize::from(c0 > c1 - f1 as isize);
1558 sa[m as usize] = (i + 1) as SaSint;
1559 m -= (f0 & (1 - f1)) as SaSint;
1560 i -= 1;
1561 }
1562
1563 sa[m as usize] = (i + 1) as SaSint;
1564 }
1565}
1566
1567#[allow(dead_code)]
1568fn count_and_gather_lms_suffixes_16u(
1569 t: &[u16],
1570 sa: &mut [SaSint],
1571 n: SaSint,
1572 buckets: &mut [SaSint],
1573 omp_block_start: SaSint,
1574 omp_block_size: SaSint,
1575) -> SaSint {
1576 buckets[..4 * ALPHABET_SIZE].fill(0);
1577
1578 let mut m = (omp_block_start + omp_block_size - 1) as isize;
1579
1580 if omp_block_size > 0 {
1581 let n = n as isize;
1582 let mut i: isize;
1583 let mut j = m + 1;
1584 let mut c0 = t[m as usize] as isize;
1585 let mut c1 = -1isize;
1586
1587 while j < n {
1588 c1 = t[j as usize] as isize;
1589 if c1 != c0 {
1590 break;
1591 }
1592 j += 1;
1593 }
1594
1595 let mut f0 = usize::from(c0 >= c1);
1596 let mut f1: usize;
1597
1598 i = m - 1;
1599 j = (omp_block_start + 3) as isize;
1600 while i >= j {
1601 c1 = t[i as usize] as isize;
1602 f1 = usize::from(c1 > c0 - f0 as isize);
1603 sa[m as usize] = (i + 1) as SaSint;
1604 m -= (f1 & (1 - f0)) as isize;
1605 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1606
1607 c0 = t[(i - 1) as usize] as isize;
1608 f0 = usize::from(c0 > c1 - f1 as isize);
1609 sa[m as usize] = i as SaSint;
1610 m -= (f0 & (1 - f1)) as isize;
1611 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1612
1613 c1 = t[(i - 2) as usize] as isize;
1614 f1 = usize::from(c1 > c0 - f0 as isize);
1615 sa[m as usize] = (i - 1) as SaSint;
1616 m -= (f1 & (1 - f0)) as isize;
1617 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1618
1619 c0 = t[(i - 3) as usize] as isize;
1620 f0 = usize::from(c0 > c1 - f1 as isize);
1621 sa[m as usize] = (i - 2) as SaSint;
1622 m -= (f0 & (1 - f1)) as isize;
1623 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1624
1625 i -= 4;
1626 }
1627
1628 j -= 3;
1629 while i >= j {
1630 c1 = c0;
1631 c0 = t[i as usize] as isize;
1632 f1 = f0;
1633 f0 = usize::from(c0 > c1 - f1 as isize);
1634 sa[m as usize] = (i + 1) as SaSint;
1635 m -= (f0 & (1 - f1)) as isize;
1636 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1637 i -= 1;
1638 }
1639
1640 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
1641 f1 = usize::from(c1 > c0 - f0 as isize);
1642 sa[m as usize] = (i + 1) as SaSint;
1643 m -= (f1 & (1 - f0)) as isize;
1644 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1645 }
1646
1647 omp_block_start + omp_block_size - 1 - m as SaSint
1648}
1649
1650#[allow(dead_code)]
1651fn gather_lms_suffixes_16u_omp(
1652 t: &[u16],
1653 sa: &mut [SaSint],
1654 n: SaSint,
1655 threads: SaSint,
1656 thread_state: &mut [ThreadState],
1657) {
1658 if threads == 1 || n < 65_536 || thread_state.is_empty() {
1659 gather_lms_suffixes_16u(t, sa, n, n - 1, 0, n);
1660 return;
1661 }
1662
1663 let thread_count = threads as usize;
1664 let block_stride = (n / threads) & !15;
1665 let mut suffix_counts_after = vec![0; thread_count];
1666 let mut m = 0;
1667 for thread in (0..thread_count).rev() {
1668 suffix_counts_after[thread] = m;
1669 m += thread_state[thread].m;
1670 }
1671
1672 for thread in 0..thread_count {
1673 let block_start = thread as SaSint * block_stride;
1674 let block_size = if thread + 1 < thread_count {
1675 block_stride
1676 } else {
1677 n - block_start
1678 };
1679 gather_lms_suffixes_16u(
1680 t,
1681 sa,
1682 n,
1683 n - 1 - suffix_counts_after[thread],
1684 block_start,
1685 block_size,
1686 );
1687 }
1688
1689 for thread in 0..thread_count {
1690 if thread_state[thread].m > 0 {
1691 sa[(n - 1 - suffix_counts_after[thread]) as usize] =
1692 thread_state[thread].last_lms_suffix;
1693 }
1694 }
1695}
1696
1697#[allow(dead_code)]
1698fn count_and_gather_lms_suffixes_16u_omp(
1699 t: &[u16],
1700 sa: &mut [SaSint],
1701 n: SaSint,
1702 buckets: &mut [SaSint],
1703 threads: SaSint,
1704 thread_state: &mut [ThreadState],
1705) -> SaSint {
1706 if threads == 1 || n < 65_536 || thread_state.is_empty() {
1707 return count_and_gather_lms_suffixes_16u(t, sa, n, buckets, 0, n);
1708 }
1709
1710 let thread_count = threads as usize;
1711 let block_stride = (n / threads) & !15;
1712
1713 for thread in 0..thread_count {
1714 let block_start = thread as SaSint * block_stride;
1715 let block_size = if thread + 1 < thread_count {
1716 block_stride
1717 } else {
1718 n - block_start
1719 };
1720 let count = count_and_gather_lms_suffixes_16u(
1721 t,
1722 sa,
1723 n,
1724 &mut thread_state[thread].buckets,
1725 block_start,
1726 block_size,
1727 );
1728 thread_state[thread].m = count;
1729 thread_state[thread].position = block_start + block_size;
1730 if count > 0 {
1731 thread_state[thread].last_lms_suffix = sa[(block_start + block_size - 1) as usize];
1732 }
1733 }
1734
1735 buckets[..4 * ALPHABET_SIZE].fill(0);
1736 let mut m = 0;
1737 for thread in (0..thread_count).rev() {
1738 let position = thread_state[thread].position;
1739 let count = thread_state[thread].m;
1740 m += count;
1741 if thread + 1 != thread_count && count > 0 {
1742 let src_end = position as usize;
1743 let src_start = src_end - count as usize;
1744 let dst_start = (n - m) as usize;
1745 sa.copy_within(src_start..src_end, dst_start);
1746 }
1747 for s in 0..4 * ALPHABET_SIZE {
1748 let a = buckets[s];
1749 let b = thread_state[thread].buckets[s];
1750 buckets[s] = a + b;
1751 thread_state[thread].buckets[s] = a;
1752 }
1753 }
1754
1755 m
1756}
1757
1758#[allow(dead_code)]
1759fn initialize_buckets_start_and_end_16u(
1760 buckets: &mut [SaSint],
1761 freq: Option<&mut [SaSint]>,
1762) -> SaSint {
1763 let (count_buckets, start_end) = buckets.split_at_mut(6 * ALPHABET_SIZE);
1764 let (bucket_start, bucket_end) = start_end.split_at_mut(ALPHABET_SIZE);
1765
1766 let mut k = -1;
1767 let mut sum = 0;
1768
1769 if let Some(freq) = freq {
1770 for j in 0..ALPHABET_SIZE {
1771 let i = buckets_index4(j, 0);
1772 let total = count_buckets[i]
1773 + count_buckets[i + buckets_index4(0, 1)]
1774 + count_buckets[i + buckets_index4(0, 2)]
1775 + count_buckets[i + buckets_index4(0, 3)];
1776
1777 bucket_start[j] = sum;
1778 sum += total;
1779 bucket_end[j] = sum;
1780 if total > 0 {
1781 k = j as SaSint;
1782 }
1783 freq[j] = total;
1784 }
1785 } else {
1786 for j in 0..ALPHABET_SIZE {
1787 let i = buckets_index4(j, 0);
1788 let total = count_buckets[i]
1789 + count_buckets[i + buckets_index4(0, 1)]
1790 + count_buckets[i + buckets_index4(0, 2)]
1791 + count_buckets[i + buckets_index4(0, 3)];
1792
1793 bucket_start[j] = sum;
1794 sum += total;
1795 bucket_end[j] = sum;
1796 if total > 0 {
1797 k = j as SaSint;
1798 }
1799 }
1800 }
1801
1802 k + 1
1803}
1804
1805#[allow(dead_code)]
1806fn initialize_buckets_for_lms_suffixes_radix_sort_16u(
1807 t: &[u16],
1808 buckets: &mut [SaSint],
1809 mut first_lms_suffix: SaSint,
1810) -> SaSint {
1811 let mut f0 = 0usize;
1812 let mut c0 = t[first_lms_suffix as usize] as isize;
1813
1814 loop {
1815 first_lms_suffix -= 1;
1816 if first_lms_suffix < 0 {
1817 break;
1818 }
1819
1820 let c1 = c0;
1821 c0 = t[first_lms_suffix as usize] as isize;
1822 let f1 = f0;
1823 f0 = usize::from(c0 > c1 - f1 as isize);
1824 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1825 }
1826
1827 buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1828
1829 let (count_buckets, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
1830 let mut sum = 0;
1831 for c in 0..ALPHABET_SIZE {
1832 let i = buckets_index4(c, 0);
1833 let j = buckets_index2(c, 0);
1834 temp_bucket[j + buckets_index2(0, 1)] = sum;
1835 sum += count_buckets[i + buckets_index4(0, 1)] + count_buckets[i + buckets_index4(0, 3)];
1836 temp_bucket[j] = sum;
1837 }
1838
1839 sum
1840}
1841
1842#[allow(dead_code)]
1843fn radix_sort_lms_suffixes_16u(
1844 t: &[u16],
1845 sa: &mut [SaSint],
1846 induction_bucket: &mut [SaSint],
1847 omp_block_start: SaSint,
1848 omp_block_size: SaSint,
1849) {
1850 let mut i = omp_block_start + omp_block_size - 1;
1851 let mut j = omp_block_start + 64 + 3;
1852 while i >= j {
1853 let p0 = sa[i as usize];
1854 induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1855 sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1856
1857 let p1 = sa[(i - 1) as usize];
1858 induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1859 sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1860
1861 let p2 = sa[(i - 2) as usize];
1862 induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1863 sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1864
1865 let p3 = sa[(i - 3) as usize];
1866 induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1867 sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1868
1869 i -= 4;
1870 }
1871
1872 j -= 64 + 3;
1873 while i >= j {
1874 let p = sa[i as usize];
1875 induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1876 sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1877 i -= 1;
1878 }
1879}
1880
1881#[allow(dead_code)]
1882fn radix_sort_lms_suffixes_16u_omp(
1883 t: &[u16],
1884 sa: &mut [SaSint],
1885 n: SaSint,
1886 m: SaSint,
1887 flags: SaSint,
1888 buckets: &mut [SaSint],
1889 threads: SaSint,
1890 thread_state: &mut [ThreadState],
1891) {
1892 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
1893 buckets[4 * ALPHABET_SIZE] -= 1;
1894 }
1895 if threads == 1 || n < 65_536 || m < 65_536 || thread_state.is_empty() {
1896 radix_sort_lms_suffixes_16u(t, sa, &mut buckets[4 * ALPHABET_SIZE..], n - m + 1, m - 1);
1897 return;
1898 }
1899
1900 let thread_count = threads as usize;
1901 for thread in 0..thread_count {
1902 let (src_buckets, state_buckets) = (
1903 &buckets[4 * ALPHABET_SIZE..],
1904 &mut thread_state[thread].buckets,
1905 );
1906 for c in 0..ALPHABET_SIZE {
1907 let i = buckets_index2(c, 0);
1908 let j = buckets_index4(c, 1);
1909 state_buckets[i] = src_buckets[i] - state_buckets[j];
1910 }
1911
1912 let mut block_start = 0;
1913 let mut block_size = thread_state[thread].m;
1914 for idx in (thread..thread_count).rev() {
1915 block_start += thread_state[idx].m;
1916 }
1917
1918 if block_start == m && block_size > 0 {
1919 block_start -= 1;
1920 block_size -= 1;
1921 }
1922
1923 radix_sort_lms_suffixes_16u(
1924 t,
1925 sa,
1926 &mut thread_state[thread].buckets,
1927 n - block_start,
1928 block_size,
1929 );
1930 }
1931}
1932
1933#[allow(dead_code)]
1934fn radix_sort_lms_suffixes_32s_6k(
1935 t: &[SaSint],
1936 sa: &mut [SaSint],
1937 induction_bucket: &mut [SaSint],
1938 omp_block_start: SaSint,
1939 omp_block_size: SaSint,
1940) {
1941 let mut i = omp_block_start + omp_block_size - 1;
1942 let mut j = omp_block_start + 64 + 3;
1943 while i >= j {
1944 let p0 = sa[i as usize];
1945 induction_bucket[t[p0 as usize] as usize] -= 1;
1946 sa[induction_bucket[t[p0 as usize] as usize] as usize] = p0;
1947 let p1 = sa[(i - 1) as usize];
1948 induction_bucket[t[p1 as usize] as usize] -= 1;
1949 sa[induction_bucket[t[p1 as usize] as usize] as usize] = p1;
1950 let p2 = sa[(i - 2) as usize];
1951 induction_bucket[t[p2 as usize] as usize] -= 1;
1952 sa[induction_bucket[t[p2 as usize] as usize] as usize] = p2;
1953 let p3 = sa[(i - 3) as usize];
1954 induction_bucket[t[p3 as usize] as usize] -= 1;
1955 sa[induction_bucket[t[p3 as usize] as usize] as usize] = p3;
1956 i -= 4;
1957 }
1958
1959 j -= 64 + 3;
1960 while i >= j {
1961 let p = sa[i as usize];
1962 induction_bucket[t[p as usize] as usize] -= 1;
1963 sa[induction_bucket[t[p as usize] as usize] as usize] = p;
1964 i -= 1;
1965 }
1966}
1967
1968#[allow(dead_code)]
1969fn radix_sort_lms_suffixes_32s_2k(
1970 t: &[SaSint],
1971 sa: &mut [SaSint],
1972 induction_bucket: &mut [SaSint],
1973 omp_block_start: SaSint,
1974 omp_block_size: SaSint,
1975) {
1976 let mut i = omp_block_start + omp_block_size - 1;
1977 let mut j = omp_block_start + 64 + 3;
1978 while i >= j {
1979 let p0 = sa[i as usize];
1980 induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1981 sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1982 let p1 = sa[(i - 1) as usize];
1983 induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1984 sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1985 let p2 = sa[(i - 2) as usize];
1986 induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1987 sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1988 let p3 = sa[(i - 3) as usize];
1989 induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1990 sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1991 i -= 4;
1992 }
1993
1994 j -= 64 + 3;
1995 while i >= j {
1996 let p = sa[i as usize];
1997 induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1998 sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1999 i -= 1;
2000 }
2001}
2002
2003#[allow(dead_code)]
2004fn radix_sort_lms_suffixes_32s_block_gather(
2005 t: &[SaSint],
2006 sa: &[SaSint],
2007 cache: &mut [ThreadCache],
2008 omp_block_start: SaSint,
2009 omp_block_size: SaSint,
2010) {
2011 if omp_block_size <= 0 {
2012 return;
2013 }
2014
2015 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2016 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2017 let cache_base = if cache.len() >= start + size {
2018 0
2019 } else {
2020 start
2021 };
2022 let mut i = start;
2023 let mut j = if size > 67 { start + size - 67 } else { start };
2024
2025 while i < j {
2026 for current in [i, i + 1, i + 2, i + 3] {
2027 let ci = current - cache_base;
2028 let index = sa[current];
2029 cache[ci].index = index;
2030 cache[ci].symbol = t[index as usize];
2031 }
2032 i += 4;
2033 }
2034
2035 j = if size > 67 { j + 67 } else { start + size };
2036 while i < j {
2037 let ci = i - cache_base;
2038 let index = sa[i];
2039 cache[ci].index = index;
2040 cache[ci].symbol = t[index as usize];
2041 i += 1;
2042 }
2043}
2044
2045#[allow(dead_code)]
2046fn radix_sort_lms_suffixes_32s_6k_block_sort(
2047 induction_bucket: &mut [SaSint],
2048 cache: &mut [ThreadCache],
2049 omp_block_start: SaSint,
2050 omp_block_size: SaSint,
2051) {
2052 if omp_block_size <= 0 {
2053 return;
2054 }
2055
2056 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2057 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2058 let cache_base = if cache.len() >= start + size {
2059 0
2060 } else {
2061 start
2062 };
2063 let mut i = start + size - 1;
2064 let mut j = start + 64 + 3;
2065
2066 while i >= j {
2067 for current in [i, i - 1, i - 2, i - 3] {
2068 let ci = current - cache_base;
2069 let v = cache[ci].symbol as usize;
2070 induction_bucket[v] -= 1;
2071 cache[ci].symbol = induction_bucket[v];
2072 }
2073 i -= 4;
2074 }
2075
2076 j -= 64 + 3;
2077 while i >= j {
2078 let ci = i - cache_base;
2079 let v = cache[ci].symbol as usize;
2080 induction_bucket[v] -= 1;
2081 cache[ci].symbol = induction_bucket[v];
2082 if i == 0 {
2083 break;
2084 }
2085 i -= 1;
2086 }
2087}
2088
2089#[allow(dead_code)]
2090fn radix_sort_lms_suffixes_32s_2k_block_sort(
2091 induction_bucket: &mut [SaSint],
2092 cache: &mut [ThreadCache],
2093 omp_block_start: SaSint,
2094 omp_block_size: SaSint,
2095) {
2096 if omp_block_size <= 0 {
2097 return;
2098 }
2099
2100 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2101 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2102 let cache_base = if cache.len() >= start + size {
2103 0
2104 } else {
2105 start
2106 };
2107 let mut i = start + size - 1;
2108 let mut j = start + 64 + 3;
2109
2110 while i >= j {
2111 for current in [i, i - 1, i - 2, i - 3] {
2112 let ci = current - cache_base;
2113 let v = buckets_index2(cache[ci].symbol as usize, 0);
2114 induction_bucket[v] -= 1;
2115 cache[ci].symbol = induction_bucket[v];
2116 }
2117 i -= 4;
2118 }
2119
2120 j -= 64 + 3;
2121 while i >= j {
2122 let ci = i - cache_base;
2123 let v = buckets_index2(cache[ci].symbol as usize, 0);
2124 induction_bucket[v] -= 1;
2125 cache[ci].symbol = induction_bucket[v];
2126 if i == 0 {
2127 break;
2128 }
2129 i -= 1;
2130 }
2131}
2132
2133#[allow(dead_code)]
2134fn radix_sort_lms_suffixes_32s_6k_block_omp(
2135 t: &[SaSint],
2136 sa: &mut [SaSint],
2137 induction_bucket: &mut [SaSint],
2138 cache: &mut [ThreadCache],
2139 block_start: SaSint,
2140 block_size: SaSint,
2141 threads: SaSint,
2142) {
2143 if threads <= 1 || block_size < 16_384 {
2144 radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, block_start, block_size);
2145 return;
2146 }
2147
2148 radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2149 radix_sort_lms_suffixes_32s_6k_block_sort(induction_bucket, cache, block_start, block_size);
2150 place_cached_suffixes(sa, cache, block_start, block_size);
2151}
2152
2153#[allow(dead_code)]
2154fn radix_sort_lms_suffixes_32s_2k_block_omp(
2155 t: &[SaSint],
2156 sa: &mut [SaSint],
2157 induction_bucket: &mut [SaSint],
2158 cache: &mut [ThreadCache],
2159 block_start: SaSint,
2160 block_size: SaSint,
2161 threads: SaSint,
2162) {
2163 if threads <= 1 || block_size < 16_384 {
2164 radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, block_start, block_size);
2165 return;
2166 }
2167
2168 radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2169 radix_sort_lms_suffixes_32s_2k_block_sort(induction_bucket, cache, block_start, block_size);
2170 place_cached_suffixes(sa, cache, block_start, block_size);
2171}
2172
2173#[allow(dead_code)]
2174fn radix_sort_lms_suffixes_32s_6k_omp(
2175 t: &[SaSint],
2176 sa: &mut [SaSint],
2177 n: SaSint,
2178 m: SaSint,
2179 induction_bucket: &mut [SaSint],
2180 threads: SaSint,
2181) {
2182 if threads <= 1 || m < 65_536 {
2183 radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, n - m + 1, m - 1);
2184 return;
2185 }
2186
2187 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2188 let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2189 let mut block_start = 0usize;
2190 let m_usize = usize::try_from(m).expect("m must be non-negative");
2191 let n_usize = usize::try_from(n).expect("n must be non-negative");
2192 let last = m_usize - 1;
2193
2194 while block_start < last {
2195 let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2196 radix_sort_lms_suffixes_32s_6k_block_omp(
2197 t,
2198 sa,
2199 induction_bucket,
2200 &mut cache,
2201 (n_usize - block_end) as SaSint,
2202 (block_end - block_start) as SaSint,
2203 threads,
2204 );
2205 block_start = block_end;
2206 }
2207}
2208
2209#[allow(dead_code)]
2210fn radix_sort_lms_suffixes_32s_2k_omp(
2211 t: &[SaSint],
2212 sa: &mut [SaSint],
2213 n: SaSint,
2214 m: SaSint,
2215 induction_bucket: &mut [SaSint],
2216 threads: SaSint,
2217) {
2218 if threads <= 1 || m < 65_536 {
2219 radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, n - m + 1, m - 1);
2220 return;
2221 }
2222
2223 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2224 let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2225 let mut block_start = 0usize;
2226 let m_usize = usize::try_from(m).expect("m must be non-negative");
2227 let n_usize = usize::try_from(n).expect("n must be non-negative");
2228 let last = m_usize - 1;
2229
2230 while block_start < last {
2231 let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2232 radix_sort_lms_suffixes_32s_2k_block_omp(
2233 t,
2234 sa,
2235 induction_bucket,
2236 &mut cache,
2237 (n_usize - block_end) as SaSint,
2238 (block_end - block_start) as SaSint,
2239 threads,
2240 );
2241 block_start = block_end;
2242 }
2243}
2244
2245#[allow(dead_code)]
2246fn radix_sort_lms_suffixes_32s_1k(
2247 t: &[SaSint],
2248 sa: &mut [SaSint],
2249 n: SaSint,
2250 buckets: &mut [SaSint],
2251) -> SaSint {
2252 let mut i = n - 2;
2253 let mut m = 0;
2254 let mut f0 = 1usize;
2255 let mut f1: usize;
2256 let mut c0 = t[(n - 1) as usize] as isize;
2257 let mut c1: isize;
2258 let mut c2 = 0isize;
2259
2260 while i >= 64 + 3 {
2261 c1 = t[i as usize] as isize;
2262 f1 = usize::from(c1 > c0 - f0 as isize);
2263 if (f1 & !f0) != 0 {
2264 c2 = c0;
2265 buckets[c2 as usize] -= 1;
2266 sa[buckets[c2 as usize] as usize] = i + 1;
2267 m += 1;
2268 }
2269 c0 = t[(i - 1) as usize] as isize;
2270 f0 = usize::from(c0 > c1 - f1 as isize);
2271 if (f0 & !f1) != 0 {
2272 c2 = c1;
2273 buckets[c2 as usize] -= 1;
2274 sa[buckets[c2 as usize] as usize] = i;
2275 m += 1;
2276 }
2277 c1 = t[(i - 2) as usize] as isize;
2278 f1 = usize::from(c1 > c0 - f0 as isize);
2279 if (f1 & !f0) != 0 {
2280 c2 = c0;
2281 buckets[c2 as usize] -= 1;
2282 sa[buckets[c2 as usize] as usize] = i - 1;
2283 m += 1;
2284 }
2285 c0 = t[(i - 3) as usize] as isize;
2286 f0 = usize::from(c0 > c1 - f1 as isize);
2287 if (f0 & !f1) != 0 {
2288 c2 = c1;
2289 buckets[c2 as usize] -= 1;
2290 sa[buckets[c2 as usize] as usize] = i - 2;
2291 m += 1;
2292 }
2293 i -= 4;
2294 }
2295
2296 while i >= 0 {
2297 c1 = c0;
2298 c0 = t[i as usize] as isize;
2299 f1 = f0;
2300 f0 = usize::from(c0 > c1 - f1 as isize);
2301 if (f0 & !f1) != 0 {
2302 c2 = c1;
2303 buckets[c2 as usize] -= 1;
2304 sa[buckets[c2 as usize] as usize] = i + 1;
2305 m += 1;
2306 }
2307 i -= 1;
2308 }
2309
2310 if m > 1 {
2311 sa[buckets[c2 as usize] as usize] = 0;
2312 }
2313
2314 m
2315}
2316
2317#[allow(dead_code)]
2318fn radix_sort_set_markers_32s_6k(
2319 sa: &mut [SaSint],
2320 induction_bucket: &[SaSint],
2321 omp_block_start: SaSint,
2322 omp_block_size: SaSint,
2323) {
2324 let mut i = omp_block_start;
2325 let mut j = omp_block_start + omp_block_size - 64 - 3;
2326
2327 while i < j {
2328 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2329 sa[induction_bucket[(i + 1) as usize] as usize] |= SAINT_MIN;
2330 sa[induction_bucket[(i + 2) as usize] as usize] |= SAINT_MIN;
2331 sa[induction_bucket[(i + 3) as usize] as usize] |= SAINT_MIN;
2332 i += 4;
2333 }
2334
2335 j += 64 + 3;
2336 while i < j {
2337 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2338 i += 1;
2339 }
2340}
2341
2342#[allow(dead_code)]
2343fn radix_sort_set_markers_32s_4k(
2344 sa: &mut [SaSint],
2345 induction_bucket: &[SaSint],
2346 omp_block_start: SaSint,
2347 omp_block_size: SaSint,
2348) {
2349 let mut i = omp_block_start;
2350 let mut j = omp_block_start + omp_block_size - 64 - 3;
2351
2352 while i < j {
2353 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2354 sa[induction_bucket[buckets_index2((i + 1) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2355 sa[induction_bucket[buckets_index2((i + 2) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2356 sa[induction_bucket[buckets_index2((i + 3) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2357 i += 4;
2358 }
2359
2360 j += 64 + 3;
2361 while i < j {
2362 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2363 i += 1;
2364 }
2365}
2366
2367#[allow(dead_code)]
2368fn radix_sort_set_markers_32s_6k_omp(
2369 sa: &mut [SaSint],
2370 k: SaSint,
2371 induction_bucket: &[SaSint],
2372 threads: SaSint,
2373) {
2374 if k <= 1 {
2375 return;
2376 }
2377
2378 if threads <= 1 || k < 65_536 {
2379 radix_sort_set_markers_32s_6k(sa, induction_bucket, 0, k - 1);
2380 return;
2381 }
2382
2383 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2384 let last = usize::try_from(k - 1).expect("k must be positive");
2385 let stride = (last / threads_usize) & !15usize;
2386 let mut start = 0usize;
2387
2388 for thread in 0..threads_usize {
2389 let end = if thread + 1 == threads_usize {
2390 last
2391 } else {
2392 start + stride
2393 };
2394 if end > start {
2395 radix_sort_set_markers_32s_6k(
2396 sa,
2397 induction_bucket,
2398 start as SaSint,
2399 (end - start) as SaSint,
2400 );
2401 }
2402 start = end;
2403 }
2404}
2405
2406#[allow(dead_code)]
2407fn radix_sort_set_markers_32s_4k_omp(
2408 sa: &mut [SaSint],
2409 k: SaSint,
2410 induction_bucket: &[SaSint],
2411 threads: SaSint,
2412) {
2413 if k <= 1 {
2414 return;
2415 }
2416
2417 if threads <= 1 || k < 65_536 {
2418 radix_sort_set_markers_32s_4k(sa, induction_bucket, 0, k - 1);
2419 return;
2420 }
2421
2422 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2423 let last = usize::try_from(k - 1).expect("k must be positive");
2424 let stride = (last / threads_usize) & !15usize;
2425 let mut start = 0usize;
2426
2427 for thread in 0..threads_usize {
2428 let end = if thread + 1 == threads_usize {
2429 last
2430 } else {
2431 start + stride
2432 };
2433 if end > start {
2434 radix_sort_set_markers_32s_4k(
2435 sa,
2436 induction_bucket,
2437 start as SaSint,
2438 (end - start) as SaSint,
2439 );
2440 }
2441 start = end;
2442 }
2443}
2444
2445#[allow(dead_code)]
2446fn initialize_buckets_for_partial_sorting_16u(
2447 t: &[u16],
2448 buckets: &mut [SaSint],
2449 first_lms_suffix: SaSint,
2450 left_suffixes_count: SaSint,
2451) {
2452 buckets[buckets_index4(t[first_lms_suffix as usize] as usize, 1)] += 1;
2453
2454 let (front, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
2455 let mut sum0 = left_suffixes_count + 1;
2456 let mut sum1 = 0;
2457
2458 for c in 0..ALPHABET_SIZE {
2459 let i = buckets_index4(c, 0);
2460 let j = buckets_index2(c, 0);
2461
2462 temp_bucket[j + buckets_index2(0, 0)] = sum0;
2463
2464 sum0 += front[i + buckets_index4(0, 0)] + front[i + buckets_index4(0, 2)];
2465 sum1 += front[i + buckets_index4(0, 1)];
2466
2467 front[j + buckets_index2(0, 0)] = sum0;
2468 front[j + buckets_index2(0, 1)] = sum1;
2469 }
2470}
2471
2472#[allow(dead_code)]
2473fn partial_sorting_shift_markers_32s_6k_omp(
2474 sa: &mut [SaSint],
2475 k: SaSint,
2476 buckets: &[SaSint],
2477 threads: SaSint,
2478) {
2479 let k_usize = usize::try_from(k).expect("k must be non-negative");
2480 let temp_bucket = &buckets[4 * k_usize..];
2481 let thread_count = if threads > 1 && k >= 65536 {
2482 usize::try_from(threads).expect("threads must be positive")
2483 } else {
2484 1
2485 };
2486 for t in 0..thread_count {
2487 let mut c = k_usize as isize - 1 - t as isize;
2488 while c >= 1 {
2489 let c_usize = c as usize;
2490 let mut i = buckets[buckets_index4(c_usize, 0)] - 1;
2491 let mut j = temp_bucket[buckets_index2(c_usize - 1, 0)] + 3;
2492 let mut s = SAINT_MIN;
2493
2494 while i >= j {
2495 let p0 = sa[i as usize];
2496 let q0 = (p0 & SAINT_MIN) ^ s;
2497 s ^= q0;
2498 sa[i as usize] = p0 ^ q0;
2499
2500 let p1 = sa[(i - 1) as usize];
2501 let q1 = (p1 & SAINT_MIN) ^ s;
2502 s ^= q1;
2503 sa[(i - 1) as usize] = p1 ^ q1;
2504
2505 let p2 = sa[(i - 2) as usize];
2506 let q2 = (p2 & SAINT_MIN) ^ s;
2507 s ^= q2;
2508 sa[(i - 2) as usize] = p2 ^ q2;
2509
2510 let p3 = sa[(i - 3) as usize];
2511 let q3 = (p3 & SAINT_MIN) ^ s;
2512 s ^= q3;
2513 sa[(i - 3) as usize] = p3 ^ q3;
2514
2515 i -= 4;
2516 }
2517
2518 j -= 3;
2519 while i >= j {
2520 let p = sa[i as usize];
2521 let q = (p & SAINT_MIN) ^ s;
2522 s ^= q;
2523 sa[i as usize] = p ^ q;
2524 i -= 1;
2525 }
2526
2527 c -= thread_count as isize;
2528 }
2529 }
2530}
2531
2532#[allow(dead_code)]
2533fn partial_sorting_shift_markers_32s_4k(sa: &mut [SaSint], n: SaSint) {
2534 let mut i = n - 1;
2535 let mut s = SUFFIX_GROUP_MARKER;
2536
2537 while i >= 3 {
2538 let p0 = sa[i as usize];
2539 let q0 =
2540 ((p0 & SUFFIX_GROUP_MARKER) ^ s) & (((p0 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2541 s ^= q0;
2542 sa[i as usize] = p0 ^ q0;
2543
2544 let p1 = sa[(i - 1) as usize];
2545 let q1 =
2546 ((p1 & SUFFIX_GROUP_MARKER) ^ s) & (((p1 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2547 s ^= q1;
2548 sa[(i - 1) as usize] = p1 ^ q1;
2549
2550 let p2 = sa[(i - 2) as usize];
2551 let q2 =
2552 ((p2 & SUFFIX_GROUP_MARKER) ^ s) & (((p2 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2553 s ^= q2;
2554 sa[(i - 2) as usize] = p2 ^ q2;
2555
2556 let p3 = sa[(i - 3) as usize];
2557 let q3 =
2558 ((p3 & SUFFIX_GROUP_MARKER) ^ s) & (((p3 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2559 s ^= q3;
2560 sa[(i - 3) as usize] = p3 ^ q3;
2561
2562 i -= 4;
2563 }
2564
2565 while i >= 0 {
2566 let p = sa[i as usize];
2567 let q = ((p & SUFFIX_GROUP_MARKER) ^ s) & (((p > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2568 s ^= q;
2569 sa[i as usize] = p ^ q;
2570 i -= 1;
2571 }
2572}
2573
2574#[allow(dead_code)]
2575fn partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
2576 let temp_offset = 4 * k as usize;
2577 let mut i = buckets_index2(0, 0);
2578
2579 while i <= buckets_index2(k as usize - 1, 0) {
2580 buckets[2 * i + buckets_index4(0, 0)] = buckets[temp_offset + i + buckets_index2(0, 0)];
2581 buckets[2 * i + buckets_index4(0, 1)] = buckets[temp_offset + i + buckets_index2(0, 1)];
2582 i += buckets_index2(1, 0);
2583 }
2584}
2585
2586#[allow(dead_code)]
2587fn partial_sorting_scan_left_to_right_16u(
2588 t: &[u16],
2589 sa: &mut [SaSint],
2590 buckets: &mut [SaSint],
2591 mut d: SaSint,
2592 omp_block_start: SaSint,
2593 omp_block_size: SaSint,
2594) -> SaSint {
2595 let mut i = omp_block_start as isize;
2596 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
2597 while i < j {
2598 let mut p0 = sa[i as usize];
2599 d += SaSint::from(p0 < 0);
2600 p0 &= SAINT_MAX;
2601 let v0 = buckets_index2(
2602 t[(p0 - 1) as usize] as usize,
2603 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
2604 );
2605 let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2606 SAINT_MIN
2607 } else {
2608 0
2609 };
2610 let dst0 = buckets[4 * ALPHABET_SIZE + v0] as usize;
2611 sa[dst0] = (p0 - 1) | mark0;
2612 buckets[4 * ALPHABET_SIZE + v0] += 1;
2613 buckets[2 * ALPHABET_SIZE + v0] = d;
2614
2615 let mut p1 = sa[(i + 1) as usize];
2616 d += SaSint::from(p1 < 0);
2617 p1 &= SAINT_MAX;
2618 let v1 = buckets_index2(
2619 t[(p1 - 1) as usize] as usize,
2620 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
2621 );
2622 let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2623 SAINT_MIN
2624 } else {
2625 0
2626 };
2627 let dst1 = buckets[4 * ALPHABET_SIZE + v1] as usize;
2628 sa[dst1] = (p1 - 1) | mark1;
2629 buckets[4 * ALPHABET_SIZE + v1] += 1;
2630 buckets[2 * ALPHABET_SIZE + v1] = d;
2631
2632 i += 2;
2633 }
2634
2635 j += 64 + 1;
2636 while i < j {
2637 let mut p = sa[i as usize];
2638 d += SaSint::from(p < 0);
2639 p &= SAINT_MAX;
2640 let v = buckets_index2(
2641 t[(p - 1) as usize] as usize,
2642 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2643 );
2644 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2645 SAINT_MIN
2646 } else {
2647 0
2648 };
2649 let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2650 sa[dst] = (p - 1) | mark;
2651 buckets[4 * ALPHABET_SIZE + v] += 1;
2652 buckets[2 * ALPHABET_SIZE + v] = d;
2653 i += 1;
2654 }
2655
2656 d
2657}
2658
2659#[allow(dead_code)]
2660fn partial_sorting_scan_left_to_right_16u_block_prepare(
2661 t: &[u16],
2662 sa: &mut [SaSint],
2663 k: SaSint,
2664 buckets: &mut [SaSint],
2665 cache: &mut [ThreadCache],
2666 omp_block_start: SaSint,
2667 omp_block_size: SaSint,
2668 state: &mut ThreadState,
2669) -> SaSint {
2670 let width = 2 * k as usize;
2671 buckets[..width].fill(0);
2672 buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2673
2674 let mut count = 0usize;
2675 let mut d = 1;
2676 for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
2677 let mut p = sa[i];
2678 cache[count].index = p;
2679 d += SaSint::from(p < 0);
2680 p &= SAINT_MAX;
2681 let v = buckets_index2(
2682 t[(p - 1) as usize] as usize,
2683 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2684 );
2685 cache[count].symbol = v as SaSint;
2686 buckets[v] += 1;
2687 buckets[2 * ALPHABET_SIZE + v] = d;
2688 count += 1;
2689 }
2690 state.cache_entries = count;
2691 d - 1
2692}
2693
2694#[allow(dead_code)]
2695fn partial_sorting_scan_left_to_right_16u_block_place(
2696 sa: &mut [SaSint],
2697 buckets: &mut [SaSint],
2698 cache: &[ThreadCache],
2699 count: SaSint,
2700 mut d: SaSint,
2701) {
2702 for entry in cache.iter().take(count as usize) {
2703 let mut p = entry.index;
2704 d += SaSint::from(p < 0);
2705 p &= SAINT_MAX;
2706 let v = entry.symbol as usize;
2707 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2708 SAINT_MIN
2709 } else {
2710 0
2711 };
2712 let dst = buckets[v] as usize;
2713 sa[dst] = (p - 1) | mark;
2714 buckets[v] += 1;
2715 buckets[2 * ALPHABET_SIZE + v] = d;
2716 }
2717}
2718
2719#[allow(dead_code)]
2720fn partial_sorting_scan_left_to_right_16u_block_omp(
2721 t: &[u16],
2722 sa: &mut [SaSint],
2723 k: SaSint,
2724 buckets: &mut [SaSint],
2725 d: SaSint,
2726 block_start: SaSint,
2727 block_size: SaSint,
2728 threads: SaSint,
2729 thread_state: &mut [ThreadState],
2730) -> SaSint {
2731 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
2732 usize::try_from(threads)
2733 .expect("threads must be non-negative")
2734 .min(thread_state.len())
2735 } else {
2736 1
2737 };
2738 if thread_count <= 1 {
2739 return partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, block_start, block_size);
2740 }
2741
2742 let bucket_width = 2 * k as usize;
2743 let block_stride = (block_size / thread_count as SaSint) & !15;
2744
2745 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
2746 let local_start = thread as SaSint * block_stride;
2747 let local_size = if thread + 1 < thread_count {
2748 block_stride
2749 } else {
2750 block_size - local_start
2751 };
2752 let mut local_state = ThreadState::default();
2753 state.position = partial_sorting_scan_left_to_right_16u_block_prepare(
2754 t,
2755 sa,
2756 k,
2757 &mut state.buckets,
2758 &mut state.cache,
2759 block_start + local_start,
2760 local_size,
2761 &mut local_state,
2762 );
2763 state.count = local_state.cache_entries as SaSint;
2764 }
2765
2766 let mut next_d = d;
2767 for state in thread_state.iter_mut().take(thread_count) {
2768 for c in 0..bucket_width {
2769 let a = buckets[4 * ALPHABET_SIZE + c];
2770 let b = state.buckets[c];
2771 buckets[4 * ALPHABET_SIZE + c] = a + b;
2772 state.buckets[c] = a;
2773 }
2774
2775 next_d -= 1;
2776 for c in 0..bucket_width {
2777 let a = buckets[2 * ALPHABET_SIZE + c];
2778 let b = state.buckets[2 * ALPHABET_SIZE + c];
2779 let shifted = b + next_d;
2780 buckets[2 * ALPHABET_SIZE + c] = if b > 0 { shifted } else { a };
2781 state.buckets[2 * ALPHABET_SIZE + c] = a;
2782 }
2783 next_d += 1 + state.position;
2784 state.position = next_d - state.position;
2785 }
2786
2787 for state in thread_state.iter_mut().take(thread_count) {
2788 partial_sorting_scan_left_to_right_16u_block_place(
2789 sa,
2790 &mut state.buckets,
2791 &state.cache,
2792 state.count,
2793 state.position,
2794 );
2795 }
2796
2797 next_d
2798}
2799
2800#[allow(dead_code)]
2801fn partial_sorting_scan_left_to_right_16u_omp(
2802 t: &[u16],
2803 sa: &mut [SaSint],
2804 n: SaSint,
2805 k: SaSint,
2806 buckets: &mut [SaSint],
2807 left_suffixes_count: SaSint,
2808 mut d: SaSint,
2809 threads: SaSint,
2810) -> SaSint {
2811 let v = buckets_index2(
2812 t[(n - 1) as usize] as usize,
2813 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
2814 );
2815 let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2816 buckets[4 * ALPHABET_SIZE + v] += 1;
2817 sa[dst] = (n - 1) | SAINT_MIN;
2818 d += 1;
2819 buckets[2 * ALPHABET_SIZE + v] = d;
2820
2821 if threads == 1 || left_suffixes_count < 65536 {
2822 d = partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, 0, left_suffixes_count);
2823 } else {
2824 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
2825 let mut block_start = 0;
2826 while block_start < left_suffixes_count {
2827 if sa[block_start as usize] == 0 {
2828 block_start += 1;
2829 } else {
2830 let mut block_end =
2831 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
2832 if block_end > left_suffixes_count {
2833 block_end = left_suffixes_count;
2834 }
2835 let mut block_scan_end = block_start + 1;
2836 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
2837 block_scan_end += 1;
2838 }
2839 let block_size = block_scan_end - block_start;
2840
2841 if block_size < 32 {
2842 while block_start < block_scan_end {
2843 let mut p = sa[block_start as usize];
2844 d += SaSint::from(p < 0);
2845 p &= SAINT_MAX;
2846 let v = buckets_index2(
2847 t[(p - 1) as usize] as usize,
2848 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2849 );
2850 let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2851 buckets[4 * ALPHABET_SIZE + v] += 1;
2852 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2853 SAINT_MIN
2854 } else {
2855 0
2856 };
2857 sa[dst] = (p - 1) | mark;
2858 buckets[2 * ALPHABET_SIZE + v] = d;
2859 block_start += 1;
2860 }
2861 } else {
2862 d = partial_sorting_scan_left_to_right_16u_block_omp(
2863 t,
2864 sa,
2865 k,
2866 buckets,
2867 d,
2868 block_start,
2869 block_size,
2870 threads,
2871 &mut thread_state,
2872 );
2873 block_start = block_scan_end;
2874 }
2875 }
2876 }
2877 }
2878 d
2879}
2880
2881#[allow(dead_code)]
2882fn partial_sorting_scan_right_to_left_16u(
2883 t: &[u16],
2884 sa: &mut [SaSint],
2885 buckets: &mut [SaSint],
2886 mut d: SaSint,
2887 omp_block_start: SaSint,
2888 omp_block_size: SaSint,
2889) -> SaSint {
2890 let mut i = (omp_block_start + omp_block_size - 1) as isize;
2891 let mut j = (omp_block_start + 64 + 1) as isize;
2892 while i >= j {
2893 let mut p0 = sa[i as usize];
2894 d += SaSint::from(p0 < 0);
2895 p0 &= SAINT_MAX;
2896 let v0 = buckets_index2(
2897 t[(p0 - 1) as usize] as usize,
2898 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
2899 );
2900 let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2901 SAINT_MIN
2902 } else {
2903 0
2904 };
2905 buckets[v0] -= 1;
2906 sa[buckets[v0] as usize] = (p0 - 1) | mark0;
2907 buckets[2 * ALPHABET_SIZE + v0] = d;
2908
2909 let mut p1 = sa[(i - 1) as usize];
2910 d += SaSint::from(p1 < 0);
2911 p1 &= SAINT_MAX;
2912 let v1 = buckets_index2(
2913 t[(p1 - 1) as usize] as usize,
2914 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
2915 );
2916 let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2917 SAINT_MIN
2918 } else {
2919 0
2920 };
2921 buckets[v1] -= 1;
2922 sa[buckets[v1] as usize] = (p1 - 1) | mark1;
2923 buckets[2 * ALPHABET_SIZE + v1] = d;
2924
2925 i -= 2;
2926 }
2927
2928 j -= 64 + 1;
2929 while i >= j {
2930 let mut p = sa[i as usize];
2931 d += SaSint::from(p < 0);
2932 p &= SAINT_MAX;
2933 let v = buckets_index2(
2934 t[(p - 1) as usize] as usize,
2935 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2936 );
2937 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2938 SAINT_MIN
2939 } else {
2940 0
2941 };
2942 buckets[v] -= 1;
2943 sa[buckets[v] as usize] = (p - 1) | mark;
2944 buckets[2 * ALPHABET_SIZE + v] = d;
2945 i -= 1;
2946 }
2947
2948 d
2949}
2950
2951#[allow(dead_code)]
2952fn partial_sorting_scan_right_to_left_16u_block_prepare(
2953 t: &[u16],
2954 sa: &mut [SaSint],
2955 k: SaSint,
2956 buckets: &mut [SaSint],
2957 cache: &mut [ThreadCache],
2958 omp_block_start: SaSint,
2959 omp_block_size: SaSint,
2960 state: &mut ThreadState,
2961) -> SaSint {
2962 let width = 2 * k as usize;
2963 buckets[..width].fill(0);
2964 buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2965
2966 let mut count = 0usize;
2967 let mut d = 1;
2968 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
2969 let mut p = sa[i];
2970 cache[count].index = p;
2971 d += SaSint::from(p < 0);
2972 p &= SAINT_MAX;
2973 let v = buckets_index2(
2974 t[(p - 1) as usize] as usize,
2975 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2976 );
2977 cache[count].symbol = v as SaSint;
2978 buckets[v] += 1;
2979 buckets[2 * ALPHABET_SIZE + v] = d;
2980 count += 1;
2981 }
2982 state.cache_entries = count;
2983 d - 1
2984}
2985
2986#[allow(dead_code)]
2987fn partial_sorting_scan_right_to_left_16u_block_place(
2988 sa: &mut [SaSint],
2989 buckets: &mut [SaSint],
2990 cache: &[ThreadCache],
2991 count: SaSint,
2992 mut d: SaSint,
2993) {
2994 for entry in cache.iter().take(count as usize) {
2995 let mut p = entry.index;
2996 d += SaSint::from(p < 0);
2997 p &= SAINT_MAX;
2998 let v = entry.symbol as usize;
2999 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3000 SAINT_MIN
3001 } else {
3002 0
3003 };
3004 buckets[v] -= 1;
3005 sa[buckets[v] as usize] = (p - 1) | mark;
3006 buckets[2 * ALPHABET_SIZE + v] = d;
3007 }
3008}
3009
3010#[allow(dead_code)]
3011fn partial_gsa_scan_right_to_left_16u_block_place(
3012 sa: &mut [SaSint],
3013 buckets: &mut [SaSint],
3014 cache: &[ThreadCache],
3015 count: SaSint,
3016 mut d: SaSint,
3017) {
3018 for entry in cache.iter().take(count as usize) {
3019 let mut p = entry.index;
3020 d += SaSint::from(p < 0);
3021 p &= SAINT_MAX;
3022 let v = entry.symbol as usize;
3023 if v != 1 {
3024 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3025 SAINT_MIN
3026 } else {
3027 0
3028 };
3029 buckets[v] -= 1;
3030 sa[buckets[v] as usize] = (p - 1) | mark;
3031 buckets[2 * ALPHABET_SIZE + v] = d;
3032 }
3033 }
3034}
3035
3036#[allow(dead_code)]
3037fn partial_sorting_scan_right_to_left_16u_block_omp(
3038 t: &[u16],
3039 sa: &mut [SaSint],
3040 k: SaSint,
3041 buckets: &mut [SaSint],
3042 d: SaSint,
3043 block_start: SaSint,
3044 block_size: SaSint,
3045 threads: SaSint,
3046 thread_state: &mut [ThreadState],
3047) -> SaSint {
3048 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
3049 usize::try_from(threads)
3050 .expect("threads must be non-negative")
3051 .min(thread_state.len())
3052 } else {
3053 1
3054 };
3055 if thread_count <= 1 {
3056 return partial_sorting_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
3057 }
3058
3059 let width = 2 * k as usize;
3060 let distinct_offset = 2 * ALPHABET_SIZE;
3061 let block_stride = (block_size / thread_count as SaSint) & !15;
3062
3063 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
3064 let local_start = thread as SaSint * block_stride;
3065 let local_size = if thread + 1 < thread_count {
3066 block_stride
3067 } else {
3068 block_size - local_start
3069 };
3070 let mut local_state = ThreadState::default();
3071 state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
3072 t,
3073 sa,
3074 k,
3075 &mut state.buckets,
3076 &mut state.cache,
3077 block_start + local_start,
3078 local_size,
3079 &mut local_state,
3080 );
3081 state.count = local_state.cache_entries as SaSint;
3082 }
3083
3084 let mut next_d = d;
3085 for state in thread_state.iter_mut().take(thread_count).rev() {
3086 for c in 0..width {
3087 let a = buckets[c];
3088 let b = state.buckets[c];
3089 buckets[c] = a - b;
3090 state.buckets[c] = a;
3091 }
3092
3093 next_d -= 1;
3094 for c in 0..width {
3095 let offset = distinct_offset + c;
3096 let a = buckets[offset];
3097 let b = state.buckets[offset];
3098 let shifted = b + next_d;
3099 buckets[offset] = if b > 0 { shifted } else { a };
3100 state.buckets[offset] = a;
3101 }
3102 next_d += 1 + state.position;
3103 state.position = next_d - state.position;
3104 }
3105
3106 for state in thread_state.iter_mut().take(thread_count) {
3107 partial_sorting_scan_right_to_left_16u_block_place(
3108 sa,
3109 &mut state.buckets,
3110 &state.cache,
3111 state.count,
3112 state.position,
3113 );
3114 }
3115
3116 next_d
3117}
3118
3119#[allow(dead_code)]
3120fn partial_sorting_scan_right_to_left_16u_omp(
3121 t: &[u16],
3122 sa: &mut [SaSint],
3123 n: SaSint,
3124 k: SaSint,
3125 buckets: &mut [SaSint],
3126 first_lms_suffix: SaSint,
3127 left_suffixes_count: SaSint,
3128 d: SaSint,
3129 threads: SaSint,
3130) {
3131 let scan_start = left_suffixes_count + 1;
3132 let scan_end = n - first_lms_suffix;
3133
3134 if threads == 1 || scan_end - scan_start < 65536 {
3135 partial_sorting_scan_right_to_left_16u(
3136 t,
3137 sa,
3138 buckets,
3139 d,
3140 scan_start,
3141 scan_end - scan_start,
3142 );
3143 } else {
3144 let mut d = d;
3145 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
3146 let mut block_start = scan_end - 1;
3147 while block_start >= scan_start {
3148 if sa[block_start as usize] == 0 {
3149 block_start -= 1;
3150 } else {
3151 let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
3152 let mut block_max_end = block_start - block_limit;
3153 if block_max_end < scan_start {
3154 block_max_end = scan_start - 1;
3155 }
3156 let mut block_end = block_start - 1;
3157 while block_end > block_max_end && sa[block_end as usize] != 0 {
3158 block_end -= 1;
3159 }
3160 let block_size = block_start - block_end;
3161
3162 if block_size < 32 {
3163 while block_start > block_end {
3164 let mut p = sa[block_start as usize];
3165 d += SaSint::from(p < 0);
3166 p &= SAINT_MAX;
3167 let v = buckets_index2(
3168 t[(p - 1) as usize] as usize,
3169 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3170 );
3171 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3172 SAINT_MIN
3173 } else {
3174 0
3175 };
3176 buckets[v] -= 1;
3177 sa[buckets[v] as usize] = (p - 1) | mark;
3178 buckets[2 * ALPHABET_SIZE + v] = d;
3179 block_start -= 1;
3180 }
3181 } else {
3182 d = partial_sorting_scan_right_to_left_16u_block_omp(
3183 t,
3184 sa,
3185 k,
3186 buckets,
3187 d,
3188 block_end + 1,
3189 block_size,
3190 threads,
3191 &mut thread_state,
3192 );
3193 block_start = block_end;
3194 }
3195 }
3196 }
3197 }
3198}
3199
3200#[allow(dead_code)]
3201fn partial_sorting_scan_left_to_right_32s_6k(
3202 t: &[SaSint],
3203 sa: &mut [SaSint],
3204 buckets: &mut [SaSint],
3205 mut d: SaSint,
3206 omp_block_start: SaSint,
3207 omp_block_size: SaSint,
3208) -> SaSint {
3209 let mut i = omp_block_start;
3210 let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3211
3212 while i < j {
3213 let mut p2 = sa[i as usize];
3214 d += SaSint::from(p2 < 0);
3215 p2 &= SAINT_MAX;
3216 let v2 = buckets_index4(
3217 t[(p2 - 1) as usize] as usize,
3218 usize::from(t[(p2 - 2) as usize] >= t[(p2 - 1) as usize]),
3219 );
3220 let pos2 = buckets[v2] as usize;
3221 buckets[v2] += 1;
3222 sa[pos2] = (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3223 buckets[2 + v2] = d;
3224
3225 let mut p3 = sa[(i + 1) as usize];
3226 d += SaSint::from(p3 < 0);
3227 p3 &= SAINT_MAX;
3228 let v3 = buckets_index4(
3229 t[(p3 - 1) as usize] as usize,
3230 usize::from(t[(p3 - 2) as usize] >= t[(p3 - 1) as usize]),
3231 );
3232 let pos3 = buckets[v3] as usize;
3233 buckets[v3] += 1;
3234 sa[pos3] = (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3235 buckets[2 + v3] = d;
3236
3237 i += 2;
3238 }
3239
3240 j += 2 * 64 + 1;
3241 while i < j {
3242 let mut p = sa[i as usize];
3243 d += SaSint::from(p < 0);
3244 p &= SAINT_MAX;
3245 let v = buckets_index4(
3246 t[(p - 1) as usize] as usize,
3247 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3248 );
3249 let pos = buckets[v] as usize;
3250 buckets[v] += 1;
3251 sa[pos] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3252 buckets[2 + v] = d;
3253 i += 1;
3254 }
3255
3256 d
3257}
3258
3259#[allow(dead_code)]
3260fn partial_sorting_scan_left_to_right_32s_4k(
3261 t: &[SaSint],
3262 sa: &mut [SaSint],
3263 k: SaSint,
3264 buckets: &mut [SaSint],
3265 mut d: SaSint,
3266 omp_block_start: SaSint,
3267 omp_block_size: SaSint,
3268) -> SaSint {
3269 let k = k as usize;
3270 let mut i = omp_block_start;
3271 let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3272
3273 while i < j {
3274 let mut p0 = sa[i as usize];
3275 sa[i as usize] = p0 & SAINT_MAX;
3276 if p0 > 0 {
3277 sa[i as usize] = 0;
3278 d += p0 >> (SUFFIX_GROUP_BIT - 1);
3279 p0 &= !SUFFIX_GROUP_MARKER;
3280 let v0 = buckets_index2(
3281 t[(p0 - 1) as usize] as usize,
3282 usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3283 );
3284 let c0 = t[(p0 - 1) as usize] as usize;
3285 let pos0 = buckets[2 * k + c0] as usize;
3286 buckets[2 * k + c0] += 1;
3287 sa[pos0] = (p0 - 1)
3288 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3289 << (SAINT_BIT - 1))
3290 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3291 buckets[v0] = d;
3292 }
3293
3294 let mut p1 = sa[(i + 1) as usize];
3295 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3296 if p1 > 0 {
3297 sa[(i + 1) as usize] = 0;
3298 d += p1 >> (SUFFIX_GROUP_BIT - 1);
3299 p1 &= !SUFFIX_GROUP_MARKER;
3300 let v1 = buckets_index2(
3301 t[(p1 - 1) as usize] as usize,
3302 usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3303 );
3304 let c1 = t[(p1 - 1) as usize] as usize;
3305 let pos1 = buckets[2 * k + c1] as usize;
3306 buckets[2 * k + c1] += 1;
3307 sa[pos1] = (p1 - 1)
3308 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3309 << (SAINT_BIT - 1))
3310 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3311 buckets[v1] = d;
3312 }
3313
3314 i += 2;
3315 }
3316
3317 j += 2 * 64 + 1;
3318 while i < j {
3319 let mut p = sa[i as usize];
3320 sa[i as usize] = p & SAINT_MAX;
3321 if p > 0 {
3322 sa[i as usize] = 0;
3323 d += p >> (SUFFIX_GROUP_BIT - 1);
3324 p &= !SUFFIX_GROUP_MARKER;
3325 let v = buckets_index2(
3326 t[(p - 1) as usize] as usize,
3327 usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3328 );
3329 let c = t[(p - 1) as usize] as usize;
3330 let pos = buckets[2 * k + c] as usize;
3331 buckets[2 * k + c] += 1;
3332 sa[pos] = (p - 1)
3333 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3334 << (SAINT_BIT - 1))
3335 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3336 buckets[v] = d;
3337 }
3338 i += 1;
3339 }
3340
3341 d
3342}
3343
3344#[allow(dead_code)]
3345fn partial_sorting_scan_left_to_right_32s_1k(
3346 t: &[SaSint],
3347 sa: &mut [SaSint],
3348 induction_bucket: &mut [SaSint],
3349 omp_block_start: SaSint,
3350 omp_block_size: SaSint,
3351) {
3352 let mut i = omp_block_start;
3353 let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3354
3355 while i < j {
3356 let p0 = sa[i as usize];
3357 sa[i as usize] = p0 & SAINT_MAX;
3358 if p0 > 0 {
3359 sa[i as usize] = 0;
3360 let c0 = t[(p0 - 1) as usize] as usize;
3361 let pos0 = induction_bucket[c0] as usize;
3362 induction_bucket[c0] += 1;
3363 sa[pos0] = (p0 - 1)
3364 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3365 << (SAINT_BIT - 1));
3366 }
3367
3368 let p1 = sa[(i + 1) as usize];
3369 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3370 if p1 > 0 {
3371 sa[(i + 1) as usize] = 0;
3372 let c1 = t[(p1 - 1) as usize] as usize;
3373 let pos1 = induction_bucket[c1] as usize;
3374 induction_bucket[c1] += 1;
3375 sa[pos1] = (p1 - 1)
3376 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3377 << (SAINT_BIT - 1));
3378 }
3379
3380 i += 2;
3381 }
3382
3383 j += 2 * 64 + 1;
3384 while i < j {
3385 let p = sa[i as usize];
3386 sa[i as usize] = p & SAINT_MAX;
3387 if p > 0 {
3388 sa[i as usize] = 0;
3389 let c = t[(p - 1) as usize] as usize;
3390 let pos = induction_bucket[c] as usize;
3391 induction_bucket[c] += 1;
3392 sa[pos] = (p - 1)
3393 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3394 << (SAINT_BIT - 1));
3395 }
3396 i += 1;
3397 }
3398}
3399
3400#[allow(dead_code)]
3401fn partial_sorting_scan_left_to_right_32s_6k_omp(
3402 t: &[SaSint],
3403 sa: &mut [SaSint],
3404 n: SaSint,
3405 buckets: &mut [SaSint],
3406 left_suffixes_count: SaSint,
3407 mut d: SaSint,
3408 threads: SaSint,
3409 _thread_state: &mut [ThreadState],
3410) -> SaSint {
3411 let v = buckets_index4(
3412 t[(n - 1) as usize] as usize,
3413 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
3414 );
3415 let pos = buckets[v] as usize;
3416 buckets[v] += 1;
3417 sa[pos] = (n - 1) | SAINT_MIN;
3418 d += 1;
3419 buckets[2 + v] = d;
3420
3421 if threads == 1 || left_suffixes_count < 65536 {
3422 d = partial_sorting_scan_left_to_right_32s_6k(t, sa, buckets, d, 0, left_suffixes_count);
3423 } else {
3424 let mut cache = vec![ThreadCache::default(); left_suffixes_count as usize];
3425 let mut block_start = 0;
3426 while block_start < left_suffixes_count {
3427 let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3428 if block_end > left_suffixes_count {
3429 block_end = left_suffixes_count;
3430 }
3431 d = partial_sorting_scan_left_to_right_32s_6k_block_omp(
3432 t,
3433 sa,
3434 buckets,
3435 d,
3436 &mut cache,
3437 block_start,
3438 block_end - block_start,
3439 threads,
3440 );
3441 block_start = block_end;
3442 }
3443 }
3444
3445 d
3446}
3447
3448#[allow(dead_code)]
3449fn partial_sorting_scan_left_to_right_32s_4k_omp(
3450 t: &[SaSint],
3451 sa: &mut [SaSint],
3452 n: SaSint,
3453 k: SaSint,
3454 buckets: &mut [SaSint],
3455 mut d: SaSint,
3456 threads: SaSint,
3457 _thread_state: &mut [ThreadState],
3458) -> SaSint {
3459 let k_usize = k as usize;
3460 let pos = buckets[2 * k_usize + t[(n - 1) as usize] as usize] as usize;
3461 buckets[2 * k_usize + t[(n - 1) as usize] as usize] += 1;
3462 sa[pos] = (n - 1)
3463 | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1))
3464 | SUFFIX_GROUP_MARKER;
3465 d += 1;
3466 buckets[buckets_index2(
3467 t[(n - 1) as usize] as usize,
3468 usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]),
3469 )] = d;
3470
3471 if threads == 1 || n < 65536 {
3472 d = partial_sorting_scan_left_to_right_32s_4k(t, sa, k, buckets, d, 0, n);
3473 } else {
3474 let mut cache = vec![ThreadCache::default(); n as usize];
3475 let mut block_start = 0;
3476 while block_start < n {
3477 let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3478 if block_end > n {
3479 block_end = n;
3480 }
3481 d = partial_sorting_scan_left_to_right_32s_4k_block_omp(
3482 t,
3483 sa,
3484 k,
3485 buckets,
3486 d,
3487 &mut cache,
3488 block_start,
3489 block_end - block_start,
3490 threads,
3491 );
3492 block_start = block_end;
3493 }
3494 }
3495
3496 d
3497}
3498
3499#[allow(dead_code)]
3500fn partial_sorting_scan_left_to_right_32s_1k_omp(
3501 t: &[SaSint],
3502 sa: &mut [SaSint],
3503 n: SaSint,
3504 buckets: &mut [SaSint],
3505 threads: SaSint,
3506 _thread_state: &mut [ThreadState],
3507) {
3508 let pos = buckets[t[(n - 1) as usize] as usize] as usize;
3509 buckets[t[(n - 1) as usize] as usize] += 1;
3510 sa[pos] = (n - 1)
3511 | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1));
3512
3513 if threads == 1 || n < 65536 {
3514 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n);
3515 } else {
3516 let mut cache = vec![ThreadCache::default(); n as usize];
3517 let mut block_start = 0;
3518 while block_start < n {
3519 let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3520 if block_end > n {
3521 block_end = n;
3522 }
3523 partial_sorting_scan_left_to_right_32s_1k_block_omp(
3524 t,
3525 sa,
3526 buckets,
3527 &mut cache,
3528 block_start,
3529 block_end - block_start,
3530 threads,
3531 );
3532 block_start = block_end;
3533 }
3534 }
3535}
3536
3537#[allow(dead_code)]
3538fn partial_sorting_scan_right_to_left_32s_6k(
3539 t: &[SaSint],
3540 sa: &mut [SaSint],
3541 buckets: &mut [SaSint],
3542 mut d: SaSint,
3543 omp_block_start: SaSint,
3544 omp_block_size: SaSint,
3545) -> SaSint {
3546 if omp_block_size <= 0 {
3547 return d;
3548 }
3549
3550 let mut i = omp_block_start + omp_block_size - 1;
3551 let mut j = omp_block_start + 2 * 64 + 1;
3552
3553 while i >= j {
3554 let mut p2 = sa[i as usize];
3555 d += SaSint::from(p2 < 0);
3556 p2 &= SAINT_MAX;
3557 let v2 = buckets_index4(
3558 t[(p2 - 1) as usize] as usize,
3559 usize::from(t[(p2 - 2) as usize] > t[(p2 - 1) as usize]),
3560 );
3561 buckets[v2] -= 1;
3562 sa[buckets[v2] as usize] =
3563 (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3564 buckets[2 + v2] = d;
3565
3566 let mut p3 = sa[(i - 1) as usize];
3567 d += SaSint::from(p3 < 0);
3568 p3 &= SAINT_MAX;
3569 let v3 = buckets_index4(
3570 t[(p3 - 1) as usize] as usize,
3571 usize::from(t[(p3 - 2) as usize] > t[(p3 - 1) as usize]),
3572 );
3573 buckets[v3] -= 1;
3574 sa[buckets[v3] as usize] =
3575 (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3576 buckets[2 + v3] = d;
3577
3578 i -= 2;
3579 }
3580
3581 j -= 2 * 64 + 1;
3582 while i >= j {
3583 let mut p = sa[i as usize];
3584 d += SaSint::from(p < 0);
3585 p &= SAINT_MAX;
3586 let v = buckets_index4(
3587 t[(p - 1) as usize] as usize,
3588 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3589 );
3590 buckets[v] -= 1;
3591 sa[buckets[v] as usize] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3592 buckets[2 + v] = d;
3593 i -= 1;
3594 }
3595
3596 d
3597}
3598
3599#[allow(dead_code)]
3600fn partial_sorting_scan_right_to_left_32s_4k(
3601 t: &[SaSint],
3602 sa: &mut [SaSint],
3603 k: SaSint,
3604 buckets: &mut [SaSint],
3605 mut d: SaSint,
3606 omp_block_start: SaSint,
3607 omp_block_size: SaSint,
3608) -> SaSint {
3609 if omp_block_size <= 0 {
3610 return d;
3611 }
3612
3613 let k = k as usize;
3614 let mut i = omp_block_start + omp_block_size - 1;
3615 let mut j = omp_block_start + 2 * 64 + 1;
3616
3617 while i >= j {
3618 let mut p0 = sa[i as usize];
3619 if p0 > 0 {
3620 sa[i as usize] = 0;
3621 d += p0 >> (SUFFIX_GROUP_BIT - 1);
3622 p0 &= !SUFFIX_GROUP_MARKER;
3623 let v0 = buckets_index2(
3624 t[(p0 - 1) as usize] as usize,
3625 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
3626 );
3627 let c0 = t[(p0 - 1) as usize] as usize;
3628 buckets[3 * k + c0] -= 1;
3629 sa[buckets[3 * k + c0] as usize] = (p0 - 1)
3630 | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3631 << (SAINT_BIT - 1))
3632 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3633 buckets[v0] = d;
3634 }
3635
3636 let mut p1 = sa[(i - 1) as usize];
3637 if p1 > 0 {
3638 sa[(i - 1) as usize] = 0;
3639 d += p1 >> (SUFFIX_GROUP_BIT - 1);
3640 p1 &= !SUFFIX_GROUP_MARKER;
3641 let v1 = buckets_index2(
3642 t[(p1 - 1) as usize] as usize,
3643 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
3644 );
3645 let c1 = t[(p1 - 1) as usize] as usize;
3646 buckets[3 * k + c1] -= 1;
3647 sa[buckets[3 * k + c1] as usize] = (p1 - 1)
3648 | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3649 << (SAINT_BIT - 1))
3650 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3651 buckets[v1] = d;
3652 }
3653
3654 i -= 2;
3655 }
3656
3657 j -= 2 * 64 + 1;
3658 while i >= j {
3659 let mut p = sa[i as usize];
3660 if p > 0 {
3661 sa[i as usize] = 0;
3662 d += p >> (SUFFIX_GROUP_BIT - 1);
3663 p &= !SUFFIX_GROUP_MARKER;
3664 let v = buckets_index2(
3665 t[(p - 1) as usize] as usize,
3666 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3667 );
3668 let c = t[(p - 1) as usize] as usize;
3669 buckets[3 * k + c] -= 1;
3670 sa[buckets[3 * k + c] as usize] = (p - 1)
3671 | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3672 << (SAINT_BIT - 1))
3673 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3674 buckets[v] = d;
3675 }
3676 i -= 1;
3677 }
3678
3679 d
3680}
3681
3682#[allow(dead_code)]
3683fn partial_sorting_scan_right_to_left_32s_1k(
3684 t: &[SaSint],
3685 sa: &mut [SaSint],
3686 induction_bucket: &mut [SaSint],
3687 omp_block_start: SaSint,
3688 omp_block_size: SaSint,
3689) {
3690 if omp_block_size <= 0 {
3691 return;
3692 }
3693
3694 let mut i = omp_block_start + omp_block_size - 1;
3695 let mut j = omp_block_start + 2 * 64 + 1;
3696
3697 while i >= j {
3698 let p0 = sa[i as usize];
3699 if p0 > 0 {
3700 sa[i as usize] = 0;
3701 let c0 = t[(p0 - 1) as usize] as usize;
3702 induction_bucket[c0] -= 1;
3703 sa[induction_bucket[c0] as usize] = (p0 - 1)
3704 | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3705 << (SAINT_BIT - 1));
3706 }
3707
3708 let p1 = sa[(i - 1) as usize];
3709 if p1 > 0 {
3710 sa[(i - 1) as usize] = 0;
3711 let c1 = t[(p1 - 1) as usize] as usize;
3712 induction_bucket[c1] -= 1;
3713 sa[induction_bucket[c1] as usize] = (p1 - 1)
3714 | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3715 << (SAINT_BIT - 1));
3716 }
3717
3718 i -= 2;
3719 }
3720
3721 j -= 2 * 64 + 1;
3722 while i >= j {
3723 let p = sa[i as usize];
3724 if p > 0 {
3725 sa[i as usize] = 0;
3726 let c = t[(p - 1) as usize] as usize;
3727 induction_bucket[c] -= 1;
3728 sa[induction_bucket[c] as usize] = (p - 1)
3729 | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3730 << (SAINT_BIT - 1));
3731 }
3732 i -= 1;
3733 }
3734}
3735
3736#[allow(dead_code)]
3737fn partial_sorting_scan_right_to_left_32s_6k_omp(
3738 t: &[SaSint],
3739 sa: &mut [SaSint],
3740 n: SaSint,
3741 buckets: &mut [SaSint],
3742 first_lms_suffix: SaSint,
3743 left_suffixes_count: SaSint,
3744 mut d: SaSint,
3745 threads: SaSint,
3746 _thread_state: &mut [ThreadState],
3747) -> SaSint {
3748 let scan_start = left_suffixes_count + 1;
3749 let scan_end = n - first_lms_suffix;
3750
3751 if threads == 1 || scan_end - scan_start < 65536 {
3752 d = partial_sorting_scan_right_to_left_32s_6k(
3753 t,
3754 sa,
3755 buckets,
3756 d,
3757 scan_start,
3758 scan_end - scan_start,
3759 );
3760 } else {
3761 let mut cache = vec![ThreadCache::default(); (scan_end - scan_start) as usize];
3762 let mut block_start = scan_end;
3763 while block_start > scan_start {
3764 let block_size =
3765 (block_start - scan_start).min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3766 block_start -= block_size;
3767 d = partial_sorting_scan_right_to_left_32s_6k_block_omp(
3768 t,
3769 sa,
3770 buckets,
3771 d,
3772 &mut cache,
3773 block_start,
3774 block_size,
3775 threads,
3776 );
3777 }
3778 }
3779
3780 d
3781}
3782
3783#[allow(dead_code)]
3784fn partial_sorting_scan_right_to_left_32s_4k_omp(
3785 t: &[SaSint],
3786 sa: &mut [SaSint],
3787 n: SaSint,
3788 k: SaSint,
3789 buckets: &mut [SaSint],
3790 mut d: SaSint,
3791 threads: SaSint,
3792 _thread_state: &mut [ThreadState],
3793) -> SaSint {
3794 if threads == 1 || n < 65536 {
3795 d = partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n);
3796 } else {
3797 let mut cache = vec![ThreadCache::default(); n as usize];
3798 let mut block_start = n;
3799 while block_start > 0 {
3800 let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3801 block_start -= block_size;
3802 d = partial_sorting_scan_right_to_left_32s_4k_block_omp(
3803 t,
3804 sa,
3805 k,
3806 buckets,
3807 d,
3808 &mut cache,
3809 block_start,
3810 block_size,
3811 threads,
3812 );
3813 }
3814 }
3815
3816 d
3817}
3818
3819#[allow(dead_code)]
3820fn partial_sorting_scan_right_to_left_32s_1k_omp(
3821 t: &[SaSint],
3822 sa: &mut [SaSint],
3823 n: SaSint,
3824 buckets: &mut [SaSint],
3825 threads: SaSint,
3826 _thread_state: &mut [ThreadState],
3827) {
3828 if threads == 1 || n < 65536 {
3829 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n);
3830 } else {
3831 let mut cache = vec![ThreadCache::default(); n as usize];
3832 let mut block_start = n;
3833 while block_start > 0 {
3834 let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3835 block_start -= block_size;
3836 partial_sorting_scan_right_to_left_32s_1k_block_omp(
3837 t,
3838 sa,
3839 buckets,
3840 &mut cache,
3841 block_start,
3842 block_size,
3843 threads,
3844 );
3845 }
3846 }
3847}
3848
3849#[allow(dead_code)]
3850fn partial_sorting_scan_left_to_right_32s_6k_block_gather(
3851 t: &[SaSint],
3852 sa: &mut [SaSint],
3853 cache: &mut [ThreadCache],
3854 omp_block_start: SaSint,
3855 omp_block_size: SaSint,
3856) {
3857 let mut i = omp_block_start;
3858 let mut j = omp_block_start + omp_block_size - 64 - 1;
3859
3860 while i < j {
3861 let p0 = sa[i as usize];
3862 cache[i as usize].index = p0;
3863 let p0 = p0 & SAINT_MAX;
3864 cache[i as usize].symbol = if p0 != 0 {
3865 buckets_index4(
3866 t[(p0 - 1) as usize] as usize,
3867 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
3868 ) as SaSint
3869 } else {
3870 0
3871 };
3872
3873 let p1 = sa[(i + 1) as usize];
3874 cache[(i + 1) as usize].index = p1;
3875 let p1 = p1 & SAINT_MAX;
3876 cache[(i + 1) as usize].symbol = if p1 != 0 {
3877 buckets_index4(
3878 t[(p1 - 1) as usize] as usize,
3879 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
3880 ) as SaSint
3881 } else {
3882 0
3883 };
3884
3885 i += 2;
3886 }
3887
3888 j += 64 + 1;
3889 while i < j {
3890 let p = sa[i as usize];
3891 cache[i as usize].index = p;
3892 let p = p & SAINT_MAX;
3893 cache[i as usize].symbol = if p != 0 {
3894 buckets_index4(
3895 t[(p - 1) as usize] as usize,
3896 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3897 ) as SaSint
3898 } else {
3899 0
3900 };
3901 i += 1;
3902 }
3903}
3904
3905#[allow(dead_code)]
3906fn partial_sorting_scan_left_to_right_32s_4k_block_gather(
3907 t: &[SaSint],
3908 sa: &mut [SaSint],
3909 cache: &mut [ThreadCache],
3910 omp_block_start: SaSint,
3911 omp_block_size: SaSint,
3912) {
3913 let mut i = omp_block_start;
3914 let mut j = omp_block_start + omp_block_size - 64 - 1;
3915
3916 while i < j {
3917 let mut symbol0 = SAINT_MIN;
3918 let mut p0 = sa[i as usize];
3919 if p0 > 0 {
3920 cache[i as usize].index = p0;
3921 p0 &= !SUFFIX_GROUP_MARKER;
3922 symbol0 = buckets_index2(
3923 t[(p0 - 1) as usize] as usize,
3924 usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3925 ) as SaSint;
3926 p0 = 0;
3927 }
3928 cache[i as usize].symbol = symbol0;
3929 sa[i as usize] = p0 & SAINT_MAX;
3930
3931 let mut symbol1 = SAINT_MIN;
3932 let mut p1 = sa[(i + 1) as usize];
3933 if p1 > 0 {
3934 cache[(i + 1) as usize].index = p1;
3935 p1 &= !SUFFIX_GROUP_MARKER;
3936 symbol1 = buckets_index2(
3937 t[(p1 - 1) as usize] as usize,
3938 usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3939 ) as SaSint;
3940 p1 = 0;
3941 }
3942 cache[(i + 1) as usize].symbol = symbol1;
3943 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3944
3945 i += 2;
3946 }
3947
3948 j += 64 + 1;
3949 while i < j {
3950 let mut symbol = SAINT_MIN;
3951 let mut p = sa[i as usize];
3952 if p > 0 {
3953 cache[i as usize].index = p;
3954 p &= !SUFFIX_GROUP_MARKER;
3955 symbol = buckets_index2(
3956 t[(p - 1) as usize] as usize,
3957 usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3958 ) as SaSint;
3959 p = 0;
3960 }
3961 cache[i as usize].symbol = symbol;
3962 sa[i as usize] = p & SAINT_MAX;
3963 i += 1;
3964 }
3965}
3966
3967#[allow(dead_code)]
3968fn partial_sorting_scan_left_to_right_32s_1k_block_gather(
3969 t: &[SaSint],
3970 sa: &mut [SaSint],
3971 cache: &mut [ThreadCache],
3972 omp_block_start: SaSint,
3973 omp_block_size: SaSint,
3974) {
3975 let mut i = omp_block_start;
3976 let mut j = omp_block_start + omp_block_size - 64 - 1;
3977
3978 while i < j {
3979 let mut symbol0 = SAINT_MIN;
3980 let mut p0 = sa[i as usize];
3981 if p0 > 0 {
3982 cache[i as usize].index = (p0 - 1)
3983 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3984 << (SAINT_BIT - 1));
3985 symbol0 = t[(p0 - 1) as usize];
3986 p0 = 0;
3987 }
3988 cache[i as usize].symbol = symbol0;
3989 sa[i as usize] = p0 & SAINT_MAX;
3990
3991 let mut symbol1 = SAINT_MIN;
3992 let mut p1 = sa[(i + 1) as usize];
3993 if p1 > 0 {
3994 cache[(i + 1) as usize].index = (p1 - 1)
3995 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3996 << (SAINT_BIT - 1));
3997 symbol1 = t[(p1 - 1) as usize];
3998 p1 = 0;
3999 }
4000 cache[(i + 1) as usize].symbol = symbol1;
4001 sa[(i + 1) as usize] = p1 & SAINT_MAX;
4002
4003 i += 2;
4004 }
4005
4006 j += 64 + 1;
4007 while i < j {
4008 let mut symbol = SAINT_MIN;
4009 let mut p = sa[i as usize];
4010 if p > 0 {
4011 cache[i as usize].index = (p - 1)
4012 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
4013 << (SAINT_BIT - 1));
4014 symbol = t[(p - 1) as usize];
4015 p = 0;
4016 }
4017 cache[i as usize].symbol = symbol;
4018 sa[i as usize] = p & SAINT_MAX;
4019 i += 1;
4020 }
4021}
4022
4023#[allow(dead_code)]
4024fn partial_sorting_scan_right_to_left_32s_6k_block_gather(
4025 t: &[SaSint],
4026 sa: &mut [SaSint],
4027 cache: &mut [ThreadCache],
4028 omp_block_start: SaSint,
4029 omp_block_size: SaSint,
4030) {
4031 let mut i = omp_block_start;
4032 let mut j = omp_block_start + omp_block_size - 64 - 1;
4033
4034 while i < j {
4035 let p0 = sa[i as usize];
4036 cache[i as usize].index = p0;
4037 let p0 = p0 & SAINT_MAX;
4038 cache[i as usize].symbol = if p0 != 0 {
4039 buckets_index4(
4040 t[(p0 - 1) as usize] as usize,
4041 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4042 ) as SaSint
4043 } else {
4044 0
4045 };
4046
4047 let p1 = sa[(i + 1) as usize];
4048 cache[(i + 1) as usize].index = p1;
4049 let p1 = p1 & SAINT_MAX;
4050 cache[(i + 1) as usize].symbol = if p1 != 0 {
4051 buckets_index4(
4052 t[(p1 - 1) as usize] as usize,
4053 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4054 ) as SaSint
4055 } else {
4056 0
4057 };
4058
4059 i += 2;
4060 }
4061
4062 j += 64 + 1;
4063 while i < j {
4064 let p = sa[i as usize];
4065 cache[i as usize].index = p;
4066 let p = p & SAINT_MAX;
4067 cache[i as usize].symbol = if p != 0 {
4068 buckets_index4(
4069 t[(p - 1) as usize] as usize,
4070 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4071 ) as SaSint
4072 } else {
4073 0
4074 };
4075 i += 1;
4076 }
4077}
4078
4079#[allow(dead_code)]
4080fn partial_sorting_scan_right_to_left_32s_4k_block_gather(
4081 t: &[SaSint],
4082 sa: &mut [SaSint],
4083 cache: &mut [ThreadCache],
4084 omp_block_start: SaSint,
4085 omp_block_size: SaSint,
4086) {
4087 let mut i = omp_block_start;
4088 let mut j = omp_block_start + omp_block_size - 64 - 1;
4089
4090 while i < j {
4091 let mut symbol0 = SAINT_MIN;
4092 let mut p0 = sa[i as usize];
4093 if p0 > 0 {
4094 sa[i as usize] = 0;
4095 cache[i as usize].index = p0;
4096 p0 &= !SUFFIX_GROUP_MARKER;
4097 symbol0 = buckets_index2(
4098 t[(p0 - 1) as usize] as usize,
4099 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4100 ) as SaSint;
4101 }
4102 cache[i as usize].symbol = symbol0;
4103
4104 let mut symbol1 = SAINT_MIN;
4105 let mut p1 = sa[(i + 1) as usize];
4106 if p1 > 0 {
4107 sa[(i + 1) as usize] = 0;
4108 cache[(i + 1) as usize].index = p1;
4109 p1 &= !SUFFIX_GROUP_MARKER;
4110 symbol1 = buckets_index2(
4111 t[(p1 - 1) as usize] as usize,
4112 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4113 ) as SaSint;
4114 }
4115 cache[(i + 1) as usize].symbol = symbol1;
4116
4117 i += 2;
4118 }
4119
4120 j += 64 + 1;
4121 while i < j {
4122 let mut symbol = SAINT_MIN;
4123 let mut p = sa[i as usize];
4124 if p > 0 {
4125 sa[i as usize] = 0;
4126 cache[i as usize].index = p;
4127 p &= !SUFFIX_GROUP_MARKER;
4128 symbol = buckets_index2(
4129 t[(p - 1) as usize] as usize,
4130 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4131 ) as SaSint;
4132 }
4133 cache[i as usize].symbol = symbol;
4134 i += 1;
4135 }
4136}
4137
4138#[allow(dead_code)]
4139fn partial_sorting_scan_right_to_left_32s_1k_block_gather(
4140 t: &[SaSint],
4141 sa: &mut [SaSint],
4142 cache: &mut [ThreadCache],
4143 omp_block_start: SaSint,
4144 omp_block_size: SaSint,
4145) {
4146 let mut i = omp_block_start;
4147 let mut j = omp_block_start + omp_block_size - 64 - 1;
4148
4149 while i < j {
4150 let mut symbol0 = SAINT_MIN;
4151 let p0 = sa[i as usize];
4152 if p0 > 0 {
4153 sa[i as usize] = 0;
4154 cache[i as usize].index = (p0 - 1)
4155 | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
4156 << (SAINT_BIT - 1));
4157 symbol0 = t[(p0 - 1) as usize];
4158 }
4159 cache[i as usize].symbol = symbol0;
4160
4161 let mut symbol1 = SAINT_MIN;
4162 let p1 = sa[(i + 1) as usize];
4163 if p1 > 0 {
4164 sa[(i + 1) as usize] = 0;
4165 cache[(i + 1) as usize].index = (p1 - 1)
4166 | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
4167 << (SAINT_BIT - 1));
4168 symbol1 = t[(p1 - 1) as usize];
4169 }
4170 cache[(i + 1) as usize].symbol = symbol1;
4171
4172 i += 2;
4173 }
4174
4175 j += 64 + 1;
4176 while i < j {
4177 let mut symbol = SAINT_MIN;
4178 let p = sa[i as usize];
4179 if p > 0 {
4180 sa[i as usize] = 0;
4181 cache[i as usize].index = (p - 1)
4182 | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
4183 << (SAINT_BIT - 1));
4184 symbol = t[(p - 1) as usize];
4185 }
4186 cache[i as usize].symbol = symbol;
4187 i += 1;
4188 }
4189}
4190
4191#[allow(dead_code)]
4192fn partial_sorting_scan_left_to_right_32s_6k_block_sort(
4193 t: &[SaSint],
4194 buckets: &mut [SaSint],
4195 mut d: SaSint,
4196 cache: &mut [ThreadCache],
4197 omp_block_start: SaSint,
4198 omp_block_size: SaSint,
4199) -> SaSint {
4200 let mut i = omp_block_start;
4201 let omp_block_end = omp_block_start + omp_block_size;
4202 let mut j = omp_block_end - 64 - 1;
4203
4204 while i < j {
4205 let v0 = cache[i as usize].symbol as usize;
4206 let p0 = cache[i as usize].index;
4207 d += SaSint::from(p0 < 0);
4208 cache[i as usize].symbol = buckets[v0];
4209 buckets[v0] += 1;
4210 cache[i as usize].index =
4211 (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4212 buckets[2 + v0] = d;
4213 if cache[i as usize].symbol < omp_block_end {
4214 let s = cache[i as usize].symbol as usize;
4215 let q = cache[i as usize].index & SAINT_MAX;
4216 cache[s].index = cache[i as usize].index;
4217 cache[s].symbol = buckets_index4(
4218 t[(q - 1) as usize] as usize,
4219 usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4220 ) as SaSint;
4221 }
4222
4223 let v1 = cache[(i + 1) as usize].symbol as usize;
4224 let p1 = cache[(i + 1) as usize].index;
4225 d += SaSint::from(p1 < 0);
4226 cache[(i + 1) as usize].symbol = buckets[v1];
4227 buckets[v1] += 1;
4228 cache[(i + 1) as usize].index =
4229 (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4230 buckets[2 + v1] = d;
4231 if cache[(i + 1) as usize].symbol < omp_block_end {
4232 let s = cache[(i + 1) as usize].symbol as usize;
4233 let q = cache[(i + 1) as usize].index & SAINT_MAX;
4234 cache[s].index = cache[(i + 1) as usize].index;
4235 cache[s].symbol = buckets_index4(
4236 t[(q - 1) as usize] as usize,
4237 usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4238 ) as SaSint;
4239 }
4240
4241 i += 2;
4242 }
4243
4244 j += 64 + 1;
4245 while i < j {
4246 let v = cache[i as usize].symbol as usize;
4247 let p = cache[i as usize].index;
4248 d += SaSint::from(p < 0);
4249 cache[i as usize].symbol = buckets[v];
4250 buckets[v] += 1;
4251 cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4252 buckets[2 + v] = d;
4253 if cache[i as usize].symbol < omp_block_end {
4254 let s = cache[i as usize].symbol as usize;
4255 let q = cache[i as usize].index & SAINT_MAX;
4256 cache[s].index = cache[i as usize].index;
4257 cache[s].symbol = buckets_index4(
4258 t[(q - 1) as usize] as usize,
4259 usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4260 ) as SaSint;
4261 }
4262 i += 1;
4263 }
4264
4265 d
4266}
4267
4268#[allow(dead_code)]
4269fn partial_sorting_scan_left_to_right_32s_4k_block_sort(
4270 t: &[SaSint],
4271 k: SaSint,
4272 buckets: &mut [SaSint],
4273 mut d: SaSint,
4274 cache: &mut [ThreadCache],
4275 omp_block_start: SaSint,
4276 omp_block_size: SaSint,
4277) -> SaSint {
4278 let k = k as usize;
4279 let mut i = omp_block_start;
4280 let omp_block_end = omp_block_start + omp_block_size;
4281 let mut j = omp_block_end - 64 - 1;
4282
4283 while i < j {
4284 for current in [i, i + 1] {
4285 let v = cache[current as usize].symbol;
4286 if v >= 0 {
4287 let p = cache[current as usize].index;
4288 d += p >> (SUFFIX_GROUP_BIT - 1);
4289 let bucket_index = (v >> 1) as usize;
4290 let v_usize = v as usize;
4291 cache[current as usize].symbol = buckets[2 * k + bucket_index];
4292 buckets[2 * k + bucket_index] += 1;
4293 cache[current as usize].index = (p - 1)
4294 | ((v & 1) << (SAINT_BIT - 1))
4295 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4296 buckets[v_usize] = d;
4297 if cache[current as usize].symbol < omp_block_end {
4298 let ni = cache[current as usize].symbol as usize;
4299 let mut np = cache[current as usize].index;
4300 if np > 0 {
4301 cache[ni].index = np;
4302 np &= !SUFFIX_GROUP_MARKER;
4303 cache[ni].symbol = buckets_index2(
4304 t[(np - 1) as usize] as usize,
4305 usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4306 ) as SaSint;
4307 np = 0;
4308 }
4309 cache[current as usize].index = np & SAINT_MAX;
4310 }
4311 }
4312 }
4313 i += 2;
4314 }
4315
4316 j += 64 + 1;
4317 while i < j {
4318 let v = cache[i as usize].symbol;
4319 if v >= 0 {
4320 let p = cache[i as usize].index;
4321 d += p >> (SUFFIX_GROUP_BIT - 1);
4322 let bucket_index = (v >> 1) as usize;
4323 let v_usize = v as usize;
4324 cache[i as usize].symbol = buckets[2 * k + bucket_index];
4325 buckets[2 * k + bucket_index] += 1;
4326 cache[i as usize].index = (p - 1)
4327 | ((v & 1) << (SAINT_BIT - 1))
4328 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4329 buckets[v_usize] = d;
4330 if cache[i as usize].symbol < omp_block_end {
4331 let ni = cache[i as usize].symbol as usize;
4332 let mut np = cache[i as usize].index;
4333 if np > 0 {
4334 cache[ni].index = np;
4335 np &= !SUFFIX_GROUP_MARKER;
4336 cache[ni].symbol = buckets_index2(
4337 t[(np - 1) as usize] as usize,
4338 usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4339 ) as SaSint;
4340 np = 0;
4341 }
4342 cache[i as usize].index = np & SAINT_MAX;
4343 }
4344 }
4345 i += 1;
4346 }
4347
4348 d
4349}
4350
4351#[allow(dead_code)]
4352fn partial_sorting_scan_left_to_right_32s_1k_block_sort(
4353 t: &[SaSint],
4354 induction_bucket: &mut [SaSint],
4355 cache: &mut [ThreadCache],
4356 omp_block_start: SaSint,
4357 omp_block_size: SaSint,
4358) {
4359 let mut i = omp_block_start;
4360 let omp_block_end = omp_block_start + omp_block_size;
4361 let mut j = omp_block_end - 64 - 1;
4362
4363 while i < j {
4364 for current in [i, i + 1] {
4365 let v = cache[current as usize].symbol;
4366 if v >= 0 {
4367 cache[current as usize].symbol = induction_bucket[v as usize];
4368 induction_bucket[v as usize] += 1;
4369 if cache[current as usize].symbol < omp_block_end {
4370 let ni = cache[current as usize].symbol as usize;
4371 let mut np = cache[current as usize].index;
4372 if np > 0 {
4373 cache[ni].index = (np - 1)
4374 | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize])
4375 as SaSint)
4376 << (SAINT_BIT - 1));
4377 cache[ni].symbol = t[(np - 1) as usize];
4378 np = 0;
4379 }
4380 cache[current as usize].index = np & SAINT_MAX;
4381 }
4382 }
4383 }
4384 i += 2;
4385 }
4386
4387 j = omp_block_end;
4388 while i < j {
4389 let v = cache[i as usize].symbol;
4390 if v >= 0 {
4391 cache[i as usize].symbol = induction_bucket[v as usize];
4392 induction_bucket[v as usize] += 1;
4393 if cache[i as usize].symbol < omp_block_end {
4394 let ni = cache[i as usize].symbol as usize;
4395 let mut np = cache[i as usize].index;
4396 if np > 0 {
4397 cache[ni].index = (np - 1)
4398 | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]) as SaSint)
4399 << (SAINT_BIT - 1));
4400 cache[ni].symbol = t[(np - 1) as usize];
4401 np = 0;
4402 }
4403 cache[i as usize].index = np & SAINT_MAX;
4404 }
4405 }
4406 i += 1;
4407 }
4408}
4409
4410#[allow(dead_code)]
4411fn partial_sorting_scan_right_to_left_32s_6k_block_sort(
4412 t: &[SaSint],
4413 buckets: &mut [SaSint],
4414 mut d: SaSint,
4415 cache: &mut [ThreadCache],
4416 omp_block_start: SaSint,
4417 omp_block_size: SaSint,
4418) -> SaSint {
4419 let mut i = omp_block_start + omp_block_size - 1;
4420 let mut j = omp_block_start + 64 + 1;
4421
4422 while i >= j {
4423 let v0 = cache[i as usize].symbol as usize;
4424 let p0 = cache[i as usize].index;
4425 d += SaSint::from(p0 < 0);
4426 buckets[v0] -= 1;
4427 cache[i as usize].symbol = buckets[v0];
4428 cache[i as usize].index =
4429 (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4430 buckets[2 + v0] = d;
4431 if cache[i as usize].symbol >= omp_block_start {
4432 let s = cache[i as usize].symbol as usize;
4433 let q = cache[i as usize].index & SAINT_MAX;
4434 cache[s].index = cache[i as usize].index;
4435 cache[s].symbol = buckets_index4(
4436 t[(q - 1) as usize] as usize,
4437 usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4438 ) as SaSint;
4439 }
4440
4441 let v1 = cache[(i - 1) as usize].symbol as usize;
4442 let p1 = cache[(i - 1) as usize].index;
4443 d += SaSint::from(p1 < 0);
4444 buckets[v1] -= 1;
4445 cache[(i - 1) as usize].symbol = buckets[v1];
4446 cache[(i - 1) as usize].index =
4447 (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4448 buckets[2 + v1] = d;
4449 if cache[(i - 1) as usize].symbol >= omp_block_start {
4450 let s = cache[(i - 1) as usize].symbol as usize;
4451 let q = cache[(i - 1) as usize].index & SAINT_MAX;
4452 cache[s].index = cache[(i - 1) as usize].index;
4453 cache[s].symbol = buckets_index4(
4454 t[(q - 1) as usize] as usize,
4455 usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4456 ) as SaSint;
4457 }
4458
4459 i -= 2;
4460 }
4461
4462 j -= 64 + 1;
4463 while i >= j {
4464 let v = cache[i as usize].symbol as usize;
4465 let p = cache[i as usize].index;
4466 d += SaSint::from(p < 0);
4467 buckets[v] -= 1;
4468 cache[i as usize].symbol = buckets[v];
4469 cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4470 buckets[2 + v] = d;
4471 if cache[i as usize].symbol >= omp_block_start {
4472 let s = cache[i as usize].symbol as usize;
4473 let q = cache[i as usize].index & SAINT_MAX;
4474 cache[s].index = cache[i as usize].index;
4475 cache[s].symbol = buckets_index4(
4476 t[(q - 1) as usize] as usize,
4477 usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4478 ) as SaSint;
4479 }
4480 i -= 1;
4481 }
4482
4483 d
4484}
4485
4486#[allow(dead_code)]
4487fn partial_sorting_scan_right_to_left_32s_4k_block_sort(
4488 t: &[SaSint],
4489 k: SaSint,
4490 buckets: &mut [SaSint],
4491 mut d: SaSint,
4492 cache: &mut [ThreadCache],
4493 omp_block_start: SaSint,
4494 omp_block_size: SaSint,
4495) -> SaSint {
4496 let k = k as usize;
4497 let mut i = omp_block_start + omp_block_size - 1;
4498 let mut j = omp_block_start + 64 + 1;
4499
4500 while i >= j {
4501 for current in [i, i - 1] {
4502 let v = cache[current as usize].symbol;
4503 if v >= 0 {
4504 let p = cache[current as usize].index;
4505 d += p >> (SUFFIX_GROUP_BIT - 1);
4506 let bucket_index = (v >> 1) as usize;
4507 let v_usize = v as usize;
4508 buckets[3 * k + bucket_index] -= 1;
4509 cache[current as usize].symbol = buckets[3 * k + bucket_index];
4510 cache[current as usize].index = (p - 1)
4511 | ((v & 1) << (SAINT_BIT - 1))
4512 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4513 buckets[v_usize] = d;
4514 if cache[current as usize].symbol >= omp_block_start {
4515 let ni = cache[current as usize].symbol as usize;
4516 let mut np = cache[current as usize].index;
4517 if np > 0 {
4518 cache[current as usize].index = 0;
4519 cache[ni].index = np;
4520 np &= !SUFFIX_GROUP_MARKER;
4521 cache[ni].symbol = buckets_index2(
4522 t[(np - 1) as usize] as usize,
4523 usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4524 ) as SaSint;
4525 }
4526 }
4527 }
4528 }
4529 i -= 2;
4530 }
4531
4532 j -= 64 + 1;
4533 while i >= j {
4534 let v = cache[i as usize].symbol;
4535 if v >= 0 {
4536 let p = cache[i as usize].index;
4537 d += p >> (SUFFIX_GROUP_BIT - 1);
4538 let bucket_index = (v >> 1) as usize;
4539 let v_usize = v as usize;
4540 buckets[3 * k + bucket_index] -= 1;
4541 cache[i as usize].symbol = buckets[3 * k + bucket_index];
4542 cache[i as usize].index = (p - 1)
4543 | ((v & 1) << (SAINT_BIT - 1))
4544 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4545 buckets[v_usize] = d;
4546 if cache[i as usize].symbol >= omp_block_start {
4547 let ni = cache[i as usize].symbol as usize;
4548 let mut np = cache[i as usize].index;
4549 if np > 0 {
4550 cache[i as usize].index = 0;
4551 cache[ni].index = np;
4552 np &= !SUFFIX_GROUP_MARKER;
4553 cache[ni].symbol = buckets_index2(
4554 t[(np - 1) as usize] as usize,
4555 usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4556 ) as SaSint;
4557 }
4558 }
4559 }
4560 i -= 1;
4561 }
4562
4563 d
4564}
4565
4566#[allow(dead_code)]
4567fn partial_sorting_scan_right_to_left_32s_1k_block_sort(
4568 t: &[SaSint],
4569 induction_bucket: &mut [SaSint],
4570 cache: &mut [ThreadCache],
4571 omp_block_start: SaSint,
4572 omp_block_size: SaSint,
4573) {
4574 let mut i = omp_block_start + omp_block_size - 1;
4575 let mut j = omp_block_start + 64 + 1;
4576
4577 while i >= j {
4578 for current in [i, i - 1] {
4579 let v = cache[current as usize].symbol;
4580 if v >= 0 {
4581 induction_bucket[v as usize] -= 1;
4582 cache[current as usize].symbol = induction_bucket[v as usize];
4583 if cache[current as usize].symbol >= omp_block_start {
4584 let ni = cache[current as usize].symbol as usize;
4585 let np = cache[current as usize].index;
4586 if np > 0 {
4587 cache[current as usize].index = 0;
4588 cache[ni].index = (np - 1)
4589 | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize])
4590 as SaSint)
4591 << (SAINT_BIT - 1));
4592 cache[ni].symbol = t[(np - 1) as usize];
4593 }
4594 }
4595 }
4596 }
4597 i -= 2;
4598 }
4599
4600 j -= 64 + 1;
4601 while i >= j {
4602 let v = cache[i as usize].symbol;
4603 if v >= 0 {
4604 induction_bucket[v as usize] -= 1;
4605 cache[i as usize].symbol = induction_bucket[v as usize];
4606 if cache[i as usize].symbol >= omp_block_start {
4607 let ni = cache[i as usize].symbol as usize;
4608 let np = cache[i as usize].index;
4609 if np > 0 {
4610 cache[i as usize].index = 0;
4611 cache[ni].index = (np - 1)
4612 | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]) as SaSint)
4613 << (SAINT_BIT - 1));
4614 cache[ni].symbol = t[(np - 1) as usize];
4615 }
4616 }
4617 }
4618 i -= 1;
4619 }
4620}
4621
4622#[allow(dead_code)]
4623fn partial_sorting_scan_left_to_right_32s_6k_block_omp(
4624 t: &[SaSint],
4625 sa: &mut [SaSint],
4626 buckets: &mut [SaSint],
4627 d: SaSint,
4628 cache: &mut [ThreadCache],
4629 block_start: SaSint,
4630 block_size: SaSint,
4631 threads: SaSint,
4632) -> SaSint {
4633 if block_size <= 0 {
4634 return d;
4635 }
4636 if threads == 1 || block_size < 16_384 {
4637 return partial_sorting_scan_left_to_right_32s_6k(
4638 t,
4639 sa,
4640 buckets,
4641 d,
4642 block_start,
4643 block_size,
4644 );
4645 }
4646
4647 let threads_usize = usize::try_from(threads)
4648 .expect("threads must be non-negative")
4649 .max(1);
4650 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4651 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4652 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4653 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4654
4655 for omp_thread_num in 0..omp_num_threads {
4656 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4657 omp_block_stride
4658 } else {
4659 block_size_usize - omp_thread_num * omp_block_stride
4660 };
4661 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4662 if omp_block_size == 0 {
4663 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4664 }
4665 partial_sorting_scan_left_to_right_32s_6k_block_gather(
4666 t,
4667 sa,
4668 &mut cache[omp_thread_num * omp_block_stride
4669 ..omp_thread_num * omp_block_stride + omp_block_size],
4670 omp_block_start as SaSint,
4671 omp_block_size as SaSint,
4672 );
4673 }
4674
4675 let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
4676 t,
4677 buckets,
4678 d,
4679 &mut cache[..block_size_usize],
4680 block_start,
4681 block_size,
4682 );
4683 place_cached_suffixes(sa, &cache[..block_size_usize], 0, block_size);
4684 d
4685}
4686
4687#[allow(dead_code)]
4688fn partial_sorting_scan_left_to_right_32s_4k_block_omp(
4689 t: &[SaSint],
4690 sa: &mut [SaSint],
4691 k: SaSint,
4692 buckets: &mut [SaSint],
4693 d: SaSint,
4694 cache: &mut [ThreadCache],
4695 block_start: SaSint,
4696 block_size: SaSint,
4697 threads: SaSint,
4698) -> SaSint {
4699 if block_size <= 0 {
4700 return d;
4701 }
4702 if threads == 1 || block_size < 16_384 {
4703 return partial_sorting_scan_left_to_right_32s_4k(
4704 t,
4705 sa,
4706 k,
4707 buckets,
4708 d,
4709 block_start,
4710 block_size,
4711 );
4712 }
4713
4714 let threads_usize = usize::try_from(threads)
4715 .expect("threads must be non-negative")
4716 .max(1);
4717 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4718 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4719 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4720 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4721
4722 for omp_thread_num in 0..omp_num_threads {
4723 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4724 omp_block_stride
4725 } else {
4726 block_size_usize - omp_thread_num * omp_block_stride
4727 };
4728 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4729 if omp_block_size == 0 {
4730 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4731 }
4732 partial_sorting_scan_left_to_right_32s_4k_block_gather(
4733 t,
4734 sa,
4735 &mut cache[omp_thread_num * omp_block_stride
4736 ..omp_thread_num * omp_block_stride + omp_block_size],
4737 omp_block_start as SaSint,
4738 omp_block_size as SaSint,
4739 );
4740 }
4741
4742 let cache = &mut cache[..block_size_usize];
4743 let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
4744 t,
4745 k,
4746 buckets,
4747 d,
4748 cache,
4749 block_start,
4750 block_size,
4751 );
4752 for entry in cache.iter() {
4753 if entry.symbol >= 0 {
4754 sa[entry.symbol as usize] = entry.index;
4755 }
4756 }
4757 d
4758}
4759
4760#[allow(dead_code)]
4761fn partial_sorting_scan_left_to_right_32s_1k_block_omp(
4762 t: &[SaSint],
4763 sa: &mut [SaSint],
4764 buckets: &mut [SaSint],
4765 cache: &mut [ThreadCache],
4766 block_start: SaSint,
4767 block_size: SaSint,
4768 threads: SaSint,
4769) {
4770 if block_size <= 0 {
4771 return;
4772 }
4773 if threads == 1 || block_size < 16_384 {
4774 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, block_start, block_size);
4775 return;
4776 }
4777
4778 let threads_usize = usize::try_from(threads)
4779 .expect("threads must be non-negative")
4780 .max(1);
4781 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4782 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4783 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4784 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4785
4786 for omp_thread_num in 0..omp_num_threads {
4787 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4788 omp_block_stride
4789 } else {
4790 block_size_usize - omp_thread_num * omp_block_stride
4791 };
4792 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4793 if omp_block_size == 0 {
4794 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4795 }
4796 partial_sorting_scan_left_to_right_32s_1k_block_gather(
4797 t,
4798 sa,
4799 &mut cache[omp_thread_num * omp_block_stride
4800 ..omp_thread_num * omp_block_stride + omp_block_size],
4801 omp_block_start as SaSint,
4802 omp_block_size as SaSint,
4803 );
4804 }
4805
4806 let cache = &mut cache[..block_size_usize];
4807 partial_sorting_scan_left_to_right_32s_1k_block_sort(
4808 t,
4809 buckets,
4810 cache,
4811 block_start,
4812 block_size,
4813 );
4814 compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
4815}
4816
4817#[allow(dead_code)]
4818fn partial_sorting_scan_right_to_left_32s_6k_block_omp(
4819 t: &[SaSint],
4820 sa: &mut [SaSint],
4821 buckets: &mut [SaSint],
4822 mut d: SaSint,
4823 cache: &mut [ThreadCache],
4824 block_start: SaSint,
4825 block_size: SaSint,
4826 threads: SaSint,
4827) -> SaSint {
4828 if block_size <= 0 {
4829 return d;
4830 }
4831 if threads == 1 || block_size < 16_384 {
4832 return partial_sorting_scan_right_to_left_32s_6k(
4833 t,
4834 sa,
4835 buckets,
4836 d,
4837 block_start,
4838 block_size,
4839 );
4840 }
4841
4842 let threads_usize = usize::try_from(threads)
4843 .expect("threads must be non-negative")
4844 .max(1);
4845 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4846 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4847 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4848 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4849
4850 for omp_thread_num in 0..omp_num_threads {
4851 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4852 omp_block_stride
4853 } else {
4854 block_size_usize - omp_thread_num * omp_block_stride
4855 };
4856 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4857 if omp_block_size == 0 {
4858 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4859 }
4860 partial_sorting_scan_right_to_left_32s_6k_block_gather(
4861 t,
4862 sa,
4863 &mut cache[omp_thread_num * omp_block_stride
4864 ..omp_thread_num * omp_block_stride + omp_block_size],
4865 omp_block_start as SaSint,
4866 omp_block_size as SaSint,
4867 );
4868 }
4869
4870 d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
4871 t,
4872 buckets,
4873 d,
4874 &mut cache[..block_size_usize],
4875 block_start,
4876 block_size,
4877 );
4878 for omp_thread_num in 0..omp_num_threads {
4879 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4880 omp_block_stride
4881 } else {
4882 block_size_usize - omp_thread_num * omp_block_stride
4883 };
4884 let cache_start = omp_thread_num * omp_block_stride;
4885 if omp_block_size == 0 {
4886 omp_block_size = block_size_usize - cache_start;
4887 }
4888 for entry in &cache[cache_start..cache_start + omp_block_size] {
4889 sa[entry.symbol as usize] = entry.index;
4890 }
4891 }
4892 d
4893}
4894
4895#[allow(dead_code)]
4896fn partial_sorting_scan_right_to_left_32s_4k_block_omp(
4897 t: &[SaSint],
4898 sa: &mut [SaSint],
4899 k: SaSint,
4900 buckets: &mut [SaSint],
4901 mut d: SaSint,
4902 cache: &mut [ThreadCache],
4903 block_start: SaSint,
4904 block_size: SaSint,
4905 threads: SaSint,
4906) -> SaSint {
4907 if block_size <= 0 {
4908 return d;
4909 }
4910 if threads == 1 || block_size < 16_384 {
4911 return partial_sorting_scan_right_to_left_32s_4k(
4912 t,
4913 sa,
4914 k,
4915 buckets,
4916 d,
4917 block_start,
4918 block_size,
4919 );
4920 }
4921
4922 let threads_usize = usize::try_from(threads)
4923 .expect("threads must be non-negative")
4924 .max(1);
4925 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4926 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4927 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4928 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4929
4930 for omp_thread_num in 0..omp_num_threads {
4931 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4932 omp_block_stride
4933 } else {
4934 block_size_usize - omp_thread_num * omp_block_stride
4935 };
4936 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4937 if omp_block_size == 0 {
4938 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4939 }
4940 partial_sorting_scan_right_to_left_32s_4k_block_gather(
4941 t,
4942 sa,
4943 &mut cache[omp_thread_num * omp_block_stride
4944 ..omp_thread_num * omp_block_stride + omp_block_size],
4945 omp_block_start as SaSint,
4946 omp_block_size as SaSint,
4947 );
4948 }
4949
4950 d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
4951 t,
4952 k,
4953 buckets,
4954 d,
4955 &mut cache[..block_size_usize],
4956 block_start,
4957 block_size,
4958 );
4959 let mut write = 0usize;
4960 for read in 0..block_size_usize {
4961 let entry = cache[read];
4962 if entry.symbol >= 0 {
4963 cache[write] = entry;
4964 write += 1;
4965 }
4966 }
4967 for entry in &cache[..write] {
4968 sa[entry.symbol as usize] = entry.index;
4969 }
4970 d
4971}
4972
4973#[allow(dead_code)]
4974fn partial_sorting_scan_right_to_left_32s_1k_block_omp(
4975 t: &[SaSint],
4976 sa: &mut [SaSint],
4977 buckets: &mut [SaSint],
4978 cache: &mut [ThreadCache],
4979 block_start: SaSint,
4980 block_size: SaSint,
4981 threads: SaSint,
4982) {
4983 if block_size <= 0 {
4984 return;
4985 }
4986 if threads == 1 || block_size < 16_384 {
4987 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, block_start, block_size);
4988 return;
4989 }
4990
4991 let threads_usize = usize::try_from(threads)
4992 .expect("threads must be non-negative")
4993 .max(1);
4994 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4995 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4996 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4997 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4998
4999 for omp_thread_num in 0..omp_num_threads {
5000 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5001 omp_block_stride
5002 } else {
5003 block_size_usize - omp_thread_num * omp_block_stride
5004 };
5005 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5006 if omp_block_size == 0 {
5007 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
5008 }
5009 partial_sorting_scan_right_to_left_32s_1k_block_gather(
5010 t,
5011 sa,
5012 &mut cache[omp_thread_num * omp_block_stride
5013 ..omp_thread_num * omp_block_stride + omp_block_size],
5014 omp_block_start as SaSint,
5015 omp_block_size as SaSint,
5016 );
5017 }
5018
5019 let cache = &mut cache[..block_size_usize];
5020 partial_sorting_scan_right_to_left_32s_1k_block_sort(
5021 t,
5022 buckets,
5023 cache,
5024 block_start,
5025 block_size,
5026 );
5027 compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
5028}
5029
5030#[allow(dead_code)]
5031fn partial_sorting_gather_lms_suffixes_32s_4k(
5032 sa: &mut [SaSint],
5033 omp_block_start: SaSint,
5034 omp_block_size: SaSint,
5035) -> SaSint {
5036 let mut i = omp_block_start;
5037 let mut j = omp_block_start + omp_block_size - 3;
5038 let mut l = omp_block_start;
5039
5040 while i < j {
5041 let s0 = sa[i as usize] as SaUint;
5042 sa[l as usize] = (s0.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5043 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5044 l += SaSint::from((s0 as SaSint) < 0);
5045
5046 let s1 = sa[(i + 1) as usize] as SaUint;
5047 sa[l as usize] = (s1.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5048 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5049 l += SaSint::from((s1 as SaSint) < 0);
5050
5051 let s2 = sa[(i + 2) as usize] as SaUint;
5052 sa[l as usize] = (s2.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5053 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5054 l += SaSint::from((s2 as SaSint) < 0);
5055
5056 let s3 = sa[(i + 3) as usize] as SaUint;
5057 sa[l as usize] = (s3.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5058 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5059 l += SaSint::from((s3 as SaSint) < 0);
5060
5061 i += 4;
5062 }
5063
5064 j += 3;
5065 while i < j {
5066 let s = sa[i as usize] as SaUint;
5067 sa[l as usize] = (s.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5068 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5069 l += SaSint::from((s as SaSint) < 0);
5070 i += 1;
5071 }
5072
5073 l
5074}
5075
5076#[allow(dead_code)]
5077fn partial_sorting_gather_lms_suffixes_32s_1k(
5078 sa: &mut [SaSint],
5079 omp_block_start: SaSint,
5080 omp_block_size: SaSint,
5081) -> SaSint {
5082 let mut i = omp_block_start;
5083 let mut j = omp_block_start + omp_block_size - 3;
5084 let mut l = omp_block_start;
5085
5086 while i < j {
5087 let s0 = sa[i as usize];
5088 sa[l as usize] = s0 & SAINT_MAX;
5089 l += SaSint::from(s0 < 0);
5090
5091 let s1 = sa[(i + 1) as usize];
5092 sa[l as usize] = s1 & SAINT_MAX;
5093 l += SaSint::from(s1 < 0);
5094
5095 let s2 = sa[(i + 2) as usize];
5096 sa[l as usize] = s2 & SAINT_MAX;
5097 l += SaSint::from(s2 < 0);
5098
5099 let s3 = sa[(i + 3) as usize];
5100 sa[l as usize] = s3 & SAINT_MAX;
5101 l += SaSint::from(s3 < 0);
5102
5103 i += 4;
5104 }
5105
5106 j += 3;
5107 while i < j {
5108 let s = sa[i as usize];
5109 sa[l as usize] = s & SAINT_MAX;
5110 l += SaSint::from(s < 0);
5111 i += 1;
5112 }
5113
5114 l
5115}
5116
5117#[allow(dead_code)]
5118fn partial_sorting_gather_lms_suffixes_32s_4k_omp(
5119 sa: &mut [SaSint],
5120 n: SaSint,
5121 threads: SaSint,
5122 thread_state: &mut [ThreadState],
5123) {
5124 let n_usize = usize::try_from(n).expect("n must be non-negative");
5125 let thread_count = if threads > 1 && n >= 65_536 {
5126 usize::try_from(threads)
5127 .expect("threads must be non-negative")
5128 .min(thread_state.len())
5129 .max(1)
5130 } else {
5131 1
5132 };
5133
5134 if thread_count == 1 {
5135 let _ = partial_sorting_gather_lms_suffixes_32s_4k(sa, 0, n);
5136 return;
5137 }
5138
5139 let block_stride = (n_usize / thread_count) & !15usize;
5140 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5141 let block_start = thread * block_stride;
5142 let block_size = if thread + 1 < thread_count {
5143 block_stride
5144 } else {
5145 n_usize - block_start
5146 };
5147 state.position = block_start as SaSint;
5148 state.count = partial_sorting_gather_lms_suffixes_32s_4k(
5149 sa,
5150 block_start as SaSint,
5151 block_size as SaSint,
5152 ) - block_start as SaSint;
5153 }
5154
5155 let mut position = 0usize;
5156 for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5157 let count = usize::try_from(state.count).expect("count must be non-negative");
5158 let src = usize::try_from(state.position).expect("position must be non-negative");
5159 if thread > 0 && count > 0 {
5160 sa.copy_within(src..src + count, position);
5161 }
5162 position += count;
5163 }
5164}
5165
5166#[allow(dead_code)]
5167fn partial_sorting_gather_lms_suffixes_32s_1k_omp(
5168 sa: &mut [SaSint],
5169 n: SaSint,
5170 threads: SaSint,
5171 thread_state: &mut [ThreadState],
5172) {
5173 let n_usize = usize::try_from(n).expect("n must be non-negative");
5174 let thread_count = if threads > 1 && n >= 65_536 {
5175 usize::try_from(threads)
5176 .expect("threads must be non-negative")
5177 .min(thread_state.len())
5178 .max(1)
5179 } else {
5180 1
5181 };
5182
5183 if thread_count == 1 {
5184 let _ = partial_sorting_gather_lms_suffixes_32s_1k(sa, 0, n);
5185 return;
5186 }
5187
5188 let block_stride = (n_usize / thread_count) & !15usize;
5189 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5190 let block_start = thread * block_stride;
5191 let block_size = if thread + 1 < thread_count {
5192 block_stride
5193 } else {
5194 n_usize - block_start
5195 };
5196 state.position = block_start as SaSint;
5197 state.count = partial_sorting_gather_lms_suffixes_32s_1k(
5198 sa,
5199 block_start as SaSint,
5200 block_size as SaSint,
5201 ) - block_start as SaSint;
5202 }
5203
5204 let mut position = 0usize;
5205 for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5206 let count = usize::try_from(state.count).expect("count must be non-negative");
5207 let src = usize::try_from(state.position).expect("position must be non-negative");
5208 if thread > 0 && count > 0 {
5209 sa.copy_within(src..src + count, position);
5210 }
5211 position += count;
5212 }
5213}
5214
5215#[allow(dead_code)]
5216fn partial_gsa_scan_right_to_left_16u(
5217 t: &[u16],
5218 sa: &mut [SaSint],
5219 buckets: &mut [SaSint],
5220 mut d: SaSint,
5221 omp_block_start: SaSint,
5222 omp_block_size: SaSint,
5223) -> SaSint {
5224 let mut i = (omp_block_start + omp_block_size - 1) as isize;
5225 let mut j = (omp_block_start + 64 + 1) as isize;
5226 while i >= j {
5227 let mut p0 = sa[i as usize];
5228 d += SaSint::from(p0 < 0);
5229 p0 &= SAINT_MAX;
5230 let v0 = buckets_index2(
5231 t[(p0 - 1) as usize] as usize,
5232 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
5233 );
5234 if v0 != 1 {
5235 let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
5236 SAINT_MIN
5237 } else {
5238 0
5239 };
5240 buckets[v0] -= 1;
5241 sa[buckets[v0] as usize] = (p0 - 1) | mark0;
5242 buckets[2 * ALPHABET_SIZE + v0] = d;
5243 }
5244
5245 let mut p1 = sa[(i - 1) as usize];
5246 d += SaSint::from(p1 < 0);
5247 p1 &= SAINT_MAX;
5248 let v1 = buckets_index2(
5249 t[(p1 - 1) as usize] as usize,
5250 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
5251 );
5252 if v1 != 1 {
5253 let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
5254 SAINT_MIN
5255 } else {
5256 0
5257 };
5258 buckets[v1] -= 1;
5259 sa[buckets[v1] as usize] = (p1 - 1) | mark1;
5260 buckets[2 * ALPHABET_SIZE + v1] = d;
5261 }
5262
5263 i -= 2;
5264 }
5265
5266 j -= 64 + 1;
5267 while i >= j {
5268 let mut p = sa[i as usize];
5269 d += SaSint::from(p < 0);
5270 p &= SAINT_MAX;
5271 let v = buckets_index2(
5272 t[(p - 1) as usize] as usize,
5273 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5274 );
5275 if v != 1 {
5276 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5277 SAINT_MIN
5278 } else {
5279 0
5280 };
5281 buckets[v] -= 1;
5282 sa[buckets[v] as usize] = (p - 1) | mark;
5283 buckets[2 * ALPHABET_SIZE + v] = d;
5284 }
5285 i -= 1;
5286 }
5287
5288 d
5289}
5290
5291#[allow(dead_code)]
5292fn partial_gsa_scan_right_to_left_16u_block_omp(
5293 t: &[u16],
5294 sa: &mut [SaSint],
5295 k: SaSint,
5296 buckets: &mut [SaSint],
5297 d: SaSint,
5298 block_start: SaSint,
5299 block_size: SaSint,
5300 threads: SaSint,
5301 thread_state: &mut [ThreadState],
5302) -> SaSint {
5303 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
5304 usize::try_from(threads)
5305 .expect("threads must be non-negative")
5306 .min(thread_state.len())
5307 } else {
5308 1
5309 };
5310 if thread_count <= 1 {
5311 return partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
5312 }
5313
5314 let width = 2 * k as usize;
5315 let distinct_offset = 2 * ALPHABET_SIZE;
5316 let block_stride = (block_size / thread_count as SaSint) & !15;
5317
5318 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5319 let local_start = thread as SaSint * block_stride;
5320 let local_size = if thread + 1 < thread_count {
5321 block_stride
5322 } else {
5323 block_size - local_start
5324 };
5325 let mut local_state = ThreadState::default();
5326 state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
5327 t,
5328 sa,
5329 k,
5330 &mut state.buckets,
5331 &mut state.cache,
5332 block_start + local_start,
5333 local_size,
5334 &mut local_state,
5335 );
5336 state.count = local_state.cache_entries as SaSint;
5337 }
5338
5339 let mut next_d = d;
5340 for state in thread_state.iter_mut().take(thread_count).rev() {
5341 for c in 0..width {
5342 let a = buckets[c];
5343 let b = state.buckets[c];
5344 buckets[c] = a - b;
5345 state.buckets[c] = a;
5346 }
5347
5348 next_d -= 1;
5349 for c in 0..width {
5350 let offset = distinct_offset + c;
5351 let a = buckets[offset];
5352 let b = state.buckets[offset];
5353 let shifted = b + next_d;
5354 buckets[offset] = if b > 0 { shifted } else { a };
5355 state.buckets[offset] = a;
5356 }
5357 next_d += 1 + state.position;
5358 state.position = next_d - state.position;
5359 }
5360
5361 for state in thread_state.iter_mut().take(thread_count) {
5362 partial_gsa_scan_right_to_left_16u_block_place(
5363 sa,
5364 &mut state.buckets,
5365 &state.cache,
5366 state.count,
5367 state.position,
5368 );
5369 }
5370
5371 next_d
5372}
5373
5374#[allow(dead_code)]
5375fn partial_gsa_scan_right_to_left_16u_omp(
5376 t: &[u16],
5377 sa: &mut [SaSint],
5378 n: SaSint,
5379 k: SaSint,
5380 buckets: &mut [SaSint],
5381 first_lms_suffix: SaSint,
5382 left_suffixes_count: SaSint,
5383 d: SaSint,
5384 threads: SaSint,
5385) {
5386 let scan_start = left_suffixes_count + 1;
5387 let scan_end = n - first_lms_suffix;
5388
5389 if threads == 1 || scan_end - scan_start < 65536 {
5390 partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, scan_start, scan_end - scan_start);
5391 } else {
5392 let mut d = d;
5393 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
5394 let mut block_start = scan_end - 1;
5395 while block_start >= scan_start {
5396 if sa[block_start as usize] == 0 {
5397 block_start -= 1;
5398 } else {
5399 let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
5400 let mut block_max_end = block_start - block_limit;
5401 if block_max_end < scan_start {
5402 block_max_end = scan_start - 1;
5403 }
5404 let mut block_end = block_start - 1;
5405 while block_end > block_max_end && sa[block_end as usize] != 0 {
5406 block_end -= 1;
5407 }
5408 let block_size = block_start - block_end;
5409
5410 if block_size < 32 {
5411 while block_start > block_end {
5412 let mut p = sa[block_start as usize];
5413 d += SaSint::from(p < 0);
5414 p &= SAINT_MAX;
5415 let v = buckets_index2(
5416 t[(p - 1) as usize] as usize,
5417 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5418 );
5419 if v != 1 {
5420 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5421 SAINT_MIN
5422 } else {
5423 0
5424 };
5425 buckets[v] -= 1;
5426 sa[buckets[v] as usize] = (p - 1) | mark;
5427 buckets[2 * ALPHABET_SIZE + v] = d;
5428 }
5429 block_start -= 1;
5430 }
5431 } else {
5432 d = partial_gsa_scan_right_to_left_16u_block_omp(
5433 t,
5434 sa,
5435 k,
5436 buckets,
5437 d,
5438 block_end + 1,
5439 block_size,
5440 threads,
5441 &mut thread_state,
5442 );
5443 block_start = block_end;
5444 }
5445 }
5446 }
5447 }
5448}
5449
5450#[allow(dead_code)]
5451fn partial_sorting_shift_markers_16u_omp(
5452 sa: &mut [SaSint],
5453 n: SaSint,
5454 buckets: &[SaSint],
5455 threads: SaSint,
5456) {
5457 let thread_count = if threads > 1 && n >= 65536 {
5458 usize::try_from(threads).expect("threads must be positive")
5459 } else {
5460 1
5461 };
5462 let c_step = buckets_index2(1, 0) as isize;
5463 let c_min = buckets_index2(1, 0) as isize;
5464 let c_max = buckets_index2(ALPHABET_SIZE - 1, 0) as isize;
5465 for t in 0..thread_count {
5466 let mut c = c_max - (t as isize * c_step);
5467 while c >= c_min {
5468 let c_usize = c as usize;
5469 let mut s = SAINT_MIN;
5470 let mut i = buckets[4 * ALPHABET_SIZE + c_usize] as isize - 1;
5471 let mut j = buckets[c_usize - buckets_index2(1, 0)] as isize + 3;
5472 while i >= j {
5473 let p0 = sa[i as usize];
5474 let q0 = (p0 & SAINT_MIN) ^ s;
5475 s ^= q0;
5476 sa[i as usize] = p0 ^ q0;
5477
5478 let p1 = sa[(i - 1) as usize];
5479 let q1 = (p1 & SAINT_MIN) ^ s;
5480 s ^= q1;
5481 sa[(i - 1) as usize] = p1 ^ q1;
5482
5483 let p2 = sa[(i - 2) as usize];
5484 let q2 = (p2 & SAINT_MIN) ^ s;
5485 s ^= q2;
5486 sa[(i - 2) as usize] = p2 ^ q2;
5487
5488 let p3 = sa[(i - 3) as usize];
5489 let q3 = (p3 & SAINT_MIN) ^ s;
5490 s ^= q3;
5491 sa[(i - 3) as usize] = p3 ^ q3;
5492
5493 i -= 4;
5494 }
5495
5496 j -= 3;
5497 while i >= j {
5498 let p = sa[i as usize];
5499 let q = (p & SAINT_MIN) ^ s;
5500 s ^= q;
5501 sa[i as usize] = p ^ q;
5502 i -= 1;
5503 }
5504
5505 c -= c_step * thread_count as isize;
5506 }
5507 }
5508}
5509
5510#[allow(dead_code)]
5511fn induce_partial_order_16u_omp(
5512 t: &[u16],
5513 sa: &mut [SaSint],
5514 n: SaSint,
5515 k: SaSint,
5516 flags: SaSint,
5517 buckets: &mut [SaSint],
5518 first_lms_suffix: SaSint,
5519 left_suffixes_count: SaSint,
5520 threads: SaSint,
5521) {
5522 buckets[2 * ALPHABET_SIZE..4 * ALPHABET_SIZE].fill(0);
5523
5524 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5525 let marker = 4 * ALPHABET_SIZE + buckets_index2(0, 1);
5526 buckets[marker] = buckets[4 * ALPHABET_SIZE + buckets_index2(1, 1)] - 1;
5527 flip_suffix_markers_omp(sa, buckets[marker], threads);
5528 }
5529
5530 let d = partial_sorting_scan_left_to_right_16u_omp(
5531 t,
5532 sa,
5533 n,
5534 k,
5535 buckets,
5536 left_suffixes_count,
5537 0,
5538 threads,
5539 );
5540 partial_sorting_shift_markers_16u_omp(sa, n, buckets, threads);
5541
5542 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5543 partial_gsa_scan_right_to_left_16u_omp(
5544 t,
5545 sa,
5546 n,
5547 k,
5548 buckets,
5549 first_lms_suffix,
5550 left_suffixes_count,
5551 d,
5552 threads,
5553 );
5554
5555 if t[first_lms_suffix as usize] == 0 {
5556 let count = (buckets[buckets_index2(1, 1)] - 1) as usize;
5557 sa.copy_within(0..count, 1);
5558 sa[0] = first_lms_suffix | SAINT_MIN;
5559 }
5560
5561 buckets[buckets_index2(0, 1)] = 0;
5562 } else {
5563 partial_sorting_scan_right_to_left_16u_omp(
5564 t,
5565 sa,
5566 n,
5567 k,
5568 buckets,
5569 first_lms_suffix,
5570 left_suffixes_count,
5571 d,
5572 threads,
5573 );
5574 }
5575}
5576
5577#[allow(dead_code)]
5578fn induce_partial_order_32s_6k_omp(
5579 t: &[SaSint],
5580 sa: &mut [SaSint],
5581 n: SaSint,
5582 k: SaSint,
5583 buckets: &mut [SaSint],
5584 first_lms_suffix: SaSint,
5585 left_suffixes_count: SaSint,
5586 threads: SaSint,
5587 thread_state: &mut [ThreadState],
5588) {
5589 let d = partial_sorting_scan_left_to_right_32s_6k_omp(
5590 t,
5591 sa,
5592 n,
5593 buckets,
5594 left_suffixes_count,
5595 0,
5596 threads,
5597 thread_state,
5598 );
5599 partial_sorting_shift_markers_32s_6k_omp(sa, k, buckets, threads);
5600 partial_sorting_shift_buckets_32s_6k(k, buckets);
5601 partial_sorting_scan_right_to_left_32s_6k_omp(
5602 t,
5603 sa,
5604 n,
5605 buckets,
5606 first_lms_suffix,
5607 left_suffixes_count,
5608 d,
5609 threads,
5610 thread_state,
5611 );
5612}
5613
5614#[allow(dead_code)]
5615fn induce_partial_order_32s_4k_omp(
5616 t: &[SaSint],
5617 sa: &mut [SaSint],
5618 n: SaSint,
5619 k: SaSint,
5620 buckets: &mut [SaSint],
5621 threads: SaSint,
5622 thread_state: &mut [ThreadState],
5623) {
5624 buckets[..2 * k as usize].fill(0);
5625 let d = partial_sorting_scan_left_to_right_32s_4k_omp(
5626 t,
5627 sa,
5628 n,
5629 k,
5630 buckets,
5631 0,
5632 threads,
5633 thread_state,
5634 );
5635 partial_sorting_shift_markers_32s_4k(sa, n);
5636 partial_sorting_scan_right_to_left_32s_4k_omp(t, sa, n, k, buckets, d, threads, thread_state);
5637 partial_sorting_gather_lms_suffixes_32s_4k_omp(sa, n, threads, thread_state);
5638}
5639
5640#[allow(dead_code)]
5641fn induce_partial_order_32s_2k_omp(
5642 t: &[SaSint],
5643 sa: &mut [SaSint],
5644 n: SaSint,
5645 k: SaSint,
5646 buckets: &mut [SaSint],
5647 threads: SaSint,
5648 thread_state: &mut [ThreadState],
5649) {
5650 let k = k as usize;
5651 let (left, right) = buckets.split_at_mut(k);
5652 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, right, threads, thread_state);
5653 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, left, threads, thread_state);
5654 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5655}
5656
5657#[allow(dead_code)]
5658fn induce_partial_order_32s_1k_omp(
5659 t: &[SaSint],
5660 sa: &mut [SaSint],
5661 n: SaSint,
5662 k: SaSint,
5663 buckets: &mut [SaSint],
5664 threads: SaSint,
5665 thread_state: &mut [ThreadState],
5666) {
5667 count_suffixes_32s(t, n, k, buckets);
5668 initialize_buckets_start_32s_1k(k, buckets);
5669 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5670
5671 count_suffixes_32s(t, n, k, buckets);
5672 initialize_buckets_end_32s_1k(k, buckets);
5673 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5674
5675 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5676}
5677
5678#[allow(dead_code)]
5679fn final_sorting_scan_left_to_right_16u(
5680 t: &[u16],
5681 sa: &mut [SaSint],
5682 induction_bucket: &mut [SaSint],
5683 omp_block_start: SaSint,
5684 omp_block_size: SaSint,
5685) {
5686 let mut i = omp_block_start as isize;
5687 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
5688 while i < j {
5689 final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5690 final_sorting_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
5691 i += 2;
5692 }
5693 j += 64 + 1;
5694 while i < j {
5695 final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5696 i += 1;
5697 }
5698}
5699
5700#[allow(dead_code)]
5701fn final_sorting_scan_right_to_left_16u(
5702 t: &[u16],
5703 sa: &mut [SaSint],
5704 induction_bucket: &mut [SaSint],
5705 omp_block_start: SaSint,
5706 omp_block_size: SaSint,
5707) {
5708 let mut i = (omp_block_start + omp_block_size - 1) as isize;
5709 let mut j = (omp_block_start + 64 + 1) as isize;
5710 while i >= j {
5711 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5712 final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, false);
5713 i -= 2;
5714 }
5715 j -= 64 + 1;
5716 while i >= j {
5717 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5718 i -= 1;
5719 }
5720}
5721
5722#[allow(dead_code)]
5723fn final_sorting_scan_left_to_right_32s(
5724 t: &[SaSint],
5725 sa: &mut [SaSint],
5726 induction_bucket: &mut [SaSint],
5727 omp_block_start: SaSint,
5728 omp_block_size: SaSint,
5729) {
5730 let mut i = omp_block_start as isize;
5731 let mut j = (omp_block_start + omp_block_size - 2 * 64 - 1) as isize;
5732 while i < j {
5733 for current in [i, i + 1] {
5734 let current = current as usize;
5735 let mut p = sa[current];
5736 sa[current] = p ^ SAINT_MIN;
5737 if p > 0 {
5738 p -= 1;
5739 let p_usize = p as usize;
5740 let bucket = t[p_usize] as usize;
5741 let slot = induction_bucket[bucket] as usize;
5742 sa[slot] = p
5743 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5744 << (SAINT_BIT - 1));
5745 induction_bucket[bucket] += 1;
5746 }
5747 }
5748 i += 2;
5749 }
5750
5751 j += 2 * 64 + 1;
5752 while i < j {
5753 let current = i as usize;
5754 let mut p = sa[current];
5755 sa[current] = p ^ SAINT_MIN;
5756 if p > 0 {
5757 p -= 1;
5758 let p_usize = p as usize;
5759 let bucket = t[p_usize] as usize;
5760 let slot = induction_bucket[bucket] as usize;
5761 sa[slot] = p
5762 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5763 << (SAINT_BIT - 1));
5764 induction_bucket[bucket] += 1;
5765 }
5766 i += 1;
5767 }
5768}
5769
5770#[allow(dead_code)]
5771fn final_sorting_scan_left_to_right_32s_block_gather(
5772 t: &[SaSint],
5773 sa: &mut [SaSint],
5774 cache: &mut [ThreadCache],
5775 omp_block_start: SaSint,
5776 omp_block_size: SaSint,
5777) {
5778 if omp_block_size <= 0 {
5779 return;
5780 }
5781
5782 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5783 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5784 for offset in 0..size {
5785 let current = start + offset;
5786 let mut symbol = SAINT_MIN;
5787 let mut p = sa[current];
5788 sa[current] = p ^ SAINT_MIN;
5789 if p > 0 {
5790 p -= 1;
5791 let p_usize = p as usize;
5792 cache[offset].index = p
5793 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5794 << (SAINT_BIT - 1));
5795 symbol = t[p_usize];
5796 }
5797 cache[offset].symbol = symbol;
5798 }
5799}
5800
5801#[allow(dead_code)]
5802fn final_sorting_scan_left_to_right_32s_block_sort(
5803 t: &[SaSint],
5804 induction_bucket: &mut [SaSint],
5805 cache: &mut [ThreadCache],
5806 omp_block_start: SaSint,
5807 omp_block_size: SaSint,
5808) {
5809 if omp_block_size <= 0 {
5810 return;
5811 }
5812
5813 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5814 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5815 let block_end = start + size;
5816
5817 for offset in 0..size {
5818 let v = cache[offset].symbol;
5819 if v >= 0 {
5820 let bucket_index = v as usize;
5821 let target = induction_bucket[bucket_index];
5822 cache[offset].symbol = target;
5823 induction_bucket[bucket_index] += 1;
5824 if target >= omp_block_start && target < block_end as SaSint {
5825 let ni = usize::try_from(target - omp_block_start)
5826 .expect("cache slot must be non-negative");
5827 let mut np = cache[offset].index;
5828 cache[offset].index = np ^ SAINT_MIN;
5829 if np > 0 {
5830 np -= 1;
5831 let np_usize = np as usize;
5832 cache[ni].index = np
5833 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
5834 as SaSint)
5835 << (SAINT_BIT - 1));
5836 cache[ni].symbol = t[np_usize];
5837 }
5838 }
5839 }
5840 }
5841}
5842
5843#[allow(dead_code)]
5844fn final_sorting_scan_left_to_right_32s_block_omp(
5845 t: &[SaSint],
5846 sa: &mut [SaSint],
5847 buckets: &mut [SaSint],
5848 cache: &mut [ThreadCache],
5849 block_start: SaSint,
5850 block_size: SaSint,
5851 threads: SaSint,
5852) {
5853 if threads <= 1 || block_size < 16_384 {
5854 final_sorting_scan_left_to_right_32s(t, sa, buckets, block_start, block_size);
5855 return;
5856 }
5857
5858 final_sorting_scan_left_to_right_32s_block_gather(t, sa, cache, block_start, block_size);
5859 final_sorting_scan_left_to_right_32s_block_sort(t, buckets, cache, block_start, block_size);
5860
5861 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5862 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
5863 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5864 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5865 for omp_thread_num in 0..omp_num_threads {
5866 let omp_block_start = omp_thread_num * omp_block_stride;
5867 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5868 omp_block_stride
5869 } else {
5870 block_size_usize - omp_block_start
5871 };
5872 compact_and_place_cached_suffixes(
5873 sa,
5874 cache,
5875 omp_block_start as SaSint,
5876 omp_block_size as SaSint,
5877 );
5878 }
5879}
5880
5881#[allow(dead_code)]
5882fn final_sorting_scan_left_to_right_32s_omp(
5883 t: &[SaSint],
5884 sa: &mut [SaSint],
5885 n: SaSint,
5886 induction_bucket: &mut [SaSint],
5887 threads: SaSint,
5888 thread_state: &mut [ThreadState],
5889) {
5890 let last = (n - 1) as usize;
5891 let bucket = t[last] as usize;
5892 let slot = induction_bucket[bucket] as usize;
5893 sa[slot] = (n - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
5894 induction_bucket[bucket] += 1;
5895
5896 if threads == 1 || n < 65536 || thread_state.is_empty() {
5897 final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n);
5898 return;
5899 }
5900
5901 let threads_usize = usize::try_from(threads)
5902 .expect("threads must be non-negative")
5903 .max(1);
5904 let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
5905 let mut cache = vec![ThreadCache::default(); block_span];
5906 let mut block_start = 0;
5907 while block_start < n {
5908 let block_end = (block_start + block_span as SaSint).min(n);
5909 final_sorting_scan_left_to_right_32s_block_omp(
5910 t,
5911 sa,
5912 induction_bucket,
5913 &mut cache,
5914 block_start,
5915 block_end - block_start,
5916 threads,
5917 );
5918 block_start = block_end;
5919 }
5920}
5921
5922#[allow(dead_code)]
5923fn final_sorting_scan_right_to_left_32s(
5924 t: &[SaSint],
5925 sa: &mut [SaSint],
5926 induction_bucket: &mut [SaSint],
5927 omp_block_start: SaSint,
5928 omp_block_size: SaSint,
5929) {
5930 let mut i = (omp_block_start + omp_block_size - 1) as isize;
5931 let mut j = (omp_block_start + 2 * 64 + 1) as isize;
5932 while i >= j {
5933 for current in [i, i - 1] {
5934 let current = current as usize;
5935 let mut p = sa[current];
5936 sa[current] = p & SAINT_MAX;
5937 if p > 0 {
5938 p -= 1;
5939 let p_usize = p as usize;
5940 let bucket = t[p_usize] as usize;
5941 induction_bucket[bucket] -= 1;
5942 let slot = induction_bucket[bucket] as usize;
5943 sa[slot] = p
5944 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5945 << (SAINT_BIT - 1));
5946 }
5947 }
5948 i -= 2;
5949 }
5950
5951 j -= 2 * 64 + 1;
5952 while i >= j {
5953 let current = i as usize;
5954 let mut p = sa[current];
5955 sa[current] = p & SAINT_MAX;
5956 if p > 0 {
5957 p -= 1;
5958 let p_usize = p as usize;
5959 let bucket = t[p_usize] as usize;
5960 induction_bucket[bucket] -= 1;
5961 let slot = induction_bucket[bucket] as usize;
5962 sa[slot] = p
5963 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5964 << (SAINT_BIT - 1));
5965 }
5966 i -= 1;
5967 }
5968}
5969
5970#[allow(dead_code)]
5971fn final_sorting_scan_right_to_left_32s_block_gather(
5972 t: &[SaSint],
5973 sa: &mut [SaSint],
5974 cache: &mut [ThreadCache],
5975 omp_block_start: SaSint,
5976 omp_block_size: SaSint,
5977) {
5978 if omp_block_size <= 0 {
5979 return;
5980 }
5981
5982 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5983 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5984 for offset in 0..size {
5985 let current = start + offset;
5986 let mut symbol = SAINT_MIN;
5987 let mut p = sa[current];
5988 sa[current] = p & SAINT_MAX;
5989 if p > 0 {
5990 p -= 1;
5991 let p_usize = p as usize;
5992 cache[offset].index = p
5993 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5994 << (SAINT_BIT - 1));
5995 symbol = t[p_usize];
5996 }
5997 cache[offset].symbol = symbol;
5998 }
5999}
6000
6001#[allow(dead_code)]
6002fn final_sorting_scan_right_to_left_32s_block_sort(
6003 t: &[SaSint],
6004 induction_bucket: &mut [SaSint],
6005 cache: &mut [ThreadCache],
6006 omp_block_start: SaSint,
6007 omp_block_size: SaSint,
6008) {
6009 if omp_block_size <= 0 {
6010 return;
6011 }
6012
6013 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6014 let block_end = omp_block_start + omp_block_size;
6015 let mut offset = size;
6016
6017 while offset > 0 {
6018 offset -= 1;
6019 let v = cache[offset].symbol;
6020 if v >= 0 {
6021 let bucket_index = v as usize;
6022 induction_bucket[bucket_index] -= 1;
6023 let target = induction_bucket[bucket_index];
6024 cache[offset].symbol = target;
6025 if target >= omp_block_start && target < block_end {
6026 let ni = usize::try_from(target - omp_block_start)
6027 .expect("cache slot must be non-negative");
6028 let mut np = cache[offset].index;
6029 cache[offset].index = np & SAINT_MAX;
6030 if np > 0 {
6031 np -= 1;
6032 let np_usize = np as usize;
6033 cache[ni].index = np
6034 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
6035 as SaSint)
6036 << (SAINT_BIT - 1));
6037 cache[ni].symbol = t[np_usize];
6038 }
6039 }
6040 }
6041 }
6042}
6043
6044#[allow(dead_code)]
6045fn final_sorting_scan_right_to_left_32s_block_omp(
6046 t: &[SaSint],
6047 sa: &mut [SaSint],
6048 buckets: &mut [SaSint],
6049 cache: &mut [ThreadCache],
6050 block_start: SaSint,
6051 block_size: SaSint,
6052 threads: SaSint,
6053) {
6054 if threads <= 1 || block_size < 16_384 {
6055 final_sorting_scan_right_to_left_32s(t, sa, buckets, block_start, block_size);
6056 return;
6057 }
6058
6059 final_sorting_scan_right_to_left_32s_block_gather(t, sa, cache, block_start, block_size);
6060 final_sorting_scan_right_to_left_32s_block_sort(t, buckets, cache, block_start, block_size);
6061
6062 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
6063 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
6064 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
6065 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
6066 for omp_thread_num in 0..omp_num_threads {
6067 let omp_block_start = omp_thread_num * omp_block_stride;
6068 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6069 omp_block_stride
6070 } else {
6071 block_size_usize - omp_block_start
6072 };
6073 compact_and_place_cached_suffixes(
6074 sa,
6075 cache,
6076 omp_block_start as SaSint,
6077 omp_block_size as SaSint,
6078 );
6079 }
6080}
6081
6082#[allow(dead_code)]
6083fn final_sorting_scan_right_to_left_32s_omp(
6084 t: &[SaSint],
6085 sa: &mut [SaSint],
6086 n: SaSint,
6087 induction_bucket: &mut [SaSint],
6088 threads: SaSint,
6089 thread_state: &mut [ThreadState],
6090) {
6091 if threads == 1 || n < 65536 || thread_state.is_empty() {
6092 final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n);
6093 return;
6094 }
6095
6096 let threads_usize = usize::try_from(threads)
6097 .expect("threads must be non-negative")
6098 .max(1);
6099 let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
6100 let mut cache = vec![ThreadCache::default(); block_span];
6101 let mut block_start = n - 1;
6102 while block_start >= 0 {
6103 let block_end = (block_start - block_span as SaSint).max(-1);
6104 final_sorting_scan_right_to_left_32s_block_omp(
6105 t,
6106 sa,
6107 induction_bucket,
6108 &mut cache,
6109 block_end + 1,
6110 block_start - block_end,
6111 threads,
6112 );
6113 block_start = block_end;
6114 }
6115}
6116
6117#[allow(dead_code)]
6118fn induce_final_order_32s_6k(
6119 t: &[SaSint],
6120 sa: &mut [SaSint],
6121 n: SaSint,
6122 k: SaSint,
6123 buckets: &mut [SaSint],
6124 threads: SaSint,
6125 thread_state: &mut [ThreadState],
6126) {
6127 let k = k as usize;
6128 final_sorting_scan_left_to_right_32s_omp(
6129 t,
6130 sa,
6131 n,
6132 &mut buckets[4 * k..5 * k],
6133 threads,
6134 thread_state,
6135 );
6136 final_sorting_scan_right_to_left_32s_omp(
6137 t,
6138 sa,
6139 n,
6140 &mut buckets[5 * k..6 * k],
6141 threads,
6142 thread_state,
6143 );
6144}
6145
6146#[allow(dead_code)]
6147fn induce_final_order_32s_4k(
6148 t: &[SaSint],
6149 sa: &mut [SaSint],
6150 n: SaSint,
6151 k: SaSint,
6152 buckets: &mut [SaSint],
6153 threads: SaSint,
6154 thread_state: &mut [ThreadState],
6155) {
6156 let k = k as usize;
6157 final_sorting_scan_left_to_right_32s_omp(
6158 t,
6159 sa,
6160 n,
6161 &mut buckets[2 * k..3 * k],
6162 threads,
6163 thread_state,
6164 );
6165 final_sorting_scan_right_to_left_32s_omp(
6166 t,
6167 sa,
6168 n,
6169 &mut buckets[3 * k..4 * k],
6170 threads,
6171 thread_state,
6172 );
6173}
6174
6175#[allow(dead_code)]
6176fn induce_final_order_32s_2k(
6177 t: &[SaSint],
6178 sa: &mut [SaSint],
6179 n: SaSint,
6180 k: SaSint,
6181 buckets: &mut [SaSint],
6182 threads: SaSint,
6183 thread_state: &mut [ThreadState],
6184) {
6185 let k = k as usize;
6186 final_sorting_scan_left_to_right_32s_omp(
6187 t,
6188 sa,
6189 n,
6190 &mut buckets[k..2 * k],
6191 threads,
6192 thread_state,
6193 );
6194 final_sorting_scan_right_to_left_32s_omp(t, sa, n, &mut buckets[..k], threads, thread_state);
6195}
6196
6197#[allow(dead_code)]
6198fn induce_final_order_32s_1k(
6199 t: &[SaSint],
6200 sa: &mut [SaSint],
6201 n: SaSint,
6202 k: SaSint,
6203 buckets: &mut [SaSint],
6204 threads: SaSint,
6205 thread_state: &mut [ThreadState],
6206) {
6207 count_suffixes_32s(t, n, k, buckets);
6208 initialize_buckets_start_32s_1k(k, buckets);
6209 final_sorting_scan_left_to_right_32s_omp(t, sa, n, buckets, threads, thread_state);
6210
6211 count_suffixes_32s(t, n, k, buckets);
6212 initialize_buckets_end_32s_1k(k, buckets);
6213 final_sorting_scan_right_to_left_32s_omp(t, sa, n, buckets, threads, thread_state);
6214}
6215
6216#[allow(dead_code)]
6217fn clear_lms_suffixes_omp(
6218 sa: &mut [SaSint],
6219 n: SaSint,
6220 k: SaSint,
6221 bucket_start: &[SaSint],
6222 bucket_end: &[SaSint],
6223 threads: SaSint,
6224) {
6225 let k_usize = usize::try_from(k).expect("k must be non-negative");
6226 let thread_count = if threads > 1 && n >= 65536 {
6227 usize::try_from(threads).expect("threads must be positive")
6228 } else {
6229 1
6230 };
6231 for t in 0..thread_count {
6232 let mut c = t;
6233 while c < k_usize {
6234 if bucket_end[c] > bucket_start[c] {
6235 let start = bucket_start[c] as usize;
6236 let end = bucket_end[c] as usize;
6237 sa[start..end].fill(0);
6238 }
6239 c += thread_count;
6240 }
6241 }
6242}
6243
6244#[allow(dead_code)]
6245fn final_gsa_scan_right_to_left_16u(
6246 t: &[u16],
6247 sa: &mut [SaSint],
6248 induction_bucket: &mut [SaSint],
6249 omp_block_start: SaSint,
6250 omp_block_size: SaSint,
6251) {
6252 let mut i = (omp_block_start + omp_block_size - 1) as isize;
6253 let mut j = (omp_block_start + 64 + 1) as isize;
6254 while i >= j {
6255 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6256 final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, true);
6257 i -= 2;
6258 }
6259 j -= 64 + 1;
6260 while i >= j {
6261 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6262 i -= 1;
6263 }
6264}
6265
6266#[allow(dead_code)]
6267fn final_sorting_ltr_step(
6268 t: &[u16],
6269 sa: &mut [SaSint],
6270 induction_bucket: &mut [SaSint],
6271 index: usize,
6272) {
6273 let mut p = sa[index];
6274 sa[index] = p ^ SAINT_MIN;
6275 if p > 0 {
6276 p -= 1;
6277 let c = t[p as usize] as usize;
6278 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
6279 SAINT_MIN
6280 } else {
6281 0
6282 };
6283 let dst = induction_bucket[c] as usize;
6284 sa[dst] = p | mark;
6285 induction_bucket[c] += 1;
6286 }
6287}
6288
6289#[allow(dead_code)]
6290fn final_sorting_rtl_step(
6291 t: &[u16],
6292 sa: &mut [SaSint],
6293 induction_bucket: &mut [SaSint],
6294 index: usize,
6295 gsa: bool,
6296) {
6297 let mut p = sa[index];
6298 sa[index] = p & SAINT_MAX;
6299 if p > 0 && (!gsa || t[(p - 1) as usize] > 0) {
6300 p -= 1;
6301 let c = t[p as usize] as usize;
6302 let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
6303 SAINT_MIN
6304 } else {
6305 0
6306 };
6307 induction_bucket[c] -= 1;
6308 sa[induction_bucket[c] as usize] = p | mark;
6309 }
6310}
6311
6312#[allow(dead_code)]
6313fn final_bwt_scan_left_to_right_16u(
6314 t: &[u16],
6315 sa: &mut [SaSint],
6316 induction_bucket: &mut [SaSint],
6317 omp_block_start: SaSint,
6318 omp_block_size: SaSint,
6319) {
6320 let mut i = omp_block_start as isize;
6321 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6322 while i < j {
6323 final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6324 final_bwt_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
6325 i += 2;
6326 }
6327 j += 64 + 1;
6328 while i < j {
6329 final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6330 i += 1;
6331 }
6332}
6333
6334#[allow(dead_code)]
6335fn final_bwt_scan_right_to_left_16u(
6336 t: &[u16],
6337 sa: &mut [SaSint],
6338 induction_bucket: &mut [SaSint],
6339 omp_block_start: SaSint,
6340 omp_block_size: SaSint,
6341) -> SaSint {
6342 let mut index = -1;
6343 let mut i = (omp_block_start + omp_block_size - 1) as isize;
6344 let mut j = (omp_block_start + 64 + 1) as isize;
6345 while i >= j {
6346 final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6347 final_bwt_rtl_step(t, sa, induction_bucket, (i - 1) as usize, &mut index);
6348 i -= 2;
6349 }
6350 j -= 64 + 1;
6351 while i >= j {
6352 final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6353 i -= 1;
6354 }
6355 index
6356}
6357
6358#[allow(dead_code)]
6359fn final_bwt_aux_scan_left_to_right_16u(
6360 t: &[u16],
6361 sa: &mut [SaSint],
6362 rm: SaSint,
6363 i_sample: &mut [SaSint],
6364 induction_bucket: &mut [SaSint],
6365 omp_block_start: SaSint,
6366 omp_block_size: SaSint,
6367) {
6368 let mut i = omp_block_start as isize;
6369 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6370 while i < j {
6371 final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6372 final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, (i + 1) as usize);
6373 i += 2;
6374 }
6375 j += 64 + 1;
6376 while i < j {
6377 final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6378 i += 1;
6379 }
6380}
6381
6382#[allow(dead_code)]
6383fn final_bwt_aux_scan_right_to_left_16u(
6384 t: &[u16],
6385 sa: &mut [SaSint],
6386 rm: SaSint,
6387 i_sample: &mut [SaSint],
6388 induction_bucket: &mut [SaSint],
6389 omp_block_start: SaSint,
6390 omp_block_size: SaSint,
6391) {
6392 let mut i = (omp_block_start + omp_block_size - 1) as isize;
6393 let mut j = (omp_block_start + 64 + 1) as isize;
6394 while i >= j {
6395 final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6396 final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, (i - 1) as usize);
6397 i -= 2;
6398 }
6399 j -= 64 + 1;
6400 while i >= j {
6401 final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6402 i -= 1;
6403 }
6404}
6405
6406#[allow(dead_code)]
6407fn renumber_lms_suffixes_16u(
6408 sa: &mut [SaSint],
6409 m: SaSint,
6410 mut name: SaSint,
6411 omp_block_start: SaSint,
6412 omp_block_size: SaSint,
6413) -> SaSint {
6414 let mut i = omp_block_start as isize;
6415 let mut j = (omp_block_start + omp_block_size - 64 - 3) as isize;
6416 while i < j {
6417 let p0 = sa[i as usize];
6418 sa[m as usize + ((p0 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6419 name += SaSint::from(p0 < 0);
6420
6421 let p1 = sa[(i + 1) as usize];
6422 sa[m as usize + ((p1 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6423 name += SaSint::from(p1 < 0);
6424
6425 let p2 = sa[(i + 2) as usize];
6426 sa[m as usize + ((p2 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6427 name += SaSint::from(p2 < 0);
6428
6429 let p3 = sa[(i + 3) as usize];
6430 sa[m as usize + ((p3 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6431 name += SaSint::from(p3 < 0);
6432
6433 i += 4;
6434 }
6435
6436 j += 64 + 3;
6437 while i < j {
6438 let p = sa[i as usize];
6439 sa[m as usize + ((p & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6440 name += SaSint::from(p < 0);
6441 i += 1;
6442 }
6443
6444 name
6445}
6446
6447#[allow(dead_code)]
6448fn renumber_lms_suffixes_16u_omp(
6449 sa: &mut [SaSint],
6450 m: SaSint,
6451 threads: SaSint,
6452 thread_state: &mut [ThreadState],
6453) -> SaSint {
6454 if threads == 1 || m < 65_536 || thread_state.is_empty() {
6455 return renumber_lms_suffixes_16u(sa, m, 0, 0, m);
6456 }
6457
6458 let thread_count = usize::try_from(threads)
6459 .expect("threads must be non-negative")
6460 .min(thread_state.len());
6461 let block_stride = (m / thread_count as SaSint) & !15;
6462
6463 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6464 let block_start = thread as SaSint * block_stride;
6465 let block_size = if thread + 1 < thread_count {
6466 block_stride
6467 } else {
6468 m - block_start
6469 };
6470 state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6471 }
6472
6473 let mut name = 0;
6474 for thread in 0..thread_count {
6475 let block_start = thread as SaSint * block_stride;
6476 let block_size = if thread + 1 < thread_count {
6477 block_stride
6478 } else {
6479 m - block_start
6480 };
6481 renumber_lms_suffixes_16u(sa, m, name, block_start, block_size);
6482 name += thread_state[thread].count;
6483 }
6484
6485 name
6486}
6487
6488#[allow(dead_code)]
6489fn gather_marked_lms_suffixes(
6490 sa: &mut [SaSint],
6491 m: SaSint,
6492 mut l: isize,
6493 omp_block_start: isize,
6494 omp_block_size: isize,
6495) -> isize {
6496 if omp_block_size <= 0 {
6497 return l;
6498 }
6499
6500 l -= 1;
6501 let mut i = m as isize + omp_block_start + omp_block_size - 1;
6502 let mut j = m as isize + omp_block_start + 3;
6503 while i >= j {
6504 let s0 = sa[i as usize];
6505 sa[l as usize] = s0 & SAINT_MAX;
6506 l -= isize::from(s0 < 0);
6507
6508 let s1 = sa[(i - 1) as usize];
6509 sa[l as usize] = s1 & SAINT_MAX;
6510 l -= isize::from(s1 < 0);
6511
6512 let s2 = sa[(i - 2) as usize];
6513 sa[l as usize] = s2 & SAINT_MAX;
6514 l -= isize::from(s2 < 0);
6515
6516 let s3 = sa[(i - 3) as usize];
6517 sa[l as usize] = s3 & SAINT_MAX;
6518 l -= isize::from(s3 < 0);
6519
6520 i -= 4;
6521 }
6522
6523 j -= 3;
6524 while i >= j {
6525 let s = sa[i as usize];
6526 sa[l as usize] = s & SAINT_MAX;
6527 l -= isize::from(s < 0);
6528 i -= 1;
6529 }
6530
6531 l + 1
6532}
6533
6534#[allow(dead_code)]
6535fn gather_marked_lms_suffixes_omp(
6536 sa: &mut [SaSint],
6537 n: SaSint,
6538 m: SaSint,
6539 fs: SaSint,
6540 threads: SaSint,
6541 thread_state: &mut [ThreadState],
6542) {
6543 let half_n = n >> 1;
6544 if threads == 1 || n < 131_072 || thread_state.is_empty() {
6545 let _ = gather_marked_lms_suffixes(sa, m, (n + fs) as isize, 0, half_n as isize);
6546 return;
6547 }
6548
6549 let thread_count = usize::try_from(threads)
6550 .expect("threads must be non-negative")
6551 .min(thread_state.len());
6552 let block_stride = (half_n / thread_count as SaSint) & !15;
6553
6554 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6555 let block_start = thread as SaSint * block_stride;
6556 let block_size = if thread + 1 < thread_count {
6557 block_stride
6558 } else {
6559 half_n - block_start
6560 };
6561 let local_end = if thread + 1 < thread_count {
6562 m + block_start + block_size
6563 } else {
6564 n + fs
6565 } as isize;
6566 let gathered_position =
6567 gather_marked_lms_suffixes(sa, m, local_end, block_start as isize, block_size as isize);
6568 state.position = gathered_position as SaSint;
6569 state.count = (local_end - gathered_position) as SaSint;
6570 }
6571
6572 let mut position = (n + fs) as isize;
6573 for thread in (0..thread_count).rev() {
6574 let count =
6575 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
6576 position -= thread_state[thread].count as isize;
6577 if thread + 1 != thread_count && count > 0 {
6578 let src = usize::try_from(thread_state[thread].position)
6579 .expect("position must be non-negative");
6580 let dst = position as usize;
6581 sa.copy_within(src..src + count, dst);
6582 }
6583 }
6584}
6585
6586#[allow(dead_code)]
6587fn renumber_and_gather_lms_suffixes_omp(
6588 sa: &mut [SaSint],
6589 n: SaSint,
6590 m: SaSint,
6591 fs: SaSint,
6592 threads: SaSint,
6593 thread_state: &mut [ThreadState],
6594) -> SaSint {
6595 let m_usize = m as usize;
6596 let half_n = (n >> 1) as usize;
6597 sa[m_usize..m_usize + half_n].fill(0);
6598
6599 let name = renumber_lms_suffixes_16u_omp(sa, m, threads, thread_state);
6600 if name < m {
6601 gather_marked_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
6602 } else {
6603 for item in &mut sa[..m_usize] {
6604 *item &= SAINT_MAX;
6605 }
6606 }
6607
6608 name
6609}
6610
6611#[allow(dead_code)]
6612fn reconstruct_lms_suffixes(
6613 sa: &mut [SaSint],
6614 n: SaSint,
6615 m: SaSint,
6616 omp_block_start: isize,
6617 omp_block_size: isize,
6618) {
6619 if omp_block_size <= 0 {
6620 return;
6621 }
6622
6623 let base = (n - m) as usize;
6624 let mut i = omp_block_start;
6625 let mut j = omp_block_start + omp_block_size - 64 - 3;
6626 while i < j {
6627 let iu = i as usize;
6628 let s0 = sa[iu] as usize;
6629 let s1 = sa[iu + 1] as usize;
6630 let s2 = sa[iu + 2] as usize;
6631 let s3 = sa[iu + 3] as usize;
6632 sa[iu] = sa[base + s0];
6633 sa[iu + 1] = sa[base + s1];
6634 sa[iu + 2] = sa[base + s2];
6635 sa[iu + 3] = sa[base + s3];
6636 i += 4;
6637 }
6638
6639 j += 64 + 3;
6640 while i < j {
6641 let iu = i as usize;
6642 let s = sa[iu] as usize;
6643 sa[iu] = sa[base + s];
6644 i += 1;
6645 }
6646}
6647
6648#[allow(dead_code)]
6649fn reconstruct_lms_suffixes_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6650 if threads == 1 || m < 65_536 {
6651 reconstruct_lms_suffixes(sa, n, m, 0, m as isize);
6652 return;
6653 }
6654
6655 let thread_count = threads as usize;
6656 let block_stride = (m / threads) & !15;
6657 for thread in 0..thread_count {
6658 let block_start = thread as SaSint * block_stride;
6659 let block_size = if thread + 1 < thread_count {
6660 block_stride
6661 } else {
6662 m - block_start
6663 };
6664 reconstruct_lms_suffixes(sa, n, m, block_start as isize, block_size as isize);
6665 }
6666}
6667
6668#[allow(dead_code)]
6669fn renumber_distinct_lms_suffixes_32s_4k(
6670 sa: &mut [SaSint],
6671 m: SaSint,
6672 mut name: SaSint,
6673 omp_block_start: isize,
6674 omp_block_size: isize,
6675) -> SaSint {
6676 if omp_block_size <= 0 {
6677 return name;
6678 }
6679
6680 let m_usize = m as usize;
6681 let start = omp_block_start as usize;
6682 let size = omp_block_size as usize;
6683 let (sa_head, sam) = sa.split_at_mut(m_usize);
6684 let mut i = start;
6685 let mut j = start + size.saturating_sub(64 + 3);
6686 let mut p3 = 0;
6687
6688 while i < j {
6689 let p0 = sa_head[i];
6690 sa_head[i] = p0 & SAINT_MAX;
6691 sam[(sa_head[i] >> 1) as usize] = name | (p0 & p3 & SAINT_MIN);
6692 name += SaSint::from(p0 < 0);
6693
6694 let p1 = sa_head[i + 1];
6695 sa_head[i + 1] = p1 & SAINT_MAX;
6696 sam[(sa_head[i + 1] >> 1) as usize] = name | (p1 & p0 & SAINT_MIN);
6697 name += SaSint::from(p1 < 0);
6698
6699 let p2 = sa_head[i + 2];
6700 sa_head[i + 2] = p2 & SAINT_MAX;
6701 sam[(sa_head[i + 2] >> 1) as usize] = name | (p2 & p1 & SAINT_MIN);
6702 name += SaSint::from(p2 < 0);
6703
6704 p3 = sa_head[i + 3];
6705 sa_head[i + 3] = p3 & SAINT_MAX;
6706 sam[(sa_head[i + 3] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6707 name += SaSint::from(p3 < 0);
6708
6709 i += 4;
6710 }
6711
6712 j = start + size;
6713 while i < j {
6714 let p2 = p3;
6715 p3 = sa_head[i];
6716 sa_head[i] = p3 & SAINT_MAX;
6717 sam[(sa_head[i] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6718 name += SaSint::from(p3 < 0);
6719 i += 1;
6720 }
6721
6722 name
6723}
6724
6725#[allow(dead_code)]
6726fn mark_distinct_lms_suffixes_32s(
6727 sa: &mut [SaSint],
6728 m: SaSint,
6729 omp_block_start: isize,
6730 omp_block_size: isize,
6731) {
6732 if omp_block_size <= 0 {
6733 return;
6734 }
6735
6736 let mut i = m as usize + omp_block_start as usize;
6737 let mut j = i + (omp_block_size as usize).saturating_sub(3);
6738 let mut p3 = 0;
6739 while i < j {
6740 let mut p0 = sa[i];
6741 sa[i] = p0 & (p3 | SAINT_MAX);
6742 p0 = if p0 == 0 { p3 } else { p0 };
6743
6744 let mut p1 = sa[i + 1];
6745 sa[i + 1] = p1 & (p0 | SAINT_MAX);
6746 p1 = if p1 == 0 { p0 } else { p1 };
6747
6748 let mut p2 = sa[i + 2];
6749 sa[i + 2] = p2 & (p1 | SAINT_MAX);
6750 p2 = if p2 == 0 { p1 } else { p2 };
6751
6752 p3 = sa[i + 3];
6753 sa[i + 3] = p3 & (p2 | SAINT_MAX);
6754 p3 = if p3 == 0 { p2 } else { p3 };
6755 i += 4;
6756 }
6757
6758 j = m as usize + omp_block_start as usize + omp_block_size as usize;
6759 while i < j {
6760 let p2 = p3;
6761 p3 = sa[i];
6762 sa[i] = p3 & (p2 | SAINT_MAX);
6763 p3 = if p3 == 0 { p2 } else { p3 };
6764 i += 1;
6765 }
6766}
6767
6768#[allow(dead_code)]
6769fn clamp_lms_suffixes_length_32s(
6770 sa: &mut [SaSint],
6771 m: SaSint,
6772 omp_block_start: isize,
6773 omp_block_size: isize,
6774) {
6775 if omp_block_size <= 0 {
6776 return;
6777 }
6778
6779 let mut i = m as usize + omp_block_start as usize;
6780 let mut j = i + (omp_block_size as usize).saturating_sub(3);
6781 while i < j {
6782 let s0 = sa[i];
6783 sa[i] = if s0 < 0 { s0 } else { 0 } & SAINT_MAX;
6784
6785 let s1 = sa[i + 1];
6786 sa[i + 1] = if s1 < 0 { s1 } else { 0 } & SAINT_MAX;
6787
6788 let s2 = sa[i + 2];
6789 sa[i + 2] = if s2 < 0 { s2 } else { 0 } & SAINT_MAX;
6790
6791 let s3 = sa[i + 3];
6792 sa[i + 3] = if s3 < 0 { s3 } else { 0 } & SAINT_MAX;
6793
6794 i += 4;
6795 }
6796
6797 j = m as usize + omp_block_start as usize + omp_block_size as usize;
6798 while i < j {
6799 let s = sa[i];
6800 sa[i] = if s < 0 { s } else { 0 } & SAINT_MAX;
6801 i += 1;
6802 }
6803}
6804
6805#[allow(dead_code)]
6806fn renumber_distinct_lms_suffixes_32s_4k_omp(
6807 sa: &mut [SaSint],
6808 m: SaSint,
6809 threads: SaSint,
6810 thread_state: &mut [ThreadState],
6811) -> SaSint {
6812 if threads == 1 || m < 65_536 || thread_state.is_empty() {
6813 return renumber_distinct_lms_suffixes_32s_4k(sa, m, 1, 0, m as isize) - 1;
6814 }
6815
6816 let thread_count = usize::try_from(threads)
6817 .expect("threads must be non-negative")
6818 .min(thread_state.len());
6819 let block_stride = (m / thread_count as SaSint) & !15;
6820
6821 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6822 let block_start = thread as SaSint * block_stride;
6823 let block_size = if thread + 1 < thread_count {
6824 block_stride
6825 } else {
6826 m - block_start
6827 };
6828 state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6829 }
6830
6831 let mut count = 1;
6832 for thread in 0..thread_count {
6833 let block_start = thread as SaSint * block_stride;
6834 let block_size = if thread + 1 < thread_count {
6835 block_stride
6836 } else {
6837 m - block_start
6838 };
6839 renumber_distinct_lms_suffixes_32s_4k(
6840 sa,
6841 m,
6842 count,
6843 block_start as isize,
6844 block_size as isize,
6845 );
6846 count += thread_state[thread].count;
6847 }
6848
6849 count - 1
6850}
6851
6852#[allow(dead_code)]
6853fn mark_distinct_lms_suffixes_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6854 let half_n = n >> 1;
6855 if threads == 1 || n < 131_072 {
6856 mark_distinct_lms_suffixes_32s(sa, m, 0, half_n as isize);
6857 return;
6858 }
6859
6860 let thread_count = threads as usize;
6861 let block_stride = (half_n / threads) & !15;
6862 for thread in 0..thread_count {
6863 let block_start = thread as SaSint * block_stride;
6864 let block_size = if thread + 1 < thread_count {
6865 block_stride
6866 } else {
6867 half_n - block_start
6868 };
6869 mark_distinct_lms_suffixes_32s(sa, m, block_start as isize, block_size as isize);
6870 }
6871}
6872
6873#[allow(dead_code)]
6874fn clamp_lms_suffixes_length_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6875 let half_n = n >> 1;
6876 if threads == 1 || n < 131_072 {
6877 clamp_lms_suffixes_length_32s(sa, m, 0, half_n as isize);
6878 return;
6879 }
6880
6881 let thread_count = threads as usize;
6882 let block_stride = (half_n / threads) & !15;
6883 for thread in 0..thread_count {
6884 let block_start = thread as SaSint * block_stride;
6885 let block_size = if thread + 1 < thread_count {
6886 block_stride
6887 } else {
6888 half_n - block_start
6889 };
6890 clamp_lms_suffixes_length_32s(sa, m, block_start as isize, block_size as isize);
6891 }
6892}
6893
6894#[allow(dead_code)]
6895fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
6896 sa: &mut [SaSint],
6897 n: SaSint,
6898 m: SaSint,
6899 threads: SaSint,
6900 thread_state: &mut [ThreadState],
6901) -> SaSint {
6902 let m_usize = m as usize;
6903 let half_n = (n >> 1) as usize;
6904 sa[m_usize..m_usize + half_n].fill(0);
6905
6906 let name = renumber_distinct_lms_suffixes_32s_4k_omp(sa, m, threads, thread_state);
6907 if name < m {
6908 mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6909 }
6910
6911 name
6912}
6913
/// Names the LMS substrings referenced by `sa[..m]` by comparing them in `t`,
/// storing per-position results in the scratch region that starts at `sa[m]`.
///
/// Steps, as performed below:
/// 1. `gather_lms_suffixes_32s` is called and `sa[m..n - m]` is cleared.
/// 2. For every entry `sa[i]` with `i` in `n - m..n - 1`, the distance to the
///    next entry plus one, offset by `SAINT_MIN`, is stored at
///    `sa[m + (sa[i] >> 1)]`; the final entry gets `1 + SAINT_MIN`.
/// 3. `clamp_lms_suffixes_length_32s_omp` post-processes those stored values.
/// 4. Neighbouring entries of `sa[..m]` are compared: when their stored lengths
///    match, the corresponding ranges of `t` are compared element-wise. Each
///    slot receives the current `name`, with the sign bit kept only when the
///    entry differs from both neighbours.
/// 5. If `name <= m` afterwards, the distinct-suffix marking pass is run.
///
/// Returns `name - 1`.
///
/// NOTE(review): the interpretation of the stored values as "substring lengths"
/// follows from how they are used as comparison bounds here; the exact encoding
/// after clamping is defined by `clamp_lms_suffixes_length_32s` — confirm there.
#[allow(dead_code)]
fn renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
    t: &[SaSint],
    sa: &mut [SaSint],
    n: SaSint,
    m: SaSint,
    threads: SaSint,
) -> SaSint {
    let m_usize = m as usize;
    let n_usize = n as usize;

    gather_lms_suffixes_32s(t, sa, n);
    sa[m_usize..n_usize - m_usize].fill(0);

    // Four-way unrolled pass over the entries in `sa[n - m..]`; the `64 + 3`
    // margin keeps `sa[i + 4]` in range and leaves the rest to the tail loop.
    let mut i = n - m;
    let mut j = n - 1 - 64 - 3;
    while i < j {
        let s0 = (sa[i as usize] as SaUint >> 1) as usize;
        let s1 = (sa[(i + 1) as usize] as SaUint >> 1) as usize;
        let s2 = (sa[(i + 2) as usize] as SaUint >> 1) as usize;
        let s3 = (sa[(i + 3) as usize] as SaUint >> 1) as usize;
        sa[m_usize + s0] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
        sa[m_usize + s1] = sa[(i + 2) as usize] - sa[(i + 1) as usize] + 1 + SAINT_MIN;
        sa[m_usize + s2] = sa[(i + 3) as usize] - sa[(i + 2) as usize] + 1 + SAINT_MIN;
        sa[m_usize + s3] = sa[(i + 4) as usize] - sa[(i + 3) as usize] + 1 + SAINT_MIN;
        i += 4;
    }

    // Tail: same computation, one entry at a time, up to (not including) `n - 1`.
    j += 64 + 3;
    while i < j {
        let s = (sa[i as usize] as SaUint >> 1) as usize;
        sa[m_usize + s] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
        i += 1;
    }

    // The last entry has no successor, so it gets the fixed value `1 + SAINT_MIN`.
    let tail = (sa[n_usize - 1] as SaUint >> 1) as usize;
    sa[m_usize + tail] = 1 + SAINT_MIN;

    clamp_lms_suffixes_length_32s_omp(sa, n, m, threads);

    let mut name = 1;
    if m_usize > 0 {
        let mut i = 1usize;
        let mut j = m_usize.saturating_sub(64 + 1);
        // `p` is the previous entry, `plen` its stored length and `pdiff` is
        // `SAINT_MIN` when `p` differed from the entry before it (else 0).
        let mut p = sa[0] as usize;
        let mut plen = sa[m_usize + (p >> 1)];
        let mut pdiff = SAINT_MIN;

        // Two entries per iteration, alternating the roles of `p` and `q`.
        while i < j {
            let q = sa[i] as usize;
            let qlen = sa[m_usize + (q >> 1)];
            let mut qdiff = SAINT_MIN;
            if plen == qlen {
                let mut l = 0;
                while l < qlen as usize {
                    if t[p + l] != t[q + l] {
                        break;
                    }
                    l += 1;
                }
                // Zero when all `qlen` elements matched, `SAINT_MIN` otherwise.
                qdiff = ((l as SaSint) - qlen) & SAINT_MIN;
            }
            sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
            name += SaSint::from(qdiff < 0);

            p = sa[i + 1] as usize;
            plen = sa[m_usize + (p >> 1)];
            pdiff = SAINT_MIN;
            if qlen == plen {
                let mut l = 0;
                while l < plen as usize {
                    if t[q + l] != t[p + l] {
                        break;
                    }
                    l += 1;
                }
                pdiff = ((l as SaSint) - plen) & SAINT_MIN;
            }
            sa[m_usize + (q >> 1)] = name | (qdiff & pdiff);
            name += SaSint::from(pdiff < 0);
            i += 2;
        }

        // Remaining entries, one at a time, carrying `p`/`plen`/`pdiff` forward.
        j = m_usize;
        while i < j {
            let q = sa[i] as usize;
            let qlen = sa[m_usize + (q >> 1)];
            let mut qdiff = SAINT_MIN;
            if plen == qlen {
                let mut l = 0;
                while l < plen as usize {
                    if t[p + l] != t[q + l] {
                        break;
                    }
                    l += 1;
                }
                qdiff = ((l as SaSint) - plen) & SAINT_MIN;
            }
            sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
            name += SaSint::from(qdiff < 0);
            p = q;
            plen = qlen;
            pdiff = qdiff;
            i += 1;
        }

        // The final entry has no successor to compare against.
        sa[m_usize + (p >> 1)] = name | pdiff;
        name += 1;
    }

    if name <= m {
        mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
    }

    name - 1
}
7030
7031#[allow(dead_code)]
7032fn renumber_unique_and_nonunique_lms_suffixes_32s(
7033 t: &mut [SaSint],
7034 sa: &mut [SaSint],
7035 m: SaSint,
7036 mut f: SaSint,
7037 omp_block_start: isize,
7038 omp_block_size: isize,
7039) -> SaSint {
7040 if omp_block_size <= 0 {
7041 return f;
7042 }
7043
7044 let m_usize = m as usize;
7045 let (sa_head, sam) = sa.split_at_mut(m_usize);
7046 let mut i = omp_block_start;
7047 let mut j = omp_block_start + omp_block_size - 128 - 3;
7048 while i < j {
7049 for offset in 0..4 {
7050 let idx = (i + offset) as usize;
7051 let p = sa_head[idx] as SaUint;
7052 let mut s = sam[(p >> 1) as usize];
7053 if s < 0 {
7054 t[p as usize] |= SAINT_MIN;
7055 f += 1;
7056 s = i as SaSint + offset as SaSint + SAINT_MIN + f;
7057 }
7058 sam[(p >> 1) as usize] = s - f;
7059 }
7060 i += 4;
7061 }
7062
7063 j += 128 + 3;
7064 while i < j {
7065 let p = sa_head[i as usize] as SaUint;
7066 let mut s = sam[(p >> 1) as usize];
7067 if s < 0 {
7068 t[p as usize] |= SAINT_MIN;
7069 f += 1;
7070 s = i as SaSint + SAINT_MIN + f;
7071 }
7072 sam[(p >> 1) as usize] = s - f;
7073 i += 1;
7074 }
7075
7076 f
7077}
7078
7079#[allow(dead_code)]
7080fn compact_unique_and_nonunique_lms_suffixes_32s(
7081 sa: &mut [SaSint],
7082 m: SaSint,
7083 pl: &mut isize,
7084 pr: &mut isize,
7085 omp_block_start: isize,
7086 omp_block_size: isize,
7087) {
7088 if omp_block_size <= 0 {
7089 return;
7090 }
7091
7092 let m_usize = m as usize;
7093 let source: Vec<SaSint> = sa
7094 [m_usize + omp_block_start as usize..m_usize + (omp_block_start + omp_block_size) as usize]
7095 .to_vec();
7096 let mut l = *pl - 1;
7097 let mut r = *pr - 1;
7098
7099 for &p in source.iter().rev() {
7100 sa[l as usize] = p & SAINT_MAX;
7101 l -= isize::from(p < 0);
7102
7103 sa[r as usize] = p.wrapping_sub(1);
7104 r -= isize::from(p > 0);
7105 }
7106
7107 *pl = l + 1;
7108 *pr = r + 1;
7109}
7110
7111#[allow(dead_code)]
7112fn count_unique_suffixes(
7113 sa: &[SaSint],
7114 m: SaSint,
7115 omp_block_start: isize,
7116 omp_block_size: isize,
7117) -> SaSint {
7118 let base = m as usize;
7119 let start = omp_block_start as usize;
7120 let end = start + omp_block_size as usize;
7121 let mut count = 0;
7122 for i in start..end {
7123 count += SaSint::from(sa[base + ((sa[i] as SaUint) >> 1) as usize] < 0);
7124 }
7125 count
7126}
7127
7128#[allow(dead_code)]
7129fn renumber_unique_and_nonunique_lms_suffixes_32s_omp(
7130 t: &mut [SaSint],
7131 sa: &mut [SaSint],
7132 m: SaSint,
7133 threads: SaSint,
7134) -> SaSint {
7135 if threads == 1 || m < 65_536 {
7136 return renumber_unique_and_nonunique_lms_suffixes_32s(t, sa, m, 0, 0, m as isize);
7137 }
7138
7139 let thread_count = threads as usize;
7140 let block_stride = (m / threads) & !15;
7141 let mut counts = vec![0; thread_count];
7142
7143 for thread in 0..thread_count {
7144 let block_start = thread as SaSint * block_stride;
7145 let block_size = if thread + 1 < thread_count {
7146 block_stride
7147 } else {
7148 m - block_start
7149 };
7150 counts[thread] = count_unique_suffixes(sa, m, block_start as isize, block_size as isize);
7151 }
7152
7153 let mut f = 0;
7154 for thread in 0..thread_count {
7155 let block_start = thread as SaSint * block_stride;
7156 let block_size = if thread + 1 < thread_count {
7157 block_stride
7158 } else {
7159 m - block_start
7160 };
7161 renumber_unique_and_nonunique_lms_suffixes_32s(
7162 t,
7163 sa,
7164 m,
7165 f,
7166 block_start as isize,
7167 block_size as isize,
7168 );
7169 f += counts[thread];
7170 }
7171
7172 f
7173}
7174
7175#[allow(dead_code)]
7176fn compact_unique_and_nonunique_lms_suffixes_32s_omp(
7177 sa: &mut [SaSint],
7178 n: SaSint,
7179 m: SaSint,
7180 fs: SaSint,
7181 f: SaSint,
7182 threads: SaSint,
7183) {
7184 let half_n = n >> 1;
7185 if threads == 1 || n < 131_072 || m >= fs {
7186 let mut l = m as isize;
7187 let mut r = (n + fs) as isize;
7188 compact_unique_and_nonunique_lms_suffixes_32s(sa, m, &mut l, &mut r, 0, half_n as isize);
7189 } else {
7190 let thread_count = threads as usize;
7191 let block_stride = (half_n / threads) & !15;
7192 let mut positions = vec![0isize; thread_count];
7193 let mut counts = vec![0isize; thread_count];
7194
7195 for thread in 0..thread_count {
7196 let block_start = thread as SaSint * block_stride;
7197 let block_size = if thread + 1 < thread_count {
7198 block_stride
7199 } else {
7200 half_n - block_start
7201 };
7202 let mut position = (m + half_n + block_start + block_size) as isize;
7203 let mut count = (m + block_start + block_size) as isize;
7204 compact_unique_and_nonunique_lms_suffixes_32s(
7205 sa,
7206 m,
7207 &mut position,
7208 &mut count,
7209 block_start as isize,
7210 block_size as isize,
7211 );
7212 positions[thread] = position;
7213 counts[thread] = count;
7214 }
7215
7216 let mut position = m as isize;
7217 for thread in (0..thread_count).rev() {
7218 let block_end = if thread + 1 < thread_count {
7219 block_stride * (thread as SaSint + 1)
7220 } else {
7221 half_n
7222 };
7223 let count = (m + half_n + block_end) as isize - positions[thread];
7224 if count > 0 {
7225 position -= count;
7226 let src = positions[thread] as usize;
7227 let dst = position as usize;
7228 sa.copy_within(src..src + count as usize, dst);
7229 }
7230 }
7231
7232 let mut position = (n + fs) as isize;
7233 for thread in (0..thread_count).rev() {
7234 let block_end = if thread + 1 < thread_count {
7235 block_stride * (thread as SaSint + 1)
7236 } else {
7237 half_n
7238 };
7239 let count = (m + block_end) as isize - counts[thread];
7240 if count > 0 {
7241 position -= count;
7242 let src = counts[thread] as usize;
7243 let dst = position as usize;
7244 sa.copy_within(src..src + count as usize, dst);
7245 }
7246 }
7247 }
7248
7249 let dst = (n + fs - m) as usize;
7250 let src = (m - f) as usize;
7251 sa.copy_within(src..src + f as usize, dst);
7252}
7253
7254#[allow(dead_code)]
7255fn compact_lms_suffixes_32s_omp(
7256 t: &mut [SaSint],
7257 sa: &mut [SaSint],
7258 n: SaSint,
7259 m: SaSint,
7260 fs: SaSint,
7261 threads: SaSint,
7262) -> SaSint {
7263 let f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(t, sa, m, threads);
7264 compact_unique_and_nonunique_lms_suffixes_32s_omp(sa, n, m, fs, f, threads);
7265 f
7266}
7267
7268#[allow(dead_code)]
7269fn merge_unique_lms_suffixes_32s(
7270 t: &mut [SaSint],
7271 sa: &mut [SaSint],
7272 n: SaSint,
7273 m: SaSint,
7274 l: isize,
7275 omp_block_start: isize,
7276 omp_block_size: isize,
7277) {
7278 let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7279 let mut tmp = sa[src_index] as isize;
7280 src_index += 1;
7281
7282 let mut i = omp_block_start;
7283 let mut j = omp_block_start + omp_block_size - 6;
7284 while i < j {
7285 let iu = i as usize;
7286
7287 let c0 = t[iu];
7288 if c0 < 0 {
7289 t[iu] = c0 & SAINT_MAX;
7290 sa[tmp as usize] = i as SaSint;
7291 i += 1;
7292 tmp = sa[src_index] as isize;
7293 src_index += 1;
7294 }
7295
7296 let c1 = t[(i + 1) as usize];
7297 if c1 < 0 {
7298 t[(i + 1) as usize] = c1 & SAINT_MAX;
7299 sa[tmp as usize] = i as SaSint + 1;
7300 i += 1;
7301 tmp = sa[src_index] as isize;
7302 src_index += 1;
7303 }
7304
7305 let c2 = t[(i + 2) as usize];
7306 if c2 < 0 {
7307 t[(i + 2) as usize] = c2 & SAINT_MAX;
7308 sa[tmp as usize] = i as SaSint + 2;
7309 i += 1;
7310 tmp = sa[src_index] as isize;
7311 src_index += 1;
7312 }
7313
7314 let c3 = t[(i + 3) as usize];
7315 if c3 < 0 {
7316 t[(i + 3) as usize] = c3 & SAINT_MAX;
7317 sa[tmp as usize] = i as SaSint + 3;
7318 i += 1;
7319 tmp = sa[src_index] as isize;
7320 src_index += 1;
7321 }
7322
7323 i += 4;
7324 }
7325
7326 j += 6;
7327 while i < j {
7328 let c = t[i as usize];
7329 if c < 0 {
7330 t[i as usize] = c & SAINT_MAX;
7331 sa[tmp as usize] = i as SaSint;
7332 i += 1;
7333 tmp = sa[src_index] as isize;
7334 src_index += 1;
7335 }
7336 i += 1;
7337 }
7338}
7339
7340#[allow(dead_code)]
7341fn merge_nonunique_lms_suffixes_32s(
7342 sa: &mut [SaSint],
7343 n: SaSint,
7344 m: SaSint,
7345 l: isize,
7346 omp_block_start: isize,
7347 omp_block_size: isize,
7348) {
7349 let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7350 let mut tmp = sa[src_index];
7351 src_index += 1;
7352
7353 let mut i = omp_block_start;
7354 let mut j = omp_block_start + omp_block_size - 3;
7355 while i < j {
7356 if sa[i as usize] == 0 {
7357 sa[i as usize] = tmp;
7358 tmp = sa[src_index];
7359 src_index += 1;
7360 }
7361 if sa[(i + 1) as usize] == 0 {
7362 sa[(i + 1) as usize] = tmp;
7363 tmp = sa[src_index];
7364 src_index += 1;
7365 }
7366 if sa[(i + 2) as usize] == 0 {
7367 sa[(i + 2) as usize] = tmp;
7368 tmp = sa[src_index];
7369 src_index += 1;
7370 }
7371 if sa[(i + 3) as usize] == 0 {
7372 sa[(i + 3) as usize] = tmp;
7373 tmp = sa[src_index];
7374 src_index += 1;
7375 }
7376 i += 4;
7377 }
7378
7379 j += 3;
7380 while i < j {
7381 if sa[i as usize] == 0 {
7382 sa[i as usize] = tmp;
7383 tmp = sa[src_index];
7384 src_index += 1;
7385 }
7386 i += 1;
7387 }
7388}
7389
7390#[allow(dead_code)]
7391fn merge_unique_lms_suffixes_32s_omp(
7392 t: &mut [SaSint],
7393 sa: &mut [SaSint],
7394 n: SaSint,
7395 m: SaSint,
7396 threads: SaSint,
7397) {
7398 if threads == 1 || n < 65_536 {
7399 merge_unique_lms_suffixes_32s(t, sa, n, m, 0, 0, n as isize);
7400 return;
7401 }
7402
7403 let thread_count = threads as usize;
7404 let block_stride = (n / threads) & !15;
7405 let mut counts = vec![0; thread_count];
7406
7407 for thread in 0..thread_count {
7408 let block_start = thread as SaSint * block_stride;
7409 let block_size = if thread + 1 < thread_count {
7410 block_stride
7411 } else {
7412 n - block_start
7413 };
7414 counts[thread] = count_negative_marked_suffixes(t, block_start, block_size);
7415 }
7416
7417 let mut count = 0;
7418 for thread in 0..thread_count {
7419 let block_start = thread as SaSint * block_stride;
7420 let block_size = if thread + 1 < thread_count {
7421 block_stride
7422 } else {
7423 n - block_start
7424 };
7425 merge_unique_lms_suffixes_32s(
7426 t,
7427 sa,
7428 n,
7429 m,
7430 count as isize,
7431 block_start as isize,
7432 block_size as isize,
7433 );
7434 count += counts[thread];
7435 }
7436}
7437
7438#[allow(dead_code)]
7439fn merge_nonunique_lms_suffixes_32s_omp(
7440 sa: &mut [SaSint],
7441 n: SaSint,
7442 m: SaSint,
7443 f: SaSint,
7444 threads: SaSint,
7445) {
7446 if threads == 1 || m < 65_536 {
7447 merge_nonunique_lms_suffixes_32s(sa, n, m, f as isize, 0, m as isize);
7448 return;
7449 }
7450
7451 let thread_count = threads as usize;
7452 let block_stride = (m / threads) & !15;
7453 let mut counts = vec![0; thread_count];
7454
7455 for thread in 0..thread_count {
7456 let block_start = thread as SaSint * block_stride;
7457 let block_size = if thread + 1 < thread_count {
7458 block_stride
7459 } else {
7460 m - block_start
7461 };
7462 counts[thread] = count_zero_marked_suffixes(sa, block_start, block_size);
7463 }
7464
7465 let mut count = f;
7466 for thread in 0..thread_count {
7467 let block_start = thread as SaSint * block_stride;
7468 let block_size = if thread + 1 < thread_count {
7469 block_stride
7470 } else {
7471 m - block_start
7472 };
7473 merge_nonunique_lms_suffixes_32s(
7474 sa,
7475 n,
7476 m,
7477 count as isize,
7478 block_start as isize,
7479 block_size as isize,
7480 );
7481 count += counts[thread];
7482 }
7483}
7484
7485#[allow(dead_code)]
7486fn merge_compacted_lms_suffixes_32s_omp(
7487 t: &mut [SaSint],
7488 sa: &mut [SaSint],
7489 n: SaSint,
7490 m: SaSint,
7491 f: SaSint,
7492 threads: SaSint,
7493) {
7494 merge_unique_lms_suffixes_32s_omp(t, sa, n, m, threads);
7495 merge_nonunique_lms_suffixes_32s_omp(sa, n, m, f, threads);
7496}
7497
7498#[allow(dead_code)]
7499fn reconstruct_compacted_lms_suffixes_32s_2k_omp(
7500 t: &mut [SaSint],
7501 sa: &mut [SaSint],
7502 n: SaSint,
7503 k: SaSint,
7504 m: SaSint,
7505 fs: SaSint,
7506 f: SaSint,
7507 buckets: &mut [SaSint],
7508 local_buckets: SaSint,
7509 threads: SaSint,
7510 thread_state: &mut [ThreadState],
7511) {
7512 if f > 0 {
7513 let dst = (n - m - 1) as usize;
7514 let src = (n + fs - m) as usize;
7515 sa.copy_within(src..src + f as usize, dst);
7516
7517 count_and_gather_compacted_lms_suffixes_32s_2k_omp(
7518 t,
7519 sa,
7520 n,
7521 k,
7522 buckets,
7523 local_buckets,
7524 threads,
7525 thread_state,
7526 );
7527 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7528
7529 let dst = (n - m - 1 + f) as usize;
7530 sa.copy_within(0..(m - f) as usize, dst);
7531 sa[..m as usize].fill(0);
7532
7533 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7534 } else {
7535 count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
7536 reconstruct_lms_suffixes_omp(sa, n, m, threads);
7537 }
7538}
7539
7540#[allow(dead_code)]
7541fn reconstruct_compacted_lms_suffixes_32s_1k_omp(
7542 t: &mut [SaSint],
7543 sa: &mut [SaSint],
7544 n: SaSint,
7545 m: SaSint,
7546 fs: SaSint,
7547 f: SaSint,
7548 threads: SaSint,
7549) {
7550 if f > 0 {
7551 let dst = (n - m - 1) as usize;
7552 let src = (n + fs - m) as usize;
7553 sa.copy_within(src..src + f as usize, dst);
7554
7555 gather_compacted_lms_suffixes_32s(t, sa, n);
7556 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7557
7558 let dst = (n - m - 1 + f) as usize;
7559 sa.copy_within(0..(m - f) as usize, dst);
7560 sa[..m as usize].fill(0);
7561
7562 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7563 } else {
7564 gather_lms_suffixes_32s(t, sa, n);
7565 reconstruct_lms_suffixes_omp(sa, n, m, threads);
7566 }
7567}
7568
7569#[allow(dead_code)]
7570fn place_lms_suffixes_interval_16u(
7571 sa: &mut [SaSint],
7572 n: SaSint,
7573 mut m: SaSint,
7574 flags: SaSint,
7575 buckets: &mut [SaSint],
7576) {
7577 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7578 buckets[7 * ALPHABET_SIZE] -= 1;
7579 }
7580
7581 let mut j = n as isize;
7582 let mut c = ALPHABET_SIZE as isize - 2;
7583 while c >= 0 {
7584 let ci = c as usize;
7585 let l =
7586 buckets[buckets_index2(ci, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(ci, 1)];
7587 if l > 0 {
7588 let i = buckets[7 * ALPHABET_SIZE + ci] as isize;
7589 if j - i > 0 {
7590 sa[i as usize..j as usize].fill(0);
7591 }
7592
7593 m -= l;
7594 j = i - l as isize;
7595 let src = m as usize;
7596 let dst = j as usize;
7597 sa.copy_within(src..src + l as usize, dst);
7598 }
7599 c -= 1;
7600 }
7601
7602 sa[..j as usize].fill(0);
7603
7604 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7605 buckets[7 * ALPHABET_SIZE] += 1;
7606 }
7607}
7608
7609#[allow(dead_code)]
7610fn place_lms_suffixes_interval_32s_4k(
7611 sa: &mut [SaSint],
7612 n: SaSint,
7613 k: SaSint,
7614 mut m: SaSint,
7615 buckets: &[SaSint],
7616) {
7617 let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7618 let mut j = n as usize;
7619 let mut c = k - 2;
7620 while c >= 0 {
7621 let cu = c as usize;
7622 let l =
7623 buckets[buckets_index2(cu, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(cu, 1)];
7624 if l > 0 {
7625 let i = bucket_end[cu] as usize;
7626 if j > i {
7627 sa[i..j].fill(0);
7628 }
7629
7630 m -= l;
7631 let dst = i - l as usize;
7632 sa.copy_within(m as usize..m as usize + l as usize, dst);
7633 j = dst;
7634 }
7635 c -= 1;
7636 }
7637
7638 sa[..j].fill(0);
7639}
7640
7641#[allow(dead_code)]
7642fn place_lms_suffixes_interval_32s_2k(
7643 sa: &mut [SaSint],
7644 n: SaSint,
7645 k: SaSint,
7646 mut m: SaSint,
7647 buckets: &[SaSint],
7648) {
7649 let mut j = n as usize;
7650 if k > 1 {
7651 let mut c = buckets_index2(k as usize - 2, 0) as isize;
7652 while c >= buckets_index2(0, 0) as isize {
7653 let cu = c as usize;
7654 let l = buckets[cu + buckets_index2(1, 1)] - buckets[cu + buckets_index2(0, 1)];
7655 if l > 0 {
7656 let i = buckets[cu] as usize;
7657 if j > i {
7658 sa[i..j].fill(0);
7659 }
7660
7661 m -= l;
7662 let dst = i - l as usize;
7663 sa.copy_within(m as usize..m as usize + l as usize, dst);
7664 j = dst;
7665 }
7666 c -= buckets_index2(1, 0) as isize;
7667 }
7668 }
7669
7670 sa[..j].fill(0);
7671}
7672
7673#[allow(dead_code)]
7674fn place_lms_suffixes_interval_32s_1k(
7675 t: &[SaSint],
7676 sa: &mut [SaSint],
7677 k: SaSint,
7678 m: SaSint,
7679 buckets: &[SaSint],
7680) {
7681 let mut c = k - 1;
7682 let mut l = buckets[c as usize] as usize;
7683
7684 let mut i = m - 1;
7685 while i >= 0 {
7686 let p = sa[i as usize] as usize;
7687 if t[p] != c {
7688 c = t[p];
7689 let bucket_pos = buckets[c as usize] as usize;
7690 if l > bucket_pos {
7691 sa[bucket_pos..l].fill(0);
7692 }
7693 l = bucket_pos;
7694 }
7695 l -= 1;
7696 sa[l] = p as SaSint;
7697 i -= 1;
7698 }
7699
7700 sa[..l].fill(0);
7701}
7702
7703#[allow(dead_code)]
7704fn place_lms_suffixes_histogram_32s_6k(
7705 sa: &mut [SaSint],
7706 n: SaSint,
7707 k: SaSint,
7708 mut m: SaSint,
7709 buckets: &[SaSint],
7710) {
7711 let bucket_end = &buckets[5 * k as usize..6 * k as usize];
7712 let mut j = n as usize;
7713 let mut c = k - 2;
7714 while c >= 0 {
7715 let l = buckets[buckets_index4(c as usize, 1)] as usize;
7716 if l > 0 {
7717 let i = bucket_end[c as usize] as usize;
7718 if j > i {
7719 sa[i..j].fill(0);
7720 }
7721 let dst = i - l;
7722 m -= l as SaSint;
7723 sa.copy_within(m as usize..m as usize + l, dst);
7724 j = dst;
7725 }
7726 c -= 1;
7727 }
7728 sa[..j].fill(0);
7729}
7730
7731#[allow(dead_code)]
7732fn place_lms_suffixes_histogram_32s_4k(
7733 sa: &mut [SaSint],
7734 n: SaSint,
7735 k: SaSint,
7736 mut m: SaSint,
7737 buckets: &[SaSint],
7738) {
7739 let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7740 let mut j = n as usize;
7741 let mut c = k - 2;
7742 while c >= 0 {
7743 let l = buckets[buckets_index2(c as usize, 1)] as usize;
7744 if l > 0 {
7745 let i = bucket_end[c as usize] as usize;
7746 if j > i {
7747 sa[i..j].fill(0);
7748 }
7749 let dst = i - l;
7750 m -= l as SaSint;
7751 sa.copy_within(m as usize..m as usize + l, dst);
7752 j = dst;
7753 }
7754 c -= 1;
7755 }
7756 sa[..j].fill(0);
7757}
7758
7759#[allow(dead_code)]
7760fn place_lms_suffixes_histogram_32s_2k(
7761 sa: &mut [SaSint],
7762 n: SaSint,
7763 k: SaSint,
7764 mut m: SaSint,
7765 buckets: &[SaSint],
7766) {
7767 let mut j = n as usize;
7768 if k > 1 {
7769 let mut c = buckets_index2(k as usize - 2, 0) as isize;
7770 while c >= buckets_index2(0, 0) as isize {
7771 let cu = c as usize;
7772 let l = buckets[cu + buckets_index2(0, 1)] as usize;
7773 if l > 0 {
7774 let i = buckets[cu] as usize;
7775 if j > i {
7776 sa[i..j].fill(0);
7777 }
7778 let dst = i - l;
7779 m -= l as SaSint;
7780 sa.copy_within(m as usize..m as usize + l, dst);
7781 j = dst;
7782 }
7783 c -= buckets_index2(1, 0) as isize;
7784 }
7785 }
7786 sa[..j].fill(0);
7787}
7788
7789#[allow(dead_code)]
7790fn final_bwt_scan_left_to_right_16u_block_prepare(
7791 t: &[u16],
7792 sa: &mut [SaSint],
7793 k: SaSint,
7794 buckets: &mut [SaSint],
7795 cache: &mut [ThreadCache],
7796 omp_block_start: SaSint,
7797 omp_block_size: SaSint,
7798) -> SaSint {
7799 buckets[..k as usize].fill(0);
7800 let mut count = 0usize;
7801 for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7802 let mut p = sa[i];
7803 sa[i] = p & SAINT_MAX;
7804 if p > 0 {
7805 p -= 1;
7806 let c = t[p as usize] as usize;
7807 sa[i] = c as SaSint | SAINT_MIN;
7808 buckets[c] += 1;
7809 cache[count].symbol = c as SaSint;
7810 cache[count].index = p
7811 | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7812 << (SAINT_BIT - 1));
7813 count += 1;
7814 }
7815 }
7816 count as SaSint
7817}
7818
7819#[allow(dead_code)]
7820fn final_sorting_scan_left_to_right_16u_block_prepare(
7821 t: &[u16],
7822 sa: &mut [SaSint],
7823 k: SaSint,
7824 buckets: &mut [SaSint],
7825 cache: &mut [ThreadCache],
7826 omp_block_start: SaSint,
7827 omp_block_size: SaSint,
7828) -> SaSint {
7829 buckets[..k as usize].fill(0);
7830 let mut count = 0usize;
7831 for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7832 let mut p = sa[i];
7833 sa[i] = p ^ SAINT_MIN;
7834 if p > 0 {
7835 p -= 1;
7836 let c = t[p as usize] as usize;
7837 buckets[c] += 1;
7838 cache[count].symbol = c as SaSint;
7839 cache[count].index = p
7840 | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7841 << (SAINT_BIT - 1));
7842 count += 1;
7843 }
7844 }
7845 count as SaSint
7846}
7847
7848#[allow(dead_code)]
7849fn final_order_scan_left_to_right_16u_block_place(
7850 sa: &mut [SaSint],
7851 buckets: &mut [SaSint],
7852 cache: &[ThreadCache],
7853 count: SaSint,
7854) {
7855 for entry in cache.iter().take(count as usize) {
7856 let c = entry.symbol as usize;
7857 let dst = buckets[c] as usize;
7858 sa[dst] = entry.index;
7859 buckets[c] += 1;
7860 }
7861}
7862
7863#[allow(dead_code)]
7864fn final_bwt_aux_scan_left_to_right_16u_block_place(
7865 sa: &mut [SaSint],
7866 rm: SaSint,
7867 i_sample: &mut [SaSint],
7868 buckets: &mut [SaSint],
7869 cache: &[ThreadCache],
7870 count: SaSint,
7871) {
7872 for entry in cache.iter().take(count as usize) {
7873 let c = entry.symbol as usize;
7874 let dst = buckets[c] as usize;
7875 sa[dst] = entry.index;
7876 buckets[c] += 1;
7877 let p = entry.index & SAINT_MAX;
7878 if (p & rm) == 0 {
7879 i_sample[(p / (rm + 1)) as usize] = buckets[c];
7880 }
7881 }
7882}
7883
7884#[allow(dead_code)]
7885fn final_bwt_scan_left_to_right_16u_block_omp(
7886 t: &[u16],
7887 sa: &mut [SaSint],
7888 k: SaSint,
7889 induction_bucket: &mut [SaSint],
7890 block_start: SaSint,
7891 block_size: SaSint,
7892 threads: SaSint,
7893 thread_state: &mut [ThreadState],
7894) {
7895 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7896 usize::try_from(threads)
7897 .expect("threads must be non-negative")
7898 .min(thread_state.len())
7899 } else {
7900 1
7901 };
7902 if thread_count <= 1 {
7903 final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
7904 return;
7905 }
7906
7907 let k_usize = usize::try_from(k).expect("k must be non-negative");
7908 let block_stride = (block_size / thread_count as SaSint) & !15;
7909
7910 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7911 let local_start = thread as SaSint * block_stride;
7912 let local_size = if thread + 1 < thread_count {
7913 block_stride
7914 } else {
7915 block_size - local_start
7916 };
7917 state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7918 t,
7919 sa,
7920 k,
7921 &mut state.buckets[..k_usize],
7922 &mut state.cache,
7923 block_start + local_start,
7924 local_size,
7925 );
7926 }
7927
7928 for state in thread_state.iter_mut().take(thread_count) {
7929 for c in 0..k_usize {
7930 let a = induction_bucket[c];
7931 let b = state.buckets[c];
7932 induction_bucket[c] = a + b;
7933 state.buckets[c] = a;
7934 }
7935 }
7936
7937 for state in thread_state.iter_mut().take(thread_count) {
7938 final_order_scan_left_to_right_16u_block_place(
7939 sa,
7940 &mut state.buckets[..k_usize],
7941 &state.cache,
7942 state.count,
7943 );
7944 }
7945}
7946
7947#[allow(dead_code)]
7948fn final_bwt_aux_scan_left_to_right_16u_block_omp(
7949 t: &[u16],
7950 sa: &mut [SaSint],
7951 k: SaSint,
7952 rm: SaSint,
7953 i_sample: &mut [SaSint],
7954 induction_bucket: &mut [SaSint],
7955 block_start: SaSint,
7956 block_size: SaSint,
7957 threads: SaSint,
7958 thread_state: &mut [ThreadState],
7959) {
7960 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7961 usize::try_from(threads)
7962 .expect("threads must be non-negative")
7963 .min(thread_state.len())
7964 } else {
7965 1
7966 };
7967 if thread_count <= 1 {
7968 final_bwt_aux_scan_left_to_right_16u(
7969 t,
7970 sa,
7971 rm,
7972 i_sample,
7973 induction_bucket,
7974 block_start,
7975 block_size,
7976 );
7977 return;
7978 }
7979
7980 let k_usize = usize::try_from(k).expect("k must be non-negative");
7981 let block_stride = (block_size / thread_count as SaSint) & !15;
7982
7983 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7984 let local_start = thread as SaSint * block_stride;
7985 let local_size = if thread + 1 < thread_count {
7986 block_stride
7987 } else {
7988 block_size - local_start
7989 };
7990 state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7991 t,
7992 sa,
7993 k,
7994 &mut state.buckets[..k_usize],
7995 &mut state.cache,
7996 block_start + local_start,
7997 local_size,
7998 );
7999 }
8000
8001 for state in thread_state.iter_mut().take(thread_count) {
8002 for c in 0..k_usize {
8003 let a = induction_bucket[c];
8004 let b = state.buckets[c];
8005 induction_bucket[c] = a + b;
8006 state.buckets[c] = a;
8007 }
8008 }
8009
8010 for state in thread_state.iter_mut().take(thread_count) {
8011 final_bwt_aux_scan_left_to_right_16u_block_place(
8012 sa,
8013 rm,
8014 i_sample,
8015 &mut state.buckets[..k_usize],
8016 &state.cache,
8017 state.count,
8018 );
8019 }
8020}
8021
8022#[allow(dead_code)]
8023fn final_sorting_scan_left_to_right_16u_block_omp(
8024 t: &[u16],
8025 sa: &mut [SaSint],
8026 k: SaSint,
8027 induction_bucket: &mut [SaSint],
8028 block_start: SaSint,
8029 block_size: SaSint,
8030 threads: SaSint,
8031 thread_state: &mut [ThreadState],
8032) {
8033 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8034 usize::try_from(threads)
8035 .expect("threads must be non-negative")
8036 .min(thread_state.len())
8037 } else {
8038 1
8039 };
8040 if thread_count <= 1 {
8041 final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
8042 return;
8043 }
8044
8045 let k_usize = usize::try_from(k).expect("k must be non-negative");
8046 let block_stride = (block_size / thread_count as SaSint) & !15;
8047
8048 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8049 let local_start = thread as SaSint * block_stride;
8050 let local_size = if thread + 1 < thread_count {
8051 block_stride
8052 } else {
8053 block_size - local_start
8054 };
8055 state.count = final_sorting_scan_left_to_right_16u_block_prepare(
8056 t,
8057 sa,
8058 k,
8059 &mut state.buckets[..k_usize],
8060 &mut state.cache,
8061 block_start + local_start,
8062 local_size,
8063 );
8064 }
8065
8066 for state in thread_state.iter_mut().take(thread_count) {
8067 for c in 0..k_usize {
8068 let a = induction_bucket[c];
8069 let b = state.buckets[c];
8070 induction_bucket[c] = a + b;
8071 state.buckets[c] = a;
8072 }
8073 }
8074
8075 for state in thread_state.iter_mut().take(thread_count) {
8076 final_order_scan_left_to_right_16u_block_place(
8077 sa,
8078 &mut state.buckets[..k_usize],
8079 &state.cache,
8080 state.count,
8081 );
8082 }
8083}
8084
8085#[allow(dead_code)]
8086fn final_bwt_scan_left_to_right_16u_omp(
8087 t: &[u16],
8088 sa: &mut [SaSint],
8089 n: SaSint,
8090 k: SaSint,
8091 induction_bucket: &mut [SaSint],
8092 threads: SaSint,
8093) {
8094 let c = t[(n - 1) as usize] as usize;
8095 let dst = induction_bucket[c] as usize;
8096 induction_bucket[c] += 1;
8097 let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8098 SAINT_MIN
8099 } else {
8100 0
8101 };
8102 sa[dst] = (n - 1) | mark;
8103
8104 if threads == 1 || n < 65536 {
8105 final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8106 } else {
8107 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8108 let mut block_start = 0;
8109 while block_start < n {
8110 if sa[block_start as usize] == 0 {
8111 block_start += 1;
8112 } else {
8113 let mut block_end =
8114 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8115 if block_end > n {
8116 block_end = n;
8117 }
8118 let mut block_scan_end = block_start + 1;
8119 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8120 block_scan_end += 1;
8121 }
8122 let block_size = block_scan_end - block_start;
8123 if block_size < 32 {
8124 while block_start < block_scan_end {
8125 let mut p = sa[block_start as usize];
8126 sa[block_start as usize] = p & SAINT_MAX;
8127 if p > 0 {
8128 p -= 1;
8129 let c = t[p as usize] as usize;
8130 sa[block_start as usize] = c as SaSint | SAINT_MIN;
8131 let dst = induction_bucket[c] as usize;
8132 induction_bucket[c] += 1;
8133 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8134 SAINT_MIN
8135 } else {
8136 0
8137 };
8138 sa[dst] = p | mark;
8139 }
8140 block_start += 1;
8141 }
8142 } else {
8143 final_bwt_scan_left_to_right_16u_block_omp(
8144 t,
8145 sa,
8146 k,
8147 induction_bucket,
8148 block_start,
8149 block_size,
8150 threads,
8151 &mut thread_state,
8152 );
8153 block_start = block_scan_end;
8154 }
8155 }
8156 }
8157 }
8158}
8159
8160#[allow(dead_code)]
8161fn final_bwt_aux_scan_left_to_right_16u_omp(
8162 t: &[u16],
8163 sa: &mut [SaSint],
8164 n: SaSint,
8165 k: SaSint,
8166 rm: SaSint,
8167 i_sample: &mut [SaSint],
8168 induction_bucket: &mut [SaSint],
8169 threads: SaSint,
8170) {
8171 let c = t[(n - 1) as usize] as usize;
8172 let dst = induction_bucket[c] as usize;
8173 induction_bucket[c] += 1;
8174 let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8175 SAINT_MIN
8176 } else {
8177 0
8178 };
8179 sa[dst] = (n - 1) | mark;
8180
8181 if ((n - 1) & rm) == 0 {
8182 i_sample[((n - 1) / (rm + 1)) as usize] = induction_bucket[c];
8183 }
8184
8185 if threads == 1 || n < 65536 {
8186 final_bwt_aux_scan_left_to_right_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8187 } else {
8188 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8189 let mut block_start = 0;
8190 while block_start < n {
8191 if sa[block_start as usize] == 0 {
8192 block_start += 1;
8193 } else {
8194 let mut block_end =
8195 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8196 if block_end > n {
8197 block_end = n;
8198 }
8199 let mut block_scan_end = block_start + 1;
8200 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8201 block_scan_end += 1;
8202 }
8203 let block_size = block_scan_end - block_start;
8204 if block_size < 32 {
8205 while block_start < block_scan_end {
8206 let mut p = sa[block_start as usize];
8207 sa[block_start as usize] = p & SAINT_MAX;
8208 if p > 0 {
8209 p -= 1;
8210 let c = t[p as usize] as usize;
8211 sa[block_start as usize] = c as SaSint | SAINT_MIN;
8212 let dst = induction_bucket[c] as usize;
8213 induction_bucket[c] += 1;
8214 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8215 SAINT_MIN
8216 } else {
8217 0
8218 };
8219 sa[dst] = p | mark;
8220 if (p & rm) == 0 {
8221 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
8222 }
8223 }
8224 block_start += 1;
8225 }
8226 } else {
8227 final_bwt_aux_scan_left_to_right_16u_block_omp(
8228 t,
8229 sa,
8230 k,
8231 rm,
8232 i_sample,
8233 induction_bucket,
8234 block_start,
8235 block_size,
8236 threads,
8237 &mut thread_state,
8238 );
8239 block_start = block_scan_end;
8240 }
8241 }
8242 }
8243 }
8244}
8245
8246#[allow(dead_code)]
8247fn final_sorting_scan_left_to_right_16u_omp(
8248 t: &[u16],
8249 sa: &mut [SaSint],
8250 n: SaSint,
8251 k: SaSint,
8252 induction_bucket: &mut [SaSint],
8253 threads: SaSint,
8254) {
8255 let c = t[(n - 1) as usize] as usize;
8256 let dst = induction_bucket[c] as usize;
8257 induction_bucket[c] += 1;
8258 let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8259 SAINT_MIN
8260 } else {
8261 0
8262 };
8263 sa[dst] = (n - 1) | mark;
8264
8265 if threads == 1 || n < 65536 {
8266 final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8267 } else {
8268 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8269 let mut block_start = 0;
8270 while block_start < n {
8271 if sa[block_start as usize] == 0 {
8272 block_start += 1;
8273 } else {
8274 let mut block_end =
8275 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8276 if block_end > n {
8277 block_end = n;
8278 }
8279 let mut block_scan_end = block_start + 1;
8280 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8281 block_scan_end += 1;
8282 }
8283 let block_size = block_scan_end - block_start;
8284 if block_size < 32 {
8285 while block_start < block_scan_end {
8286 let mut p = sa[block_start as usize];
8287 sa[block_start as usize] = p ^ SAINT_MIN;
8288 if p > 0 {
8289 p -= 1;
8290 let c = t[p as usize] as usize;
8291 let dst = induction_bucket[c] as usize;
8292 induction_bucket[c] += 1;
8293 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8294 SAINT_MIN
8295 } else {
8296 0
8297 };
8298 sa[dst] = p | mark;
8299 }
8300 block_start += 1;
8301 }
8302 } else {
8303 final_sorting_scan_left_to_right_16u_block_omp(
8304 t,
8305 sa,
8306 k,
8307 induction_bucket,
8308 block_start,
8309 block_size,
8310 threads,
8311 &mut thread_state,
8312 );
8313 block_start = block_scan_end;
8314 }
8315 }
8316 }
8317 }
8318}
8319
8320#[allow(dead_code)]
8321fn final_bwt_scan_right_to_left_16u_block_prepare(
8322 t: &[u16],
8323 sa: &mut [SaSint],
8324 k: SaSint,
8325 buckets: &mut [SaSint],
8326 cache: &mut [ThreadCache],
8327 omp_block_start: SaSint,
8328 omp_block_size: SaSint,
8329) -> SaSint {
8330 buckets[..k as usize].fill(0);
8331 let mut count = 0usize;
8332 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8333 let mut p = sa[i];
8334 sa[i] = p & SAINT_MAX;
8335 if p > 0 {
8336 p -= 1;
8337 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8338 let c1 = t[p as usize];
8339 sa[i] = c1 as SaSint;
8340 buckets[c1 as usize] += 1;
8341 cache[count].symbol = c1 as SaSint;
8342 cache[count].index = if c0 <= c1 {
8343 p
8344 } else {
8345 c0 as SaSint | SAINT_MIN
8346 };
8347 count += 1;
8348 }
8349 }
8350 count as SaSint
8351}
8352
8353#[allow(dead_code)]
8354fn final_bwt_aux_scan_right_to_left_16u_block_prepare(
8355 t: &[u16],
8356 sa: &mut [SaSint],
8357 k: SaSint,
8358 buckets: &mut [SaSint],
8359 cache: &mut [ThreadCache],
8360 omp_block_start: SaSint,
8361 omp_block_size: SaSint,
8362) -> SaSint {
8363 buckets[..k as usize].fill(0);
8364 let mut count = 0usize;
8365 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8366 let mut p = sa[i];
8367 sa[i] = p & SAINT_MAX;
8368 if p > 0 {
8369 p -= 1;
8370 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8371 let c1 = t[p as usize];
8372 sa[i] = c1 as SaSint;
8373 buckets[c1 as usize] += 1;
8374 cache[count].symbol = c1 as SaSint;
8375 cache[count].index = if c0 <= c1 {
8376 p
8377 } else {
8378 c0 as SaSint | SAINT_MIN
8379 };
8380 cache[count + 1].index = p;
8381 count += 2;
8382 }
8383 }
8384 count as SaSint
8385}
8386
8387#[allow(dead_code)]
8388fn final_sorting_scan_right_to_left_16u_block_prepare(
8389 t: &[u16],
8390 sa: &mut [SaSint],
8391 k: SaSint,
8392 buckets: &mut [SaSint],
8393 cache: &mut [ThreadCache],
8394 omp_block_start: SaSint,
8395 omp_block_size: SaSint,
8396) -> SaSint {
8397 buckets[..k as usize].fill(0);
8398 let mut count = 0usize;
8399 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8400 let mut p = sa[i];
8401 sa[i] = p & SAINT_MAX;
8402 if p > 0 {
8403 p -= 1;
8404 let c = t[p as usize] as usize;
8405 buckets[c] += 1;
8406 cache[count].symbol = c as SaSint;
8407 cache[count].index = p
8408 | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] > t[p as usize]) as SaSint)
8409 << (SAINT_BIT - 1));
8410 count += 1;
8411 }
8412 }
8413 count as SaSint
8414}
8415
8416#[allow(dead_code)]
8417fn final_order_scan_right_to_left_16u_block_place(
8418 sa: &mut [SaSint],
8419 buckets: &mut [SaSint],
8420 cache: &[ThreadCache],
8421 count: SaSint,
8422) {
8423 for entry in cache.iter().take(count as usize) {
8424 let c = entry.symbol as usize;
8425 buckets[c] -= 1;
8426 sa[buckets[c] as usize] = entry.index;
8427 }
8428}
8429
8430#[allow(dead_code)]
8431fn final_gsa_scan_right_to_left_16u_block_place(
8432 sa: &mut [SaSint],
8433 buckets: &mut [SaSint],
8434 cache: &[ThreadCache],
8435 count: SaSint,
8436) {
8437 for entry in cache.iter().take(count as usize) {
8438 let c = entry.symbol as usize;
8439 if c > 0 {
8440 buckets[c] -= 1;
8441 sa[buckets[c] as usize] = entry.index;
8442 }
8443 }
8444}
8445
8446#[allow(dead_code)]
8447fn final_bwt_aux_scan_right_to_left_16u_block_place(
8448 sa: &mut [SaSint],
8449 rm: SaSint,
8450 i_sample: &mut [SaSint],
8451 buckets: &mut [SaSint],
8452 cache: &[ThreadCache],
8453 count: SaSint,
8454) {
8455 let mut i = 0usize;
8456 while i < count as usize {
8457 let c = cache[i].symbol as usize;
8458 buckets[c] -= 1;
8459 sa[buckets[c] as usize] = cache[i].index;
8460 let p = cache[i + 1].index;
8461 if (p & rm) == 0 {
8462 i_sample[(p / (rm + 1)) as usize] = buckets[c] + 1;
8463 }
8464 i += 2;
8465 }
8466}
8467
8468#[allow(dead_code)]
8469fn final_bwt_scan_right_to_left_16u_block_omp(
8470 t: &[u16],
8471 sa: &mut [SaSint],
8472 k: SaSint,
8473 induction_bucket: &mut [SaSint],
8474 block_start: SaSint,
8475 block_size: SaSint,
8476 threads: SaSint,
8477 thread_state: &mut [ThreadState],
8478) -> SaSint {
8479 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8480 usize::try_from(threads)
8481 .expect("threads must be non-negative")
8482 .min(thread_state.len())
8483 } else {
8484 1
8485 };
8486 if thread_count <= 1 {
8487 return final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8488 }
8489
8490 let k_usize = usize::try_from(k).expect("k must be non-negative");
8491 let block_stride = (block_size / thread_count as SaSint) & !15;
8492
8493 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8494 let local_start = thread as SaSint * block_stride;
8495 let local_size = if thread + 1 < thread_count {
8496 block_stride
8497 } else {
8498 block_size - local_start
8499 };
8500 state.count = final_bwt_scan_right_to_left_16u_block_prepare(
8501 t,
8502 sa,
8503 k,
8504 &mut state.buckets[..k_usize],
8505 &mut state.cache,
8506 block_start + local_start,
8507 local_size,
8508 );
8509 }
8510
8511 for state in thread_state.iter_mut().take(thread_count).rev() {
8512 for c in 0..k_usize {
8513 let a = induction_bucket[c];
8514 let b = state.buckets[c];
8515 induction_bucket[c] = a - b;
8516 state.buckets[c] = a;
8517 }
8518 }
8519
8520 for state in thread_state.iter_mut().take(thread_count) {
8521 final_order_scan_right_to_left_16u_block_place(
8522 sa,
8523 &mut state.buckets[..k_usize],
8524 &state.cache,
8525 state.count,
8526 );
8527 }
8528
8529 -1
8530}
8531
8532#[allow(dead_code)]
8533fn final_bwt_aux_scan_right_to_left_16u_block_omp(
8534 t: &[u16],
8535 sa: &mut [SaSint],
8536 k: SaSint,
8537 rm: SaSint,
8538 i_sample: &mut [SaSint],
8539 induction_bucket: &mut [SaSint],
8540 block_start: SaSint,
8541 block_size: SaSint,
8542 threads: SaSint,
8543 thread_state: &mut [ThreadState],
8544) {
8545 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8546 usize::try_from(threads)
8547 .expect("threads must be non-negative")
8548 .min(thread_state.len())
8549 } else {
8550 1
8551 };
8552 if thread_count <= 1 {
8553 final_bwt_aux_scan_right_to_left_16u(
8554 t,
8555 sa,
8556 rm,
8557 i_sample,
8558 induction_bucket,
8559 block_start,
8560 block_size,
8561 );
8562 return;
8563 }
8564
8565 let k_usize = usize::try_from(k).expect("k must be non-negative");
8566 let block_stride = (block_size / thread_count as SaSint) & !15;
8567
8568 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8569 let local_start = thread as SaSint * block_stride;
8570 let local_size = if thread + 1 < thread_count {
8571 block_stride
8572 } else {
8573 block_size - local_start
8574 };
8575 state.count = final_bwt_aux_scan_right_to_left_16u_block_prepare(
8576 t,
8577 sa,
8578 k,
8579 &mut state.buckets[..k_usize],
8580 &mut state.cache,
8581 block_start + local_start,
8582 local_size,
8583 );
8584 }
8585
8586 for state in thread_state.iter_mut().take(thread_count).rev() {
8587 for c in 0..k_usize {
8588 let a = induction_bucket[c];
8589 let b = state.buckets[c];
8590 induction_bucket[c] = a - b;
8591 state.buckets[c] = a;
8592 }
8593 }
8594
8595 for state in thread_state.iter_mut().take(thread_count) {
8596 final_bwt_aux_scan_right_to_left_16u_block_place(
8597 sa,
8598 rm,
8599 i_sample,
8600 &mut state.buckets[..k_usize],
8601 &state.cache,
8602 state.count,
8603 );
8604 }
8605}
8606
8607#[allow(dead_code)]
8608fn final_sorting_scan_right_to_left_16u_block_omp(
8609 t: &[u16],
8610 sa: &mut [SaSint],
8611 k: SaSint,
8612 induction_bucket: &mut [SaSint],
8613 block_start: SaSint,
8614 block_size: SaSint,
8615 threads: SaSint,
8616 thread_state: &mut [ThreadState],
8617) {
8618 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8619 usize::try_from(threads)
8620 .expect("threads must be non-negative")
8621 .min(thread_state.len())
8622 } else {
8623 1
8624 };
8625 if thread_count <= 1 {
8626 final_sorting_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8627 return;
8628 }
8629
8630 let k_usize = usize::try_from(k).expect("k must be non-negative");
8631 let block_stride = (block_size / thread_count as SaSint) & !15;
8632
8633 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8634 let local_start = thread as SaSint * block_stride;
8635 let local_size = if thread + 1 < thread_count {
8636 block_stride
8637 } else {
8638 block_size - local_start
8639 };
8640 state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8641 t,
8642 sa,
8643 k,
8644 &mut state.buckets[..k_usize],
8645 &mut state.cache,
8646 block_start + local_start,
8647 local_size,
8648 );
8649 }
8650
8651 for state in thread_state.iter_mut().take(thread_count).rev() {
8652 for c in 0..k_usize {
8653 let a = induction_bucket[c];
8654 let b = state.buckets[c];
8655 induction_bucket[c] = a - b;
8656 state.buckets[c] = a;
8657 }
8658 }
8659
8660 for state in thread_state.iter_mut().take(thread_count) {
8661 final_order_scan_right_to_left_16u_block_place(
8662 sa,
8663 &mut state.buckets[..k_usize],
8664 &state.cache,
8665 state.count,
8666 );
8667 }
8668}
8669
8670#[allow(dead_code)]
8671fn final_gsa_scan_right_to_left_16u_block_omp(
8672 t: &[u16],
8673 sa: &mut [SaSint],
8674 k: SaSint,
8675 induction_bucket: &mut [SaSint],
8676 block_start: SaSint,
8677 block_size: SaSint,
8678 threads: SaSint,
8679 thread_state: &mut [ThreadState],
8680) {
8681 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8682 usize::try_from(threads)
8683 .expect("threads must be non-negative")
8684 .min(thread_state.len())
8685 } else {
8686 1
8687 };
8688 if thread_count <= 1 {
8689 final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8690 return;
8691 }
8692
8693 let k_usize = usize::try_from(k).expect("k must be non-negative");
8694 let block_stride = (block_size / thread_count as SaSint) & !15;
8695
8696 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8697 let local_start = thread as SaSint * block_stride;
8698 let local_size = if thread + 1 < thread_count {
8699 block_stride
8700 } else {
8701 block_size - local_start
8702 };
8703 state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8704 t,
8705 sa,
8706 k,
8707 &mut state.buckets[..k_usize],
8708 &mut state.cache,
8709 block_start + local_start,
8710 local_size,
8711 );
8712 }
8713
8714 for state in thread_state.iter_mut().take(thread_count).rev() {
8715 for c in 0..k_usize {
8716 let a = induction_bucket[c];
8717 let b = state.buckets[c];
8718 induction_bucket[c] = a - b;
8719 state.buckets[c] = a;
8720 }
8721 }
8722
8723 for state in thread_state.iter_mut().take(thread_count) {
8724 final_gsa_scan_right_to_left_16u_block_place(
8725 sa,
8726 &mut state.buckets[..k_usize],
8727 &state.cache,
8728 state.count,
8729 );
8730 }
8731}
8732
8733#[allow(dead_code)]
8734fn final_bwt_scan_right_to_left_16u_omp(
8735 t: &[u16],
8736 sa: &mut [SaSint],
8737 n: SaSint,
8738 k: SaSint,
8739 induction_bucket: &mut [SaSint],
8740 threads: SaSint,
8741) -> SaSint {
8742 let mut index = -1;
8743
8744 if threads == 1 || n < 65536 {
8745 index = final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, 0, n);
8746 } else {
8747 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8748 let mut block_start = n - 1;
8749 while block_start >= 0 {
8750 if sa[block_start as usize] == 0 {
8751 index = block_start;
8752 block_start -= 1;
8753 } else {
8754 let mut block_max_end =
8755 block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8756 if block_max_end < 0 {
8757 block_max_end = -1;
8758 }
8759 let mut block_end = block_start - 1;
8760 while block_end > block_max_end && sa[block_end as usize] != 0 {
8761 block_end -= 1;
8762 }
8763 let block_size = block_start - block_end;
8764 if block_size < 32 {
8765 while block_start > block_end {
8766 let mut p = sa[block_start as usize];
8767 sa[block_start as usize] = p & SAINT_MAX;
8768 if p > 0 {
8769 p -= 1;
8770 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8771 let c1 = t[p as usize] as usize;
8772 sa[block_start as usize] = c1 as SaSint;
8773 induction_bucket[c1] -= 1;
8774 sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8775 p
8776 } else {
8777 c0 as SaSint | SAINT_MIN
8778 };
8779 }
8780 block_start -= 1;
8781 }
8782 } else {
8783 final_bwt_scan_right_to_left_16u_block_omp(
8784 t,
8785 sa,
8786 k,
8787 induction_bucket,
8788 block_end + 1,
8789 block_size,
8790 threads,
8791 &mut thread_state,
8792 );
8793 block_start = block_end;
8794 }
8795 }
8796 }
8797 }
8798 index
8799}
8800
8801#[allow(dead_code)]
8802fn final_bwt_aux_scan_right_to_left_16u_omp(
8803 t: &[u16],
8804 sa: &mut [SaSint],
8805 n: SaSint,
8806 k: SaSint,
8807 rm: SaSint,
8808 i_sample: &mut [SaSint],
8809 induction_bucket: &mut [SaSint],
8810 threads: SaSint,
8811) {
8812 if threads == 1 || n < 65536 {
8813 final_bwt_aux_scan_right_to_left_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8814 } else {
8815 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8816 let mut block_start = n - 1;
8817 while block_start >= 0 {
8818 if sa[block_start as usize] == 0 {
8819 block_start -= 1;
8820 } else {
8821 let mut block_max_end =
8822 block_start - threads * ((PER_THREAD_CACHE_SIZE as SaSint - 16 * threads) / 2);
8823 if block_max_end < 0 {
8824 block_max_end = -1;
8825 }
8826 let mut block_end = block_start - 1;
8827 while block_end > block_max_end && sa[block_end as usize] != 0 {
8828 block_end -= 1;
8829 }
8830 let block_size = block_start - block_end;
8831 if block_size < 32 {
8832 while block_start > block_end {
8833 let mut p = sa[block_start as usize];
8834 sa[block_start as usize] = p & SAINT_MAX;
8835 if p > 0 {
8836 p -= 1;
8837 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8838 let c1 = t[p as usize] as usize;
8839 sa[block_start as usize] = c1 as SaSint;
8840 induction_bucket[c1] -= 1;
8841 sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8842 p
8843 } else {
8844 c0 as SaSint | SAINT_MIN
8845 };
8846 if (p & rm) == 0 {
8847 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1] + 1;
8848 }
8849 }
8850 block_start -= 1;
8851 }
8852 } else {
8853 final_bwt_aux_scan_right_to_left_16u_block_omp(
8854 t,
8855 sa,
8856 k,
8857 rm,
8858 i_sample,
8859 induction_bucket,
8860 block_end + 1,
8861 block_size,
8862 threads,
8863 &mut thread_state,
8864 );
8865 block_start = block_end;
8866 }
8867 }
8868 }
8869 }
8870}
8871
8872#[allow(dead_code)]
8873fn final_sorting_scan_right_to_left_16u_omp(
8874 t: &[u16],
8875 sa: &mut [SaSint],
8876 omp_block_start: SaSint,
8877 omp_block_size: SaSint,
8878 k: SaSint,
8879 induction_bucket: &mut [SaSint],
8880 threads: SaSint,
8881) {
8882 if threads == 1 || omp_block_size < 65536 {
8883 final_sorting_scan_right_to_left_16u(
8884 t,
8885 sa,
8886 induction_bucket,
8887 omp_block_start,
8888 omp_block_size,
8889 );
8890 } else {
8891 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8892 let mut block_start = omp_block_start + omp_block_size - 1;
8893 while block_start >= omp_block_start {
8894 if sa[block_start as usize] == 0 {
8895 block_start -= 1;
8896 } else {
8897 let mut block_max_end =
8898 block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8899 if block_max_end < omp_block_start {
8900 block_max_end = omp_block_start - 1;
8901 }
8902 let mut block_end = block_start - 1;
8903 while block_end > block_max_end && sa[block_end as usize] != 0 {
8904 block_end -= 1;
8905 }
8906 let block_size = block_start - block_end;
8907 if block_size < 32 {
8908 while block_start > block_end {
8909 let mut p = sa[block_start as usize];
8910 sa[block_start as usize] = p & SAINT_MAX;
8911 if p > 0 {
8912 p -= 1;
8913 let c = t[p as usize] as usize;
8914 induction_bucket[c] -= 1;
8915 let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8916 SAINT_MIN
8917 } else {
8918 0
8919 };
8920 sa[induction_bucket[c] as usize] = p | mark;
8921 }
8922 block_start -= 1;
8923 }
8924 } else {
8925 final_sorting_scan_right_to_left_16u_block_omp(
8926 t,
8927 sa,
8928 k,
8929 induction_bucket,
8930 block_end + 1,
8931 block_size,
8932 threads,
8933 &mut thread_state,
8934 );
8935 block_start = block_end;
8936 }
8937 }
8938 }
8939 }
8940}
8941
8942#[allow(dead_code)]
8943fn final_gsa_scan_right_to_left_16u_omp(
8944 t: &[u16],
8945 sa: &mut [SaSint],
8946 omp_block_start: SaSint,
8947 omp_block_size: SaSint,
8948 k: SaSint,
8949 induction_bucket: &mut [SaSint],
8950 threads: SaSint,
8951) {
8952 if threads == 1 || omp_block_size < 65536 {
8953 final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, omp_block_start, omp_block_size);
8954 } else {
8955 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8956 let mut block_start = omp_block_start + omp_block_size - 1;
8957 while block_start >= omp_block_start {
8958 if sa[block_start as usize] == 0 {
8959 block_start -= 1;
8960 } else {
8961 let mut block_max_end =
8962 block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8963 if block_max_end < omp_block_start {
8964 block_max_end = omp_block_start - 1;
8965 }
8966 let mut block_end = block_start - 1;
8967 while block_end > block_max_end && sa[block_end as usize] != 0 {
8968 block_end -= 1;
8969 }
8970 let block_size = block_start - block_end;
8971 if block_size < 32 {
8972 while block_start > block_end {
8973 let mut p = sa[block_start as usize];
8974 sa[block_start as usize] = p & SAINT_MAX;
8975 if p > 0 && t[(p - 1) as usize] > 0 {
8976 p -= 1;
8977 let c = t[p as usize] as usize;
8978 induction_bucket[c] -= 1;
8979 let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8980 SAINT_MIN
8981 } else {
8982 0
8983 };
8984 sa[induction_bucket[c] as usize] = p | mark;
8985 }
8986 block_start -= 1;
8987 }
8988 } else {
8989 final_gsa_scan_right_to_left_16u_block_omp(
8990 t,
8991 sa,
8992 k,
8993 induction_bucket,
8994 block_end + 1,
8995 block_size,
8996 threads,
8997 &mut thread_state,
8998 );
8999 block_start = block_end;
9000 }
9001 }
9002 }
9003 }
9004}
9005
9006#[allow(dead_code)]
9007fn induce_final_order_16u_omp(
9008 t: &[u16],
9009 sa: &mut [SaSint],
9010 n: SaSint,
9011 k: SaSint,
9012 flags: SaSint,
9013 r: SaSint,
9014 i_out: Option<&mut [SaSint]>,
9015 buckets: &mut [SaSint],
9016 threads: SaSint,
9017 _thread_state: &mut [ThreadState],
9018) -> SaSint {
9019 if (flags & LIBSAIS_FLAGS_BWT) == 0 {
9020 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9021 buckets[6 * ALPHABET_SIZE] = buckets[7 * ALPHABET_SIZE] - 1;
9022 }
9023
9024 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9025 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9026 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9027
9028 final_sorting_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9029 if threads > 1 && n >= 65_536 {
9030 clear_lms_suffixes_omp(
9031 sa,
9032 n,
9033 ALPHABET_SIZE as SaSint,
9034 bucket_start,
9035 bucket_end,
9036 threads,
9037 );
9038 }
9039
9040 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9041 flip_suffix_markers_omp(sa, bucket_end[0], threads);
9042 final_gsa_scan_right_to_left_16u_omp(
9043 t,
9044 sa,
9045 bucket_end[0],
9046 n - bucket_end[0],
9047 k,
9048 bucket_end,
9049 threads,
9050 );
9051 } else {
9052 final_sorting_scan_right_to_left_16u_omp(t, sa, 0, n, k, bucket_end, threads);
9053 }
9054
9055 0
9056 } else if let Some(i_out) = i_out {
9057 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9058 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9059 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9060
9061 final_bwt_aux_scan_left_to_right_16u_omp(t, sa, n, k, r - 1, i_out, bucket_start, threads);
9062 if threads > 1 && n >= 65_536 {
9063 clear_lms_suffixes_omp(
9064 sa,
9065 n,
9066 ALPHABET_SIZE as SaSint,
9067 bucket_start,
9068 bucket_end,
9069 threads,
9070 );
9071 }
9072 final_bwt_aux_scan_right_to_left_16u_omp(t, sa, n, k, r - 1, i_out, bucket_end, threads);
9073 0
9074 } else {
9075 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9076 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9077 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9078
9079 final_bwt_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9080 if threads > 1 && n >= 65_536 {
9081 clear_lms_suffixes_omp(
9082 sa,
9083 n,
9084 ALPHABET_SIZE as SaSint,
9085 bucket_start,
9086 bucket_end,
9087 threads,
9088 );
9089 }
9090 final_bwt_scan_right_to_left_16u_omp(t, sa, n, k, bucket_end, threads)
9091 }
9092}
9093
9094#[allow(dead_code)]
9095fn bwt_copy_16u(u: &mut [u16], a: &[SaSint], n: SaSint) {
9096 let mut i = 0isize;
9097 let mut j = n as isize - 7;
9098 while i < j {
9099 u[i as usize] = a[i as usize] as u16;
9100 u[(i + 1) as usize] = a[(i + 1) as usize] as u16;
9101 u[(i + 2) as usize] = a[(i + 2) as usize] as u16;
9102 u[(i + 3) as usize] = a[(i + 3) as usize] as u16;
9103 u[(i + 4) as usize] = a[(i + 4) as usize] as u16;
9104 u[(i + 5) as usize] = a[(i + 5) as usize] as u16;
9105 u[(i + 6) as usize] = a[(i + 6) as usize] as u16;
9106 u[(i + 7) as usize] = a[(i + 7) as usize] as u16;
9107 i += 8;
9108 }
9109
9110 j += 7;
9111 while i < j {
9112 u[i as usize] = a[i as usize] as u16;
9113 i += 1;
9114 }
9115}
9116
9117#[allow(dead_code)]
9118fn bwt_copy_16u_omp(u: &mut [u16], a: &[SaSint], n: SaSint, threads: SaSint) {
9119 if threads == 1 || n < 65_536 {
9120 bwt_copy_16u(u, a, n);
9121 return;
9122 }
9123
9124 let block_stride = (n / threads) & !15;
9125 for thread in 0..threads {
9126 let block_start = thread * block_stride;
9127 let block_size = if thread < threads - 1 {
9128 block_stride
9129 } else {
9130 n - block_start
9131 };
9132 let start = block_start as usize;
9133 bwt_copy_16u(&mut u[start..], &a[start..], block_size);
9134 }
9135}
9136
9137#[allow(dead_code)]
9138fn final_bwt_ltr_step(t: &[u16], sa: &mut [SaSint], induction_bucket: &mut [SaSint], index: usize) {
9139 let mut p = sa[index];
9140 sa[index] = p & SAINT_MAX;
9141 if p > 0 {
9142 p -= 1;
9143 let c = t[p as usize] as usize;
9144 sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9145 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9146 SAINT_MIN
9147 } else {
9148 0
9149 };
9150 let dst = induction_bucket[c] as usize;
9151 sa[dst] = p | mark;
9152 induction_bucket[c] += 1;
9153 }
9154}
9155
9156#[allow(dead_code)]
9157fn final_bwt_rtl_step(
9158 t: &[u16],
9159 sa: &mut [SaSint],
9160 induction_bucket: &mut [SaSint],
9161 index: usize,
9162 primary_index: &mut SaSint,
9163) {
9164 let mut p = sa[index];
9165 if p == 0 {
9166 *primary_index = index as SaSint;
9167 }
9168 sa[index] = p & SAINT_MAX;
9169 if p > 0 {
9170 p -= 1;
9171 let c0 = t[(p - SaSint::from(p > 0)) as usize];
9172 let c1 = t[p as usize];
9173 sa[index] = c1 as SaSint;
9174 let induced = if c0 <= c1 {
9175 p
9176 } else {
9177 c0 as SaSint | SAINT_MIN
9178 };
9179 induction_bucket[c1 as usize] -= 1;
9180 sa[induction_bucket[c1 as usize] as usize] = induced;
9181 }
9182}
9183
9184#[allow(dead_code)]
9185fn final_bwt_aux_ltr_step(
9186 t: &[u16],
9187 sa: &mut [SaSint],
9188 rm: SaSint,
9189 i_sample: &mut [SaSint],
9190 induction_bucket: &mut [SaSint],
9191 index: usize,
9192) {
9193 let mut p = sa[index];
9194 sa[index] = p & SAINT_MAX;
9195 if p > 0 {
9196 p -= 1;
9197 let c = t[p as usize] as usize;
9198 sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9199 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9200 SAINT_MIN
9201 } else {
9202 0
9203 };
9204 let dst = induction_bucket[c] as usize;
9205 sa[dst] = p | mark;
9206 induction_bucket[c] += 1;
9207 if (p & rm) == 0 {
9208 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
9209 }
9210 }
9211}
9212
9213#[allow(dead_code)]
9214fn final_bwt_aux_rtl_step(
9215 t: &[u16],
9216 sa: &mut [SaSint],
9217 rm: SaSint,
9218 i_sample: &mut [SaSint],
9219 induction_bucket: &mut [SaSint],
9220 index: usize,
9221) {
9222 let mut p = sa[index];
9223 sa[index] = p & SAINT_MAX;
9224 if p > 0 {
9225 p -= 1;
9226 let c0 = t[(p - SaSint::from(p > 0)) as usize];
9227 let c1 = t[p as usize];
9228 sa[index] = c1 as SaSint;
9229 let induced = if c0 <= c1 {
9230 p
9231 } else {
9232 c0 as SaSint | SAINT_MIN
9233 };
9234 induction_bucket[c1 as usize] -= 1;
9235 sa[induction_bucket[c1 as usize] as usize] = induced;
9236 if (p & rm) == 0 {
9237 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1 as usize] + 1;
9238 }
9239 }
9240}
9241
/// Recursive driver that sorts the suffixes of a 32-bit integer text.
///
/// `t_ptr` addresses `n` symbols and `sa_ptr` addresses a buffer of `sa_capacity` elements, of
/// which the first `n + fs` are used (asserted below). Depending on how many bucket entries per
/// symbol fit into the free space `fs` or into `local_buffer`, one of four strategies is taken,
/// using `6 * k`, `4 * k`, `2 * k` or `k` bucket entries respectively.
///
/// Returns `0` on success and `-2` when a nested recursive call reports a non-zero status (or
/// when the bucket pointer of the last strategy is null).
///
/// # Panics
///
/// Panics when `n` or the clamped `fs` is negative, when `n + fs > sa_capacity`, or when `k` is
/// negative on entry to the last strategy.
///
/// NOTE(review): the recursive calls pass a pointer into the SA buffer as the new `t_ptr`, so the
/// `t` and `sa` slices rebuilt from raw pointers below can overlap. Callers must supply pointers
/// valid for `n` and `sa_capacity` elements respectively — confirm the aliasing is acceptable.
#[allow(dead_code)]
fn main_32s_recursion(
    t_ptr: *mut SaSint,
    sa_ptr: *mut SaSint,
    sa_capacity: usize,
    n: SaSint,
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
    local_buffer: &mut [SaSint],
) -> SaSint {
    // Clamp the free space so that `n + fs` cannot exceed `SAINT_MAX`.
    let fs = fs.min(SAINT_MAX - n);
    let local_buffer_size = SaSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("fits");
    let n_usize = usize::try_from(n).expect("n must be non-negative");
    let fs_usize = usize::try_from(fs).expect("fs must be non-negative");
    let total_len = n_usize + fs_usize;
    assert!(total_len <= sa_capacity);

    // Strategy 1: the free space or the local buffer holds at least 6 entries per symbol.
    if k > 0 && ((fs / k) >= 6 || (local_buffer_size / k) >= 6) {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 6 {
            1024usize
        } else {
            16usize
        };
        let need = 6 * k_usize;
        // Buckets live in the local buffer when it is larger than the free space; otherwise
        // they are carved out of the tail of the used SA region.
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                // With enough slack, round the bucket start up to an `alignment`-element
                // boundary; otherwise use the last `need` elements directly.
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 6 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_4k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            // Zero everything in front of the `m` entries stored at `sa[n - m..n]`.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[..n_usize - m_usize].fill(0);
            }

            let first_lms_suffix = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[n_usize - m_usize]
            };
            let left_suffixes_count = unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
                    std::slice::from_raw_parts(t_ptr, n_usize),
                    k,
                    buckets,
                    first_lms_suffix,
                )
            };

            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                // The radix sort and marker pass use the bucket entries after the first `4 * k`.
                let (_, induction_bucket) = buckets.split_at_mut(4 * k_usize);
                radix_sort_lms_suffixes_32s_6k_omp(t, sa, n, m, induction_bucket, threads);
                if (n / 8192) < k {
                    radix_sort_set_markers_32s_6k_omp(sa, k, induction_bucket, threads);
                }
                if threads > 1 && n >= 65_536 {
                    sa[n_usize - m_usize..n_usize].fill(0);
                }
                initialize_buckets_for_partial_sorting_32s_6k(
                    t,
                    k,
                    buckets,
                    first_lms_suffix,
                    left_suffixes_count,
                );
                induce_partial_order_32s_6k_omp(
                    t,
                    sa,
                    n,
                    k,
                    buckets,
                    first_lms_suffix,
                    left_suffixes_count,
                    threads,
                    thread_state,
                );
            }

            // The renumbering routine is selected by the same `n / 8192 < k` test that
            // decided whether markers were set above.
            let names = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                if (n / 8192) < k {
                    renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
                        sa,
                        n,
                        m,
                        threads,
                        thread_state,
                    )
                } else {
                    renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state)
                }
            };

            if names < m {
                // Fewer names than entries: recurse on a reduced text of `m - f` symbols.
                let f = if (n / 8192) < k {
                    unsafe {
                        let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                        let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                        compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                    }
                } else {
                    0
                };

                // The reduced text occupies the last `m - f` elements of the used SA region.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }

            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_4k(k, buckets);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
            }
        } else {
            // `m <= 1`: nothing to sort or name; move the entry at `sa[n - 1]` to the front.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }

            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_6k(k, buckets);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                place_lms_suffixes_histogram_32s_6k(sa, n, k, m, buckets);
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                induce_final_order_32s_6k(t, sa, n, k, buckets, threads, thread_state);
            }
        }

        return 0;
    // Strategy 2: at least 4 entries per symbol fit; only taken when `n <= SAINT_MAX / 2`.
    } else if k > 0 && n <= SAINT_MAX / 2 && ((fs / k) >= 4 || (local_buffer_size / k) >= 4) {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 4 {
            1024usize
        } else {
            16usize
        };
        let need = 4 * k_usize;
        // Same bucket placement rule as strategy 1, with `need = 4 * k`.
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 4 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_2k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                initialize_buckets_for_radix_and_partial_sorting_32s_4k(
                    t,
                    k,
                    buckets,
                    sa[n_usize - m_usize],
                );
                // The radix sort and marker pass see the buckets shifted by one entry.
                let (_, induction_bucket) = buckets.split_at_mut(1);
                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
                radix_sort_set_markers_32s_4k_omp(sa, k, induction_bucket, threads);
                place_lms_suffixes_interval_32s_4k(sa, n, k, m - 1, buckets);
                induce_partial_order_32s_4k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa, n, m, threads, thread_state)
            };
            if names < m {
                // Fewer names than entries: compact and recurse on the reduced text.
                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                };

                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }
        } else {
            // `m <= 1`: move the entry at `sa[n - 1]` to the front.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }
        }

        // Final placement and induction run for both the `m > 1` and `m <= 1` cases.
        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_start_and_end_32s_4k(k, buckets);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
        }

        return 0;
    // Strategy 3: at least 2 entries per symbol fit.
    } else if k > 0 && ((fs / k) >= 2 || (local_buffer_size / k) >= 2) {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 2 {
            1024usize
        } else {
            16usize
        };
        let need = 2 * k_usize;
        // Same bucket placement rule as strategy 1, with `need = 2 * k`.
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 2 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_2k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
                    t,
                    k,
                    buckets,
                    sa[n_usize - m_usize],
                );
                // The radix sort sees the buckets shifted by one entry.
                let (_, induction_bucket) = buckets.split_at_mut(1);
                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
                place_lms_suffixes_interval_32s_2k(sa, n, k, m - 1, buckets);
            }

            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_2k(k, buckets);
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                induce_partial_order_32s_2k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            // This renumbering variant also receives the text mutably.
            let names = unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
            };
            if names < m {
                // Fewer names than entries: compact and recurse on the reduced text.
                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                };

                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }
        } else {
            // `m <= 1`: move the entry at `sa[n - 1]` to the front.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }
        }

        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_end_32s_2k(k, buckets);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            place_lms_suffixes_histogram_32s_2k(sa, n, k, m, buckets);
        }

        // The buckets are re-initialised (start and end) before the final induction.
        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_start_and_end_32s_2k(k, buckets);
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            induce_final_order_32s_2k(t, sa, n, k, buckets, threads, thread_state);
        }

        0
    // Strategy 4: a single bucket array of `k` entries.
    } else {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        // When the free space cannot hold `k` entries, the buckets are heap-allocated.
        let mut heap_buckets = if fs < k { Some(vec![0; k_usize]) } else { None };
        let alignment = if fs >= 1024 && (fs - 1024) >= k {
            1024usize
        } else {
            16usize
        };
        let mut buckets_ptr = if let Some(ref mut heap) = heap_buckets {
            heap.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start = if fs_usize >= k_usize + alignment {
                    let byte_ptr = sa[total_len - k_usize - alignment..].as_mut_ptr() as usize;
                    let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                    (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                } else {
                    total_len - k_usize
                };
                sa[start..].as_mut_ptr()
            }
        };

        if buckets_ptr.is_null() {
            return -2;
        }

        unsafe {
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            sa[..n_usize].fill(0);
        }

        unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            count_suffixes_32s(t, n, k, buckets);
            initialize_buckets_end_32s_1k(k, buckets);
        }

        let m = unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            radix_sort_lms_suffixes_32s_1k(t, sa, n, buckets)
        };
        if m > 1 {
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                induce_partial_order_32s_1k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
            };
            if names < m {
                // Release heap buckets for the duration of the recursion; the null pointer
                // records that they have to be allocated again afterwards.
                if heap_buckets.is_some() {
                    let _ = heap_buckets.take();
                    buckets_ptr = std::ptr::null_mut();
                }

                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                };

                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    reconstruct_compacted_lms_suffixes_32s_1k_omp(t, sa, n, m, fs, f, threads);
                }

                // Re-create the heap buckets that were dropped before recursing.
                if buckets_ptr.is_null() {
                    heap_buckets = Some(vec![0; k_usize]);
                    buckets_ptr = heap_buckets.as_mut().unwrap().as_mut_ptr();
                    if buckets_ptr.is_null() {
                        return -2;
                    }
                }
            }

            // Recount into the bucket array and re-derive the bucket ends before placement.
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                count_suffixes_32s(t, n, k, buckets);
                initialize_buckets_end_32s_1k(k, buckets);
            }
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                place_lms_suffixes_interval_32s_1k(t, sa, k, m, buckets);
            }
        }

        unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            induce_final_order_32s_1k(t, sa, n, k, buckets, threads, thread_state);
        }

        0
    }
}
9861
9862#[allow(dead_code)]
9863fn main_32s_entry(
9864 t_ptr: *mut SaSint,
9865 sa: &mut [SaSint],
9866 n: SaSint,
9867 k: SaSint,
9868 fs: SaSint,
9869 threads: SaSint,
9870 thread_state: &mut [ThreadState],
9871) -> SaSint {
9872 let mut local_buffer = [0; 2 * LIBSAIS_LOCAL_BUFFER_SIZE];
9873 main_32s_recursion(
9874 t_ptr,
9875 sa.as_mut_ptr(),
9876 sa.len(),
9877 n,
9878 k,
9879 fs,
9880 threads,
9881 thread_state,
9882 &mut local_buffer[LIBSAIS_LOCAL_BUFFER_SIZE..],
9883 )
9884}
9885
9886#[allow(dead_code)]
9887fn main_16u(
9888 t: &[u16],
9889 sa: &mut [SaSint],
9890 n: SaSint,
9891 buckets: &mut [SaSint],
9892 flags: SaSint,
9893 r: SaSint,
9894 i_out: Option<&mut [SaSint]>,
9895 fs: SaSint,
9896 freq: Option<&mut [SaSint]>,
9897 threads: SaSint,
9898 thread_state: &mut [ThreadState],
9899) -> SaSint {
9900 let fs = fs.min(SAINT_MAX - n);
9901
9902 let m = count_and_gather_lms_suffixes_16u_omp(t, sa, n, buckets, threads, thread_state);
9903 let k = initialize_buckets_start_and_end_16u(buckets, freq);
9904
9905 if (flags & LIBSAIS_FLAGS_GSA) != 0 && (buckets[0] != 0 || buckets[2] != 0 || buckets[3] != 1) {
9906 return -1;
9907 }
9908
9909 if m > 0 {
9910 let first_lms_suffix = sa[(n - m) as usize];
9911 let left_suffixes_count =
9912 initialize_buckets_for_lms_suffixes_radix_sort_16u(t, buckets, first_lms_suffix);
9913
9914 if threads > 1 && n >= 65_536 {
9915 sa[..(n - m) as usize].fill(0);
9916 }
9917 radix_sort_lms_suffixes_16u_omp(t, sa, n, m, flags, buckets, threads, thread_state);
9918 if threads > 1 && n >= 65_536 {
9919 sa[(n - m) as usize..n as usize].fill(0);
9920 }
9921
9922 initialize_buckets_for_partial_sorting_16u(
9923 t,
9924 buckets,
9925 first_lms_suffix,
9926 left_suffixes_count,
9927 );
9928 induce_partial_order_16u_omp(
9929 t,
9930 sa,
9931 n,
9932 k,
9933 flags,
9934 buckets,
9935 first_lms_suffix,
9936 left_suffixes_count,
9937 threads,
9938 );
9939
9940 let names = renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
9941 if names < m {
9942 let recursive_t_start = (n + fs - m) as usize;
9943 let recursive_t_ptr = sa[recursive_t_start..].as_mut_ptr();
9944 if main_32s_entry(
9945 recursive_t_ptr,
9946 sa,
9947 m,
9948 names,
9949 fs + n - 2 * m,
9950 threads,
9951 thread_state,
9952 ) != 0
9953 {
9954 return -2;
9955 }
9956
9957 gather_lms_suffixes_16u_omp(t, sa, n, threads, thread_state);
9958 reconstruct_lms_suffixes_omp(sa, n, m, threads);
9959 }
9960
9961 place_lms_suffixes_interval_16u(sa, n, m, flags, buckets);
9962 } else {
9963 sa[..n as usize].fill(0);
9964 }
9965
9966 induce_final_order_16u_omp(t, sa, n, k, flags, r, i_out, buckets, threads, thread_state)
9967}
9968
9969#[allow(dead_code)]
9970fn main_16u_alloc(
9971 t: &[u16],
9972 sa: &mut [SaSint],
9973 flags: SaSint,
9974 r: SaSint,
9975 i_out: Option<&mut [SaSint]>,
9976 fs: SaSint,
9977 freq: Option<&mut [SaSint]>,
9978 threads: SaSint,
9979) -> SaSint {
9980 if fs < 0
9981 || threads < 0
9982 || sa.len()
9983 < t.len()
9984 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
9985 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
9986 {
9987 return -1;
9988 }
9989
9990 fill_freq(t, freq);
9991 if t.len() <= 1 {
9992 if t.len() == 1 {
9993 sa[0] = 0;
9994 }
9995 return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
9996 t.len() as SaSint
9997 } else {
9998 0
9999 };
10000 }
10001
10002 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
10003 let threads = normalize_threads(threads);
10004 let mut thread_state = if threads > 1 {
10005 match alloc_thread_state(threads) {
10006 Some(thread_state) => thread_state,
10007 None => return -2,
10008 }
10009 } else {
10010 Vec::new()
10011 };
10012
10013 main_16u(
10014 t,
10015 sa,
10016 t.len() as SaSint,
10017 &mut buckets,
10018 flags,
10019 r,
10020 i_out,
10021 fs,
10022 None,
10023 threads,
10024 &mut thread_state,
10025 )
10026}
10027
10028fn main_16u_ctx(
10029 ctx: &mut Context,
10030 t: &[u16],
10031 sa: &mut [SaSint],
10032 flags: SaSint,
10033 r: SaSint,
10034 i_out: Option<&mut [SaSint]>,
10035 fs: SaSint,
10036 freq: Option<&mut [SaSint]>,
10037) -> SaSint {
10038 if fs < 0
10039 || sa.len()
10040 < t.len()
10041 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10042 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10043 {
10044 return -1;
10045 }
10046
10047 if ctx.threads <= 0 || ctx.buckets.len() < 8 * ALPHABET_SIZE {
10048 return -2;
10049 }
10050
10051 fill_freq(t, freq);
10052 if t.len() <= 1 {
10053 if t.len() == 1 {
10054 sa[0] = 0;
10055 }
10056 return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
10057 t.len() as SaSint
10058 } else {
10059 0
10060 };
10061 }
10062
10063 let mut empty_thread_state = [];
10064 let thread_state = if ctx.threads > 1 {
10065 match ctx.thread_state.as_deref_mut() {
10066 Some(thread_state) if thread_state.len() >= ctx.threads as usize => thread_state,
10067 None => return -2,
10068 Some(_) => return -2,
10069 }
10070 } else {
10071 &mut empty_thread_state
10072 };
10073
10074 main_16u(
10075 t,
10076 sa,
10077 t.len() as SaSint,
10078 &mut ctx.buckets,
10079 flags,
10080 r,
10081 i_out,
10082 fs,
10083 None,
10084 ctx.threads,
10085 thread_state,
10086 )
10087}
10088
10089fn main_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint, threads: SaSint) -> SaSint {
10090 let threads = normalize_threads(threads);
10091 let mut thread_state = if threads > 1 {
10092 match alloc_thread_state(threads) {
10093 Some(thread_state) => thread_state,
10094 None => return -2,
10095 }
10096 } else {
10097 Vec::new()
10098 };
10099
10100 main_32s_entry(
10101 t.as_mut_ptr(),
10102 sa,
10103 t.len() as SaSint,
10104 k,
10105 fs,
10106 threads,
10107 &mut thread_state,
10108 )
10109}
10110
/// Computes the suffix array of the 16-bit text `t` into `sa` on a single thread.
///
/// Forwards to `main_16u_alloc` with no flags, `r = 0`, no `i_out` and `threads = 1`.
/// `sa` must hold at least `t.len() + fs` elements; `freq`, when supplied, must hold at least
/// `ALPHABET_SIZE` entries and is filled by the callee.
///
/// Returns `-1` when the arguments fail validation and otherwise the status of the sort.
pub fn libsais16(t: &[u16], sa: &mut [SaSint], fs: SaSint, freq: Option<&mut [SaSint]>) -> SaSint {
    main_16u_alloc(t, sa, 0, 0, None, fs, freq, 1)
}
10122
/// Single-threaded variant of the sort that runs with `LIBSAIS_FLAGS_GSA` set.
///
/// Forwards to `main_16u_alloc` with `r = 0`, no `i_out` and `threads = 1`. Besides the usual
/// argument validation, the GSA flag enables an extra bucket check in `main_16u` that also
/// yields `-1` when it fails.
/// NOTE(review): presumably this builds a generalized suffix array over 0-separated strings —
/// confirm against the upstream libsais documentation.
pub fn libsais16_gsa(
    t: &[u16],
    sa: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
) -> SaSint {
    main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, 1)
}
10139
10140pub fn libsais16_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
10151 if fs < 0
10152 || sa.len()
10153 < t.len()
10154 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10155 {
10156 return -1;
10157 }
10158
10159 if t.len() <= 1 {
10160 if t.len() == 1 {
10161 sa[0] = 0;
10162 }
10163 return 0;
10164 }
10165
10166 main_int(t, sa, k, fs, 1)
10167}
10168
/// Computes the suffix array of `t` into `sa` using the buffers held by `ctx`.
///
/// Forwards to `main_16u_ctx` with no flags, `r = 0` and no `i_out`; the thread count comes
/// from the context. Returns `-1` when the arguments fail validation, `-2` when the context is
/// not usable, and otherwise the status of the sort.
pub fn libsais16_ctx(
    ctx: &mut Context,
    t: &[u16],
    sa: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
) -> SaSint {
    main_16u_ctx(ctx, t, sa, 0, 0, None, fs, freq)
}
10187
/// Context-based variant of the sort that runs with `LIBSAIS_FLAGS_GSA` set.
///
/// Forwards to `main_16u_ctx` with `r = 0` and no `i_out`; the thread count comes from the
/// context. Returns `-1` when the arguments fail validation or the GSA bucket check in
/// `main_16u` fails, `-2` when the context is not usable, and otherwise the status of the sort.
pub fn libsais16_gsa_ctx(
    ctx: &mut Context,
    t: &[u16],
    sa: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
) -> SaSint {
    main_16u_ctx(ctx, t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq)
}
10206
10207pub fn libsais16_omp(
10217 t: &[u16],
10218 sa: &mut [SaSint],
10219 fs: SaSint,
10220 freq: Option<&mut [SaSint]>,
10221 threads: SaSint,
10222) -> SaSint {
10223 if threads < 0 {
10224 -1
10225 } else {
10226 main_16u_alloc(t, sa, 0, 0, None, fs, freq, threads)
10227 }
10228}
10229
10230pub fn libsais16_gsa_omp(
10240 t: &[u16],
10241 sa: &mut [SaSint],
10242 fs: SaSint,
10243 freq: Option<&mut [SaSint]>,
10244 threads: SaSint,
10245) -> SaSint {
10246 if threads < 0 {
10247 -1
10248 } else {
10249 main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, threads)
10250 }
10251}
10252
10253pub fn libsais16_int_omp(
10265 t: &mut [SaSint],
10266 sa: &mut [SaSint],
10267 k: SaSint,
10268 fs: SaSint,
10269 threads: SaSint,
10270) -> SaSint {
10271 if threads < 0
10272 || fs < 0
10273 || sa.len()
10274 < t.len()
10275 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10276 {
10277 return -1;
10278 }
10279
10280 if t.len() <= 1 {
10281 if t.len() == 1 {
10282 sa[0] = 0;
10283 }
10284 return 0;
10285 }
10286
10287 main_int(t, sa, k, fs, threads)
10288}
10289
10290fn build_bwt(
10291 t: &[u16],
10292 u: &mut [u16],
10293 a: &mut [SaSint],
10294 fs: SaSint,
10295 freq: Option<&mut [SaSint]>,
10296 threads: SaSint,
10297) -> SaSint {
10298 if fs < 0
10299 || threads < 0
10300 || u.len() < t.len()
10301 || a.len()
10302 < t.len()
10303 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10304 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10305 {
10306 return -1;
10307 }
10308 if t.len() <= 1 {
10309 fill_freq(t, freq);
10310 if t.len() == 1 {
10311 u[0] = t[0];
10312 }
10313 return t.len() as SaSint;
10314 }
10315
10316 let n = t.len();
10317 let mut index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, threads);
10318 if index >= 0 {
10319 index += 1;
10320 u[0] = t[n - 1];
10321 bwt_copy_16u(&mut u[1..], a, index - 1);
10322 bwt_copy_16u(
10323 &mut u[index as usize..],
10324 &a[index as usize..],
10325 n as SaSint - index,
10326 );
10327 }
10328 index
10329}
10330
/// Computes the Burrows-Wheeler transform of `t` into `u` on a single thread.
///
/// Forwards to `build_bwt` with `threads = 1`; `a` is working space of at least `t.len() + fs`
/// elements. Returns `-1` on invalid arguments, `t.len()` for texts of at most one symbol, a
/// negative status on failure, and otherwise the sort's status plus one.
pub fn libsais16_bwt(
    t: &[u16],
    u: &mut [u16],
    a: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
) -> SaSint {
    build_bwt(t, u, a, fs, freq, 1)
}
10349
10350fn build_bwt_aux(
10351 t: &[u16],
10352 u: &mut [u16],
10353 a: &mut [SaSint],
10354 fs: SaSint,
10355 freq: Option<&mut [SaSint]>,
10356 r: SaSint,
10357 i: &mut [SaSint],
10358 threads: SaSint,
10359) -> SaSint {
10360 if threads < 0 || r < 2 || (r & (r - 1)) != 0 {
10361 return -1;
10362 }
10363 let samples = if t.is_empty() {
10364 1
10365 } else {
10366 (t.len() - 1) / r as usize + 1
10367 };
10368 if i.len() < samples {
10369 return -1;
10370 }
10371 let n = t.len();
10372 if n <= 1 {
10373 fill_freq(t, freq);
10374 if n == 1 {
10375 u[0] = t[0];
10376 }
10377 i[0] = n as SaSint;
10378 return 0;
10379 }
10380
10381 let index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, threads);
10382 if index == 0 {
10383 u[0] = t[n - 1];
10384 bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10385 bwt_copy_16u(
10386 &mut u[i[0] as usize..],
10387 &a[i[0] as usize..],
10388 n as SaSint - i[0],
10389 );
10390 }
10391 index
10392}
10393
/// Computes the Burrows-Wheeler transform of `t` into `u` together with sampled indexes in `i`,
/// on a single thread.
///
/// Forwards to `build_bwt_aux` with `threads = 1`. `r` must be a power of two `>= 2` and `i`
/// must hold at least `(t.len() - 1) / r + 1` entries (one entry for an empty text). Returns
/// `-1` on invalid arguments and otherwise the status reported by `build_bwt_aux` (`0` on
/// success).
pub fn libsais16_bwt_aux(
    t: &[u16],
    u: &mut [u16],
    a: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
    r: SaSint,
    i: &mut [SaSint],
) -> SaSint {
    build_bwt_aux(t, u, a, fs, freq, r, i, 1)
}
10416
10417pub fn libsais16_bwt_ctx(
10428 ctx: &mut Context,
10429 t: &[u16],
10430 u: &mut [u16],
10431 a: &mut [SaSint],
10432 fs: SaSint,
10433 freq: Option<&mut [SaSint]>,
10434) -> SaSint {
10435 if fs < 0
10436 || u.len() < t.len()
10437 || a.len()
10438 < t.len()
10439 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10440 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10441 {
10442 return -1;
10443 }
10444 if t.len() <= 1 {
10445 fill_freq(t, freq);
10446 if t.len() == 1 {
10447 u[0] = t[0];
10448 }
10449 return t.len() as SaSint;
10450 }
10451
10452 let n = t.len();
10453 let mut index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq);
10454 if index >= 0 {
10455 index += 1;
10456 u[0] = t[n - 1];
10457 bwt_copy_16u(&mut u[1..], a, index - 1);
10458 bwt_copy_16u(
10459 &mut u[index as usize..],
10460 &a[index as usize..],
10461 n as SaSint - index,
10462 );
10463 }
10464 index
10465}
10466
10467pub fn libsais16_bwt_aux_ctx(
10480 ctx: &mut Context,
10481 t: &[u16],
10482 u: &mut [u16],
10483 a: &mut [SaSint],
10484 fs: SaSint,
10485 freq: Option<&mut [SaSint]>,
10486 r: SaSint,
10487 i: &mut [SaSint],
10488) -> SaSint {
10489 if fs < 0 || r < 2 || (r & (r - 1)) != 0 {
10490 return -1;
10491 }
10492 let samples = if t.is_empty() {
10493 1
10494 } else {
10495 (t.len() - 1) / r as usize + 1
10496 };
10497 if u.len() < t.len()
10498 || a.len()
10499 < t.len()
10500 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10501 || i.len() < samples
10502 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10503 {
10504 return -1;
10505 }
10506 if t.len() <= 1 {
10507 fill_freq(t, freq);
10508 if t.len() == 1 {
10509 u[0] = t[0];
10510 }
10511 i[0] = t.len() as SaSint;
10512 return 0;
10513 }
10514
10515 let n = t.len();
10516 let index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq);
10517 if index == 0 {
10518 u[0] = t[n - 1];
10519 bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10520 bwt_copy_16u(
10521 &mut u[i[0] as usize..],
10522 &a[i[0] as usize..],
10523 n as SaSint - i[0],
10524 );
10525 }
10526 index
10527}
10528
10529pub fn libsais16_bwt_omp(
10540 t: &[u16],
10541 u: &mut [u16],
10542 a: &mut [SaSint],
10543 fs: SaSint,
10544 freq: Option<&mut [SaSint]>,
10545 threads: SaSint,
10546) -> SaSint {
10547 if threads < 0 {
10548 -1
10549 } else {
10550 build_bwt(t, u, a, fs, freq, threads)
10551 }
10552}
10553
10554pub fn libsais16_bwt_aux_omp(
10567 t: &[u16],
10568 u: &mut [u16],
10569 a: &mut [SaSint],
10570 fs: SaSint,
10571 freq: Option<&mut [SaSint]>,
10572 r: SaSint,
10573 i: &mut [SaSint],
10574 threads: SaSint,
10575) -> SaSint {
10576 if threads < 0 {
10577 -1
10578 } else {
10579 build_bwt_aux(t, u, a, fs, freq, r, i, threads)
10580 }
10581}
10582
10583fn validate_unbwt_aux(
10584 t: &[u16],
10585 u: &[u16],
10586 a: &[SaSint],
10587 freq: Option<&[SaSint]>,
10588 r: SaSint,
10589 i: &[SaSint],
10590) -> SaSint {
10591 let n = t.len();
10592 if u.len() < n
10593 || a.len() < n
10594 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10595 || ((r != n as SaSint) && (r < 2 || (r & (r - 1)) != 0))
10596 || i.is_empty()
10597 {
10598 return -1;
10599 }
10600 if n <= 1 {
10601 return if i[0] == n as SaSint { 0 } else { -1 };
10602 }
10603
10604 let samples = (n - 1) / r as usize + 1;
10605 if i.len() < samples {
10606 return -1;
10607 }
10608
10609 for &index in &i[..samples] {
10610 if index <= 0 || index as usize > n {
10611 return -1;
10612 }
10613 }
10614 0
10615}
10616
/// Adds the number of occurrences of every symbol of `t` to `count`, indexed by symbol value.
///
/// `count` is not cleared first, so existing values are accumulated into. Panics if a symbol
/// value is not a valid index into `count`.
fn unbwt_compute_histogram(t: &[u16], count: &mut [usize]) {
    t.iter()
        .map(|&symbol| usize::from(symbol))
        .for_each(|slot| count[slot] += 1);
}
10622
10623fn unbwt_shift(n: usize) -> usize {
10624 let mut shift = 0usize;
10625 while (n >> shift) > (1usize << UNBWT_FASTBITS) {
10626 shift += 1;
10627 }
10628 shift
10629}
10630
10631fn unbwt_calculate_fastbits(bucket2: &mut [usize], fastbits: &mut [u16], shift: usize) {
10632 let mut v = 0usize;
10633 let mut sum = 1usize;
10634 for (w, bucket) in bucket2.iter_mut().enumerate().take(ALPHABET_SIZE) {
10635 let prev = sum;
10636 sum += *bucket;
10637 *bucket = prev;
10638 if prev != sum {
10639 while v <= ((sum - 1) >> shift) {
10640 fastbits[v] = w as u16;
10641 v += 1;
10642 }
10643 }
10644 }
10645}
10646
/// Scatters row numbers into `p`, grouped by symbol bucket.
///
/// Rows `0..index` take their symbol from `t[row]`; rows `index + 1..=t.len()`
/// take it from `t[row - 1]`. Row `index` itself is never written. Each write
/// goes to `p[bucket2[symbol]]`, after which that bucket cursor advances by
/// one, so on return every touched `bucket2` entry has moved forward by the
/// number of rows placed in it.
fn unbwt_calculate_p(t: &[u16], p: &mut [usize], bucket2: &mut [usize], index: usize) {
    // Stores `row` at the symbol's current cursor and bumps the cursor.
    let mut place = |symbol: u16, row: usize| {
        let cursor = &mut bucket2[symbol as usize];
        p[*cursor] = row;
        *cursor += 1;
    };

    for row in 0..index {
        place(t[row], row);
    }
    for row in (index + 1)..=t.len() {
        place(t[row - 1], row);
    }
}
10660
/// Block-restricted variant of the row scatter.
///
/// Rows in `block_start..min(index, block_end)` take their symbol from
/// `t[row]`; rows in `max(block_start, index) + 1..=block_end` take it from
/// `t[row - 1]`. Each row is stored at `p[bucket2[symbol]]` and that bucket
/// cursor is then advanced by one.
#[allow(dead_code, non_snake_case)]
fn unbwt_calculate_P(
    t: &[u16],
    p: &mut [usize],
    bucket2: &mut [usize],
    index: usize,
    block_start: usize,
    block_end: usize,
) {
    // Stores `row` at the symbol's current cursor and bumps the cursor.
    let mut place = |symbol: u16, row: usize| {
        let cursor = &mut bucket2[symbol as usize];
        p[*cursor] = row;
        *cursor += 1;
    };

    for row in block_start..index.min(block_end) {
        place(t[row], row);
    }
    for row in (block_start.max(index) + 1)..=block_end {
        place(t[row - 1], row);
    }
}
10684
10685fn unbwt_init_single(
10686 t: &[u16],
10687 p: &mut [usize],
10688 freq: Option<&[SaSint]>,
10689 i: &[SaSint],
10690 bucket2: &mut [usize],
10691 fastbits: &mut [u16],
10692) {
10693 let shift = unbwt_shift(t.len());
10694 if let Some(freq) = freq {
10695 for c in 0..ALPHABET_SIZE {
10696 bucket2[c] = freq[c] as usize;
10697 }
10698 } else {
10699 bucket2.fill(0);
10700 unbwt_compute_histogram(t, bucket2);
10701 }
10702
10703 unbwt_calculate_fastbits(bucket2, fastbits, shift);
10704 unbwt_calculate_p(t, p, bucket2, i[0] as usize);
10705}
10706
10707#[allow(dead_code)]
10708fn unbwt_init_parallel(
10709 t: &[u16],
10710 p: &mut [usize],
10711 freq: Option<&[SaSint]>,
10712 i: &[SaSint],
10713 bucket2: &mut [usize],
10714 fastbits: &mut [u16],
10715 buckets: &mut [usize],
10716 threads: SaSint,
10717) {
10718 let n = t.len();
10719 let available_threads = buckets.len() / ALPHABET_SIZE;
10720 let num_threads = if threads > 1 && n >= 65_536 && available_threads > 1 {
10721 usize::try_from(threads)
10722 .expect("threads must be non-negative")
10723 .min(available_threads)
10724 .max(1)
10725 } else {
10726 1
10727 };
10728
10729 if num_threads == 1 {
10730 unbwt_init_single(t, p, freq, i, bucket2, fastbits);
10731 return;
10732 }
10733
10734 let index = usize::try_from(i[0]).expect("primary index must be non-negative");
10735 let shift = unbwt_shift(n);
10736 let block_stride = (n / num_threads) & !15usize;
10737
10738 for thread in 0..num_threads {
10739 let block_start = thread * block_stride;
10740 let block_size = if thread + 1 < num_threads {
10741 block_stride
10742 } else {
10743 n - block_start
10744 };
10745 let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10746 local.fill(0);
10747 unbwt_compute_histogram(&t[block_start..block_start + block_size], local);
10748 }
10749
10750 bucket2.fill(0);
10751 for thread in 0..num_threads {
10752 let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10753 for c in 0..ALPHABET_SIZE {
10754 let a = bucket2[c];
10755 let b = local[c];
10756 bucket2[c] = a + b;
10757 local[c] = a;
10758 }
10759 }
10760
10761 unbwt_calculate_fastbits(bucket2, fastbits, shift);
10762
10763 for thread in 0..num_threads {
10764 let block_start = thread * block_stride;
10765 let block_size = if thread + 1 < num_threads {
10766 block_stride
10767 } else {
10768 n - block_start
10769 };
10770 let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
10771 for c in 0..ALPHABET_SIZE {
10772 local[c] += bucket2[c];
10773 }
10774 unbwt_calculate_P(t, p, local, index, block_start, block_start + block_size);
10775 }
10776
10777 let last_local = &buckets[(num_threads - 1) * ALPHABET_SIZE..num_threads * ALPHABET_SIZE];
10778 bucket2.copy_from_slice(last_local);
10779}
10780
/// Decodes one step of the inverse transform at position `p0`.
///
/// The symbol search starts at `fastbits[p0 >> shift]` and advances while the
/// candidate's `bucket2` entry is less than or equal to `p0`.
///
/// # Returns
/// The symbol found and the next position, `p[p0]`.
fn unbwt_decode_symbol(
    p0: usize,
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
) -> (u16, usize) {
    let mut symbol = usize::from(fastbits[p0 >> shift]);
    // The lookup table only gives a lower bound; walk forward to the bucket
    // that actually contains `p0`.
    while bucket2[symbol] <= p0 {
        symbol += 1;
    }
    (symbol as u16, p[p0])
}
10796
10797#[allow(dead_code)]
10798fn unbwt_decode_1(
10799 u: &mut [u16],
10800 p: &[usize],
10801 bucket2: &[usize],
10802 fastbits: &[u16],
10803 shift: usize,
10804 i0: &mut usize,
10805 k: usize,
10806) {
10807 let mut cursors = [*i0];
10808 unbwt_decode_lanes::<1>(u, p, bucket2, fastbits, shift, k, &mut cursors, k);
10809 *i0 = cursors[0];
10810}
10811
10812#[allow(dead_code)]
10813fn unbwt_decode_2(
10814 u: &mut [u16],
10815 p: &[usize],
10816 bucket2: &[usize],
10817 fastbits: &[u16],
10818 shift: usize,
10819 r: usize,
10820 i0: &mut usize,
10821 i1: &mut usize,
10822 k: usize,
10823) {
10824 let mut cursors = [*i0, *i1];
10825 unbwt_decode_lanes::<2>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10826 *i0 = cursors[0];
10827 *i1 = cursors[1];
10828}
10829
10830#[allow(dead_code)]
10831fn unbwt_decode_3(
10832 u: &mut [u16],
10833 p: &[usize],
10834 bucket2: &[usize],
10835 fastbits: &[u16],
10836 shift: usize,
10837 r: usize,
10838 i0: &mut usize,
10839 i1: &mut usize,
10840 i2: &mut usize,
10841 k: usize,
10842) {
10843 let mut cursors = [*i0, *i1, *i2];
10844 unbwt_decode_lanes::<3>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10845 *i0 = cursors[0];
10846 *i1 = cursors[1];
10847 *i2 = cursors[2];
10848}
10849
10850#[allow(dead_code)]
10851fn unbwt_decode_4(
10852 u: &mut [u16],
10853 p: &[usize],
10854 bucket2: &[usize],
10855 fastbits: &[u16],
10856 shift: usize,
10857 r: usize,
10858 i0: &mut usize,
10859 i1: &mut usize,
10860 i2: &mut usize,
10861 i3: &mut usize,
10862 k: usize,
10863) {
10864 let mut cursors = [*i0, *i1, *i2, *i3];
10865 unbwt_decode_lanes::<4>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10866 *i0 = cursors[0];
10867 *i1 = cursors[1];
10868 *i2 = cursors[2];
10869 *i3 = cursors[3];
10870}
10871
10872#[allow(dead_code)]
10873fn unbwt_decode_5(
10874 u: &mut [u16],
10875 p: &[usize],
10876 bucket2: &[usize],
10877 fastbits: &[u16],
10878 shift: usize,
10879 r: usize,
10880 cursors: &mut [usize; 5],
10881 k: usize,
10882) {
10883 unbwt_decode_lanes::<5>(u, p, bucket2, fastbits, shift, r, cursors, k);
10884}
10885
10886#[allow(dead_code)]
10887fn unbwt_decode_6(
10888 u: &mut [u16],
10889 p: &[usize],
10890 bucket2: &[usize],
10891 fastbits: &[u16],
10892 shift: usize,
10893 r: usize,
10894 cursors: &mut [usize; 6],
10895 k: usize,
10896) {
10897 unbwt_decode_lanes::<6>(u, p, bucket2, fastbits, shift, r, cursors, k);
10898}
10899
10900#[allow(dead_code)]
10901fn unbwt_decode_7(
10902 u: &mut [u16],
10903 p: &[usize],
10904 bucket2: &[usize],
10905 fastbits: &[u16],
10906 shift: usize,
10907 r: usize,
10908 cursors: &mut [usize; 7],
10909 k: usize,
10910) {
10911 unbwt_decode_lanes::<7>(u, p, bucket2, fastbits, shift, r, cursors, k);
10912}
10913
10914#[allow(dead_code)]
10915fn unbwt_decode_8(
10916 u: &mut [u16],
10917 p: &[usize],
10918 bucket2: &[usize],
10919 fastbits: &[u16],
10920 shift: usize,
10921 r: usize,
10922 cursors: &mut [usize; 8],
10923 k: usize,
10924) {
10925 unbwt_decode_lanes::<8>(u, p, bucket2, fastbits, shift, r, cursors, k);
10926}
10927
10928fn unbwt_decode(
10929 u: &mut [u16],
10930 p: &[usize],
10931 n: usize,
10932 r: usize,
10933 i: &[SaSint],
10934 bucket2: &[usize],
10935 fastbits: &[u16],
10936) {
10937 let shift = unbwt_shift(n);
10938 let blocks = 1 + (n - 1) / r;
10939 let remainder = n - r * (blocks - 1);
10940 unbwt_decode_blocks(u, p, r, i, bucket2, fastbits, shift, blocks, remainder);
10941}
10942
10943fn unbwt_decode_blocks(
10944 u: &mut [u16],
10945 p: &[usize],
10946 r: usize,
10947 i: &[SaSint],
10948 bucket2: &[usize],
10949 fastbits: &[u16],
10950 shift: usize,
10951 blocks: usize,
10952 remainder: usize,
10953) {
10954 let mut blocks_left = blocks;
10955 let mut i_offset = 0usize;
10956 let mut u_offset = 0usize;
10957
10958 while blocks_left > 8 {
10959 let mut cursors = [
10960 i[i_offset] as usize,
10961 i[i_offset + 1] as usize,
10962 i[i_offset + 2] as usize,
10963 i[i_offset + 3] as usize,
10964 i[i_offset + 4] as usize,
10965 i[i_offset + 5] as usize,
10966 i[i_offset + 6] as usize,
10967 i[i_offset + 7] as usize,
10968 ];
10969 unbwt_decode_lanes::<8>(
10970 &mut u[u_offset..],
10971 p,
10972 bucket2,
10973 fastbits,
10974 shift,
10975 r,
10976 &mut cursors,
10977 r,
10978 );
10979 i_offset += 8;
10980 blocks_left -= 8;
10981 u_offset += 8 * r;
10982 }
10983
10984 match blocks_left {
10985 1 => {
10986 let mut cursors = [i[i_offset] as usize];
10987 unbwt_decode_lanes::<1>(
10988 &mut u[u_offset..],
10989 p,
10990 bucket2,
10991 fastbits,
10992 shift,
10993 r,
10994 &mut cursors,
10995 remainder,
10996 );
10997 }
10998 2 => {
10999 let mut cursors = [i[i_offset] as usize, i[i_offset + 1] as usize];
11000 unbwt_decode_lanes::<2>(
11001 &mut u[u_offset..],
11002 p,
11003 bucket2,
11004 fastbits,
11005 shift,
11006 r,
11007 &mut cursors,
11008 remainder,
11009 );
11010 let mut first = [cursors[0]];
11011 unbwt_decode_lanes::<1>(
11012 &mut u[u_offset + remainder..],
11013 p,
11014 bucket2,
11015 fastbits,
11016 shift,
11017 r,
11018 &mut first,
11019 r - remainder,
11020 );
11021 }
11022 3 => {
11023 let mut cursors = [
11024 i[i_offset] as usize,
11025 i[i_offset + 1] as usize,
11026 i[i_offset + 2] as usize,
11027 ];
11028 unbwt_decode_lanes::<3>(
11029 &mut u[u_offset..],
11030 p,
11031 bucket2,
11032 fastbits,
11033 shift,
11034 r,
11035 &mut cursors,
11036 remainder,
11037 );
11038 let mut first = [cursors[0], cursors[1]];
11039 unbwt_decode_lanes::<2>(
11040 &mut u[u_offset + remainder..],
11041 p,
11042 bucket2,
11043 fastbits,
11044 shift,
11045 r,
11046 &mut first,
11047 r - remainder,
11048 );
11049 }
11050 4 => {
11051 let mut cursors = [
11052 i[i_offset] as usize,
11053 i[i_offset + 1] as usize,
11054 i[i_offset + 2] as usize,
11055 i[i_offset + 3] as usize,
11056 ];
11057 unbwt_decode_lanes::<4>(
11058 &mut u[u_offset..],
11059 p,
11060 bucket2,
11061 fastbits,
11062 shift,
11063 r,
11064 &mut cursors,
11065 remainder,
11066 );
11067 let mut first = [cursors[0], cursors[1], cursors[2]];
11068 unbwt_decode_lanes::<3>(
11069 &mut u[u_offset + remainder..],
11070 p,
11071 bucket2,
11072 fastbits,
11073 shift,
11074 r,
11075 &mut first,
11076 r - remainder,
11077 );
11078 }
11079 5 => {
11080 let mut cursors = [
11081 i[i_offset] as usize,
11082 i[i_offset + 1] as usize,
11083 i[i_offset + 2] as usize,
11084 i[i_offset + 3] as usize,
11085 i[i_offset + 4] as usize,
11086 ];
11087 unbwt_decode_lanes::<5>(
11088 &mut u[u_offset..],
11089 p,
11090 bucket2,
11091 fastbits,
11092 shift,
11093 r,
11094 &mut cursors,
11095 remainder,
11096 );
11097 let mut first = [cursors[0], cursors[1], cursors[2], cursors[3]];
11098 unbwt_decode_lanes::<4>(
11099 &mut u[u_offset + remainder..],
11100 p,
11101 bucket2,
11102 fastbits,
11103 shift,
11104 r,
11105 &mut first,
11106 r - remainder,
11107 );
11108 }
11109 6 => {
11110 let mut cursors = [
11111 i[i_offset] as usize,
11112 i[i_offset + 1] as usize,
11113 i[i_offset + 2] as usize,
11114 i[i_offset + 3] as usize,
11115 i[i_offset + 4] as usize,
11116 i[i_offset + 5] as usize,
11117 ];
11118 unbwt_decode_lanes::<6>(
11119 &mut u[u_offset..],
11120 p,
11121 bucket2,
11122 fastbits,
11123 shift,
11124 r,
11125 &mut cursors,
11126 remainder,
11127 );
11128 let mut first = [cursors[0], cursors[1], cursors[2], cursors[3], cursors[4]];
11129 unbwt_decode_lanes::<5>(
11130 &mut u[u_offset + remainder..],
11131 p,
11132 bucket2,
11133 fastbits,
11134 shift,
11135 r,
11136 &mut first,
11137 r - remainder,
11138 );
11139 }
11140 7 => {
11141 let mut cursors = [
11142 i[i_offset] as usize,
11143 i[i_offset + 1] as usize,
11144 i[i_offset + 2] as usize,
11145 i[i_offset + 3] as usize,
11146 i[i_offset + 4] as usize,
11147 i[i_offset + 5] as usize,
11148 i[i_offset + 6] as usize,
11149 ];
11150 unbwt_decode_lanes::<7>(
11151 &mut u[u_offset..],
11152 p,
11153 bucket2,
11154 fastbits,
11155 shift,
11156 r,
11157 &mut cursors,
11158 remainder,
11159 );
11160 let mut first = [
11161 cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5],
11162 ];
11163 unbwt_decode_lanes::<6>(
11164 &mut u[u_offset + remainder..],
11165 p,
11166 bucket2,
11167 fastbits,
11168 shift,
11169 r,
11170 &mut first,
11171 r - remainder,
11172 );
11173 }
11174 _ => {
11175 let mut cursors = [
11176 i[i_offset] as usize,
11177 i[i_offset + 1] as usize,
11178 i[i_offset + 2] as usize,
11179 i[i_offset + 3] as usize,
11180 i[i_offset + 4] as usize,
11181 i[i_offset + 5] as usize,
11182 i[i_offset + 6] as usize,
11183 i[i_offset + 7] as usize,
11184 ];
11185 unbwt_decode_lanes::<8>(
11186 &mut u[u_offset..],
11187 p,
11188 bucket2,
11189 fastbits,
11190 shift,
11191 r,
11192 &mut cursors,
11193 remainder,
11194 );
11195 let mut first = [
11196 cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5], cursors[6],
11197 ];
11198 unbwt_decode_lanes::<7>(
11199 &mut u[u_offset + remainder..],
11200 p,
11201 bucket2,
11202 fastbits,
11203 shift,
11204 r,
11205 &mut first,
11206 r - remainder,
11207 );
11208 }
11209 }
11210}
11211
11212#[allow(dead_code)]
11213fn unbwt_decode_omp(
11214 u: &mut [u16],
11215 p: &[usize],
11216 n: usize,
11217 r: usize,
11218 i: &[SaSint],
11219 bucket2: &[usize],
11220 fastbits: &[u16],
11221 threads: SaSint,
11222) {
11223 let blocks = 1 + (n - 1) / r;
11224 let remainder = n - r * (blocks - 1);
11225 let num_threads = if threads > 1 && n >= 65_536 {
11226 usize::try_from(threads)
11227 .expect("threads must be non-negative")
11228 .min(blocks)
11229 .max(1)
11230 } else {
11231 1
11232 };
11233
11234 if num_threads == 1 {
11235 unbwt_decode(u, p, n, r, i, bucket2, fastbits);
11236 return;
11237 }
11238
11239 let shift = unbwt_shift(n);
11240 let block_stride = blocks / num_threads;
11241 let block_remainder = blocks % num_threads;
11242 for thread in 0..num_threads {
11243 let block_count = block_stride + usize::from(thread < block_remainder);
11244 let block_start = block_stride * thread + thread.min(block_remainder);
11245 let tail = if thread + 1 < num_threads {
11246 r
11247 } else {
11248 remainder
11249 };
11250 unbwt_decode_blocks(
11251 &mut u[r * block_start..],
11252 p,
11253 r,
11254 &i[block_start..],
11255 bucket2,
11256 fastbits,
11257 shift,
11258 block_count,
11259 tail,
11260 );
11261 }
11262}
11263
11264fn unbwt_decode_lanes<const LANES: usize>(
11265 u: &mut [u16],
11266 p: &[usize],
11267 bucket2: &[usize],
11268 fastbits: &[u16],
11269 shift: usize,
11270 r: usize,
11271 cursors: &mut [usize; LANES],
11272 k: usize,
11273) {
11274 for pos in 0..k {
11275 for lane in 0..LANES {
11276 let (symbol, next) = unbwt_decode_symbol(cursors[lane], p, bucket2, fastbits, shift);
11277 cursors[lane] = next;
11278 u[lane * r + pos] = symbol;
11279 }
11280 }
11281}
11282
11283fn unbwt_core(
11284 t: &[u16],
11285 u: &mut [u16],
11286 a: &mut [SaSint],
11287 freq: Option<&[SaSint]>,
11288 r: SaSint,
11289 i: &[SaSint],
11290) -> SaSint {
11291 let n = t.len();
11292 let shift = unbwt_shift(n);
11293 let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11294 let mut fastbits = vec![0u16; 1 + (n >> shift)];
11295
11296 unbwt_core_with_buffers(t, u, a, freq, r, i, &mut bucket2, &mut fastbits, 1)
11297}
11298
11299fn unbwt_core_with_buffers(
11300 t: &[u16],
11301 u: &mut [u16],
11302 a: &mut [SaSint],
11303 freq: Option<&[SaSint]>,
11304 r: SaSint,
11305 i: &[SaSint],
11306 bucket2: &mut [usize],
11307 fastbits: &mut [u16],
11308 threads: SaSint,
11309) -> SaSint {
11310 let n = t.len();
11311 let shift = unbwt_shift(n);
11312 if bucket2.len() < ALPHABET_SIZE || fastbits.len() < 1 + (n >> shift) {
11313 return -2;
11314 }
11315
11316 let mut p = vec![0usize; n + 1];
11317 unbwt_init_single(
11318 t,
11319 &mut p,
11320 freq,
11321 i,
11322 &mut bucket2[..ALPHABET_SIZE],
11323 &mut fastbits[..1 + (n >> shift)],
11324 );
11325 unbwt_decode_omp(
11326 u,
11327 &p,
11328 n,
11329 r as usize,
11330 i,
11331 &bucket2[..ALPHABET_SIZE],
11332 &fastbits[..1 + (n >> shift)],
11333 threads,
11334 );
11335
11336 for (dst, &src) in a.iter_mut().zip(p.iter().skip(1)) {
11337 *dst = src as SaSint;
11338 }
11339 0
11340}
11341
11342fn inverse_bwt(
11343 t: &[u16],
11344 u: &mut [u16],
11345 a: &mut [SaSint],
11346 freq: Option<&[SaSint]>,
11347 primary: SaSint,
11348) -> SaSint {
11349 let n = t.len();
11350 let i = [primary];
11351 let rc = validate_unbwt_aux(t, u, a, freq, n as SaSint, &i);
11352 if rc != 0 {
11353 return rc;
11354 }
11355 if n <= 1 {
11356 if n == 1 {
11357 u[0] = t[0];
11358 }
11359 return 0;
11360 }
11361 unbwt_core(t, u, a, freq, n as SaSint, &i)
11362}
11363
11364pub fn libsais16_unbwt(
11374 t: &[u16],
11375 u: &mut [u16],
11376 a: &mut [SaSint],
11377 freq: Option<&[SaSint]>,
11378 i: SaSint,
11379) -> SaSint {
11380 inverse_bwt(t, u, a, freq, i)
11381}
11382
11383pub fn libsais16_unbwt_ctx(
11394 ctx: &mut UnbwtContext,
11395 t: &[u16],
11396 u: &mut [u16],
11397 a: &mut [SaSint],
11398 freq: Option<&[SaSint]>,
11399 i: SaSint,
11400) -> SaSint {
11401 libsais16_unbwt_aux_ctx(ctx, t, u, a, freq, t.len() as SaSint, &[i])
11402}
11403
11404pub fn libsais16_unbwt_aux(
11415 t: &[u16],
11416 u: &mut [u16],
11417 a: &mut [SaSint],
11418 freq: Option<&[SaSint]>,
11419 r: SaSint,
11420 i: &[SaSint],
11421) -> SaSint {
11422 let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11423 if rc != 0 {
11424 return rc;
11425 }
11426 if t.len() <= 1 {
11427 if t.len() == 1 {
11428 u[0] = t[0];
11429 }
11430 return 0;
11431 }
11432 unbwt_core(t, u, a, freq, r, i)
11433}
11434
11435pub fn libsais16_unbwt_aux_ctx(
11447 ctx: &mut UnbwtContext,
11448 t: &[u16],
11449 u: &mut [u16],
11450 a: &mut [SaSint],
11451 freq: Option<&[SaSint]>,
11452 r: SaSint,
11453 i: &[SaSint],
11454) -> SaSint {
11455 let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11456 if rc != 0 {
11457 return rc;
11458 }
11459 if t.len() <= 1 {
11460 if t.len() == 1 {
11461 u[0] = t[0];
11462 }
11463 return 0;
11464 }
11465 unbwt_core_with_buffers(
11466 t,
11467 u,
11468 a,
11469 freq,
11470 r,
11471 i,
11472 &mut ctx.bucket2,
11473 &mut ctx.fastbits,
11474 ctx.threads,
11475 )
11476}
11477
11478pub fn libsais16_unbwt_omp(
11489 t: &[u16],
11490 u: &mut [u16],
11491 a: &mut [SaSint],
11492 freq: Option<&[SaSint]>,
11493 i: SaSint,
11494 threads: SaSint,
11495) -> SaSint {
11496 if threads < 0 {
11497 -1
11498 } else {
11499 let primary = [i];
11500 libsais16_unbwt_aux_omp(t, u, a, freq, t.len() as SaSint, &primary, threads)
11501 }
11502}
11503
11504pub fn libsais16_unbwt_aux_omp(
11516 t: &[u16],
11517 u: &mut [u16],
11518 a: &mut [SaSint],
11519 freq: Option<&[SaSint]>,
11520 r: SaSint,
11521 i: &[SaSint],
11522 threads: SaSint,
11523) -> SaSint {
11524 if threads < 0 {
11525 -1
11526 } else {
11527 let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11528 if rc != 0 {
11529 return rc;
11530 }
11531 if t.len() <= 1 {
11532 if t.len() == 1 {
11533 u[0] = t[0];
11534 }
11535 return 0;
11536 }
11537 let n = t.len();
11538 let shift = unbwt_shift(n);
11539 let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11540 let mut fastbits = vec![0u16; 1 + (n >> shift)];
11541 unbwt_core_with_buffers(
11542 t,
11543 u,
11544 a,
11545 freq,
11546 r,
11547 i,
11548 &mut bucket2,
11549 &mut fastbits,
11550 normalize_threads(threads),
11551 )
11552 }
11553}
11554
11555pub fn libsais16_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11563 compute_plcp(t, sa, plcp, false)
11564}
11565
11566pub fn libsais16_plcp_gsa(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11574 if t.last().copied().unwrap_or(0) != 0 {
11575 -1
11576 } else {
11577 compute_plcp(t, sa, plcp, true)
11578 }
11579}
11580
11581fn compute_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11582 if sa.len() != t.len() || plcp.len() != t.len() {
11583 return -1;
11584 }
11585 if t.len() <= 1 {
11586 if t.len() == 1 {
11587 plcp[0] = 0;
11588 }
11589 return 0;
11590 }
11591
11592 if compute_phi(sa, plcp) != 0 {
11593 return -1;
11594 }
11595
11596 compute_plcp_from_phi(t, plcp, gsa)
11597}
11598
11599fn compute_phi(sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11600 let n = sa.len();
11601 let mut previous = n as SaSint;
11602 for &suffix_value in sa {
11603 let Some(suffix) = suffix_index(suffix_value, n) else {
11604 return -1;
11605 };
11606 plcp[suffix] = previous;
11607 previous = suffix_value;
11608 }
11609 0
11610}
11611
11612fn compute_plcp_from_phi(t: &[u16], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11613 let n = t.len();
11614 let mut l = 0usize;
11615 for i in 0..t.len() {
11616 let previous = plcp[i];
11617 if previous == n as SaSint {
11618 plcp[i] = 0;
11619 l = 0;
11620 continue;
11621 }
11622
11623 let Some(prev) = suffix_index(previous, n) else {
11624 return -1;
11625 };
11626
11627 while i + l < t.len()
11628 && prev + l < t.len()
11629 && t[i + l] == t[prev + l]
11630 && (!gsa || t[i + l] != 0)
11631 {
11632 l += 1;
11633 }
11634 plcp[i] = l as SaSint;
11635 l = l.saturating_sub(1);
11636 }
11637 0
11638}
11639
11640#[allow(dead_code)]
11641fn compute_phi_omp(sa: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11642 let n_usize = n as usize;
11643 if threads == 1 || n < 65_536 {
11644 return compute_phi(&sa[..n_usize], &mut plcp[..n_usize]);
11645 }
11646
11647 let block_stride = (n / threads) & !15;
11648 for thread in 0..threads {
11649 let block_start = thread * block_stride;
11650 let block_size = if thread < threads - 1 {
11651 block_stride
11652 } else {
11653 n - block_start
11654 };
11655 let start = block_start as usize;
11656 let end = (block_start + block_size) as usize;
11657 let mut previous = if start > 0 { sa[start - 1] } else { n };
11658 for &suffix_value in &sa[start..end] {
11659 let Some(suffix) = suffix_index(suffix_value, n_usize) else {
11660 return -1;
11661 };
11662 plcp[suffix] = previous;
11663 previous = suffix_value;
11664 }
11665 }
11666 0
11667}
11668
11669#[allow(dead_code)]
11670fn compute_plcp_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11671 if threads == 1 || n < 65_536 {
11672 let n = n as usize;
11673 return compute_plcp_from_phi(&t[..n], &mut plcp[..n], false);
11674 }
11675
11676 let block_stride = (n / threads) & !15;
11677 for thread in 0..threads {
11678 let block_start = thread * block_stride;
11679 let block_size = if thread < threads - 1 {
11680 block_stride
11681 } else {
11682 n - block_start
11683 };
11684 let rc = compute_plcp_range(
11685 t,
11686 plcp,
11687 n as usize,
11688 block_start as isize,
11689 block_size as isize,
11690 false,
11691 );
11692 if rc != 0 {
11693 return rc;
11694 }
11695 }
11696 0
11697}
11698
11699fn compute_plcp_range(
11700 t: &[u16],
11701 plcp: &mut [SaSint],
11702 n: usize,
11703 omp_block_start: isize,
11704 omp_block_size: isize,
11705 gsa: bool,
11706) -> SaSint {
11707 let mut l = 0usize;
11708 let end = (omp_block_start + omp_block_size) as usize;
11709 for i in omp_block_start as usize..end {
11710 let previous = plcp[i];
11711 if previous == n as SaSint {
11712 plcp[i] = 0;
11713 l = 0;
11714 continue;
11715 }
11716
11717 let Some(prev) = suffix_index(previous, n) else {
11718 return -1;
11719 };
11720
11721 while i + l < t.len()
11722 && prev + l < t.len()
11723 && t[i + l] == t[prev + l]
11724 && (!gsa || t[i + l] != 0)
11725 {
11726 l += 1;
11727 }
11728 plcp[i] = l as SaSint;
11729 l = l.saturating_sub(1);
11730 }
11731 0
11732}
11733
11734#[allow(dead_code)]
11735fn compute_plcp_gsa(
11736 t: &[u16],
11737 plcp: &mut [SaSint],
11738 omp_block_start: isize,
11739 omp_block_size: isize,
11740) -> SaSint {
11741 let n = t.len();
11742 let mut l = 0usize;
11743 let end = (omp_block_start + omp_block_size) as usize;
11744 for i in omp_block_start as usize..end {
11745 let previous = plcp[i];
11746 if previous == n as SaSint {
11747 plcp[i] = 0;
11748 l = 0;
11749 continue;
11750 }
11751
11752 let Some(prev) = suffix_index(previous, n) else {
11753 return -1;
11754 };
11755
11756 while i + l < t.len() && prev + l < t.len() && t[i + l] == t[prev + l] && t[i + l] != 0 {
11757 l += 1;
11758 }
11759 plcp[i] = l as SaSint;
11760 l = l.saturating_sub(1);
11761 }
11762 0
11763}
11764
11765#[allow(dead_code)]
11766fn compute_plcp_gsa_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11767 if threads == 1 || n < 65_536 {
11768 return compute_plcp_gsa(t, plcp, 0, n as isize);
11769 }
11770
11771 let block_stride = (n / threads) & !15;
11772 for thread in 0..threads {
11773 let block_start = thread * block_stride;
11774 let block_size = if thread < threads - 1 {
11775 block_stride
11776 } else {
11777 n - block_start
11778 };
11779 let rc = compute_plcp_gsa(t, plcp, block_start as isize, block_size as isize);
11780 if rc != 0 {
11781 return rc;
11782 }
11783 }
11784 0
11785}
11786
11787#[allow(dead_code)]
11788fn compute_lcp(
11789 plcp: &[SaSint],
11790 sa: &[SaSint],
11791 lcp: &mut [SaSint],
11792 omp_block_start: isize,
11793 omp_block_size: isize,
11794) -> SaSint {
11795 let end = (omp_block_start + omp_block_size) as usize;
11796 for row in omp_block_start as usize..end {
11797 let Some(suffix) = suffix_index(sa[row], plcp.len()) else {
11798 return -1;
11799 };
11800 lcp[row] = plcp[suffix];
11801 }
11802 0
11803}
11804
11805#[allow(dead_code)]
11806fn compute_lcp_omp(
11807 plcp: &[SaSint],
11808 sa: &[SaSint],
11809 lcp: &mut [SaSint],
11810 n: SaSint,
11811 threads: SaSint,
11812) -> SaSint {
11813 if threads == 1 || n < 65_536 {
11814 return compute_lcp(plcp, sa, lcp, 0, n as isize);
11815 }
11816
11817 let block_stride = (n / threads) & !15;
11818 for thread in 0..threads {
11819 let block_start = thread * block_stride;
11820 let block_size = if thread < threads - 1 {
11821 block_stride
11822 } else {
11823 n - block_start
11824 };
11825 let rc = compute_lcp(plcp, sa, lcp, block_start as isize, block_size as isize);
11826 if rc != 0 {
11827 return rc;
11828 }
11829 }
11830 0
11831}
11832
11833pub fn libsais16_lcp(plcp: &[SaSint], sa: &[SaSint], lcp: &mut [SaSint]) -> SaSint {
11841 if plcp.len() != sa.len() || lcp.len() != sa.len() {
11842 return -1;
11843 }
11844 for (row, &suffix) in sa.iter().enumerate() {
11845 let Some(suffix) = suffix_index(suffix, plcp.len()) else {
11846 return -1;
11847 };
11848 lcp[row] = plcp[suffix];
11849 }
11850 0
11851}
11852
11853fn suffix_index(value: SaSint, len: usize) -> Option<usize> {
11854 usize::try_from(value).ok().filter(|&index| index < len)
11855}
11856
11857pub fn libsais16_plcp_omp(
11866 t: &[u16],
11867 sa: &[SaSint],
11868 plcp: &mut [SaSint],
11869 threads: SaSint,
11870) -> SaSint {
11871 if threads < 0 {
11872 return -1;
11873 }
11874 if sa.len() != t.len() || plcp.len() != t.len() {
11875 return -1;
11876 }
11877 if t.len() <= 1 {
11878 if t.len() == 1 {
11879 plcp[0] = 0;
11880 }
11881 return 0;
11882 }
11883
11884 let n = t.len() as SaSint;
11885 let threads = normalize_threads(threads);
11886 if compute_phi_omp(sa, plcp, n, threads) != 0 {
11887 return -1;
11888 }
11889 compute_plcp_omp(t, plcp, n, threads)
11890}
11891
11892pub fn libsais16_plcp_gsa_omp(
11901 t: &[u16],
11902 sa: &[SaSint],
11903 plcp: &mut [SaSint],
11904 threads: SaSint,
11905) -> SaSint {
11906 if threads < 0 {
11907 return -1;
11908 }
11909 if t.last().copied().unwrap_or(0) != 0 {
11910 return -1;
11911 }
11912 if sa.len() != t.len() || plcp.len() != t.len() {
11913 return -1;
11914 }
11915 if t.len() <= 1 {
11916 if t.len() == 1 {
11917 plcp[0] = 0;
11918 }
11919 return 0;
11920 }
11921
11922 let n = t.len() as SaSint;
11923 let threads = normalize_threads(threads);
11924 if compute_phi_omp(sa, plcp, n, threads) != 0 {
11925 return -1;
11926 }
11927 compute_plcp_gsa_omp(t, plcp, n, threads)
11928}
11929
11930pub fn libsais16_lcp_omp(
11939 plcp: &[SaSint],
11940 sa: &[SaSint],
11941 lcp: &mut [SaSint],
11942 threads: SaSint,
11943) -> SaSint {
11944 if threads < 0 {
11945 return -1;
11946 }
11947 if plcp.len() != sa.len() || lcp.len() != sa.len() {
11948 return -1;
11949 }
11950
11951 compute_lcp_omp(
11952 plcp,
11953 sa,
11954 lcp,
11955 sa.len() as SaSint,
11956 normalize_threads(threads),
11957 )
11958}
11959
11960#[cfg(all(test, feature = "upstream-c"))]
11961mod tests {
11962 use super::*;
11963
11964 unsafe extern "C" {
11965 fn probe_public_libsais16(t: *const u16, sa: *mut SaSint, n: SaSint, fs: SaSint) -> SaSint;
11966 fn probe_public_libsais16_freq(
11967 t: *const u16,
11968 sa: *mut SaSint,
11969 n: SaSint,
11970 fs: SaSint,
11971 freq: *mut SaSint,
11972 ) -> SaSint;
11973 fn probe_public_libsais16_gsa(
11974 t: *const u16,
11975 sa: *mut SaSint,
11976 n: SaSint,
11977 fs: SaSint,
11978 ) -> SaSint;
11979 fn probe_public_libsais16_gsa_freq(
11980 t: *const u16,
11981 sa: *mut SaSint,
11982 n: SaSint,
11983 fs: SaSint,
11984 freq: *mut SaSint,
11985 ) -> SaSint;
11986 fn probe_public_libsais16_int(
11987 t: *mut SaSint,
11988 sa: *mut SaSint,
11989 n: SaSint,
11990 k: SaSint,
11991 fs: SaSint,
11992 ) -> SaSint;
11993 fn probe_libsais16_main_32s_entry(
11994 t: *mut SaSint,
11995 sa: *mut SaSint,
11996 n: SaSint,
11997 k: SaSint,
11998 fs: SaSint,
11999 threads: SaSint,
12000 ) -> SaSint;
12001 fn probe_libsais16_final_sorting_scan_left_to_right_32s(
12002 t: *const SaSint,
12003 sa: *mut SaSint,
12004 induction_bucket: *mut SaSint,
12005 omp_block_start: SaSint,
12006 omp_block_size: SaSint,
12007 );
12008 fn probe_libsais16_final_sorting_scan_right_to_left_32s(
12009 t: *const SaSint,
12010 sa: *mut SaSint,
12011 induction_bucket: *mut SaSint,
12012 omp_block_start: SaSint,
12013 omp_block_size: SaSint,
12014 );
12015 fn probe_libsais16_clear_lms_suffixes_omp(
12016 sa: *mut SaSint,
12017 n: SaSint,
12018 k: SaSint,
12019 bucket_start: *mut SaSint,
12020 bucket_end: *mut SaSint,
12021 threads: SaSint,
12022 );
12023 fn probe_libsais16_flip_suffix_markers_omp(sa: *mut SaSint, l: SaSint, threads: SaSint);
12024 fn probe_libsais16_induce_final_order_32s_6k(
12025 t: *const SaSint,
12026 sa: *mut SaSint,
12027 n: SaSint,
12028 k: SaSint,
12029 buckets: *mut SaSint,
12030 threads: SaSint,
12031 );
12032 fn probe_libsais16_induce_final_order_32s_4k(
12033 t: *const SaSint,
12034 sa: *mut SaSint,
12035 n: SaSint,
12036 k: SaSint,
12037 buckets: *mut SaSint,
12038 threads: SaSint,
12039 );
12040 fn probe_libsais16_induce_final_order_32s_2k(
12041 t: *const SaSint,
12042 sa: *mut SaSint,
12043 n: SaSint,
12044 k: SaSint,
12045 buckets: *mut SaSint,
12046 threads: SaSint,
12047 );
12048 fn probe_libsais16_induce_final_order_32s_1k(
12049 t: *const SaSint,
12050 sa: *mut SaSint,
12051 n: SaSint,
12052 k: SaSint,
12053 buckets: *mut SaSint,
12054 threads: SaSint,
12055 );
12056 fn probe_libsais16_induce_partial_order_32s_6k_omp(
12057 t: *const SaSint,
12058 sa: *mut SaSint,
12059 n: SaSint,
12060 k: SaSint,
12061 buckets: *mut SaSint,
12062 first_lms_suffix: SaSint,
12063 left_suffixes_count: SaSint,
12064 threads: SaSint,
12065 );
12066 fn probe_libsais16_induce_partial_order_32s_4k_omp(
12067 t: *const SaSint,
12068 sa: *mut SaSint,
12069 n: SaSint,
12070 k: SaSint,
12071 buckets: *mut SaSint,
12072 threads: SaSint,
12073 );
12074 fn probe_libsais16_induce_partial_order_32s_2k_omp(
12075 t: *const SaSint,
12076 sa: *mut SaSint,
12077 n: SaSint,
12078 k: SaSint,
12079 buckets: *mut SaSint,
12080 threads: SaSint,
12081 );
12082 fn probe_libsais16_induce_partial_order_32s_1k_omp(
12083 t: *const SaSint,
12084 sa: *mut SaSint,
12085 n: SaSint,
12086 k: SaSint,
12087 buckets: *mut SaSint,
12088 threads: SaSint,
12089 );
12090 fn probe_libsais16_induce_partial_order_16u_omp(
12091 t: *const u16,
12092 sa: *mut SaSint,
12093 n: SaSint,
12094 k: SaSint,
12095 flags: SaSint,
12096 buckets: *mut SaSint,
12097 first_lms_suffix: SaSint,
12098 left_suffixes_count: SaSint,
12099 threads: SaSint,
12100 );
12101 fn probe_libsais16_induce_final_order_16u_omp(
12102 t: *const u16,
12103 sa: *mut SaSint,
12104 n: SaSint,
12105 k: SaSint,
12106 flags: SaSint,
12107 r: SaSint,
12108 i: *mut SaSint,
12109 buckets: *mut SaSint,
12110 threads: SaSint,
12111 ) -> SaSint;
12112 fn probe_public_libsais16_bwt(
12113 t: *const u16,
12114 u: *mut u16,
12115 a: *mut SaSint,
12116 n: SaSint,
12117 fs: SaSint,
12118 ) -> SaSint;
12119 fn probe_public_libsais16_bwt_freq(
12120 t: *const u16,
12121 u: *mut u16,
12122 a: *mut SaSint,
12123 n: SaSint,
12124 fs: SaSint,
12125 freq: *mut SaSint,
12126 ) -> SaSint;
12127 fn probe_public_libsais16_bwt_aux(
12128 t: *const u16,
12129 u: *mut u16,
12130 a: *mut SaSint,
12131 n: SaSint,
12132 fs: SaSint,
12133 r: SaSint,
12134 i: *mut SaSint,
12135 ) -> SaSint;
12136 fn probe_public_libsais16_bwt_aux_freq(
12137 t: *const u16,
12138 u: *mut u16,
12139 a: *mut SaSint,
12140 n: SaSint,
12141 fs: SaSint,
12142 freq: *mut SaSint,
12143 r: SaSint,
12144 i: *mut SaSint,
12145 ) -> SaSint;
12146 fn probe_public_libsais16_unbwt(
12147 t: *const u16,
12148 u: *mut u16,
12149 a: *mut SaSint,
12150 n: SaSint,
12151 i: SaSint,
12152 ) -> SaSint;
12153 fn probe_public_libsais16_unbwt_freq(
12154 t: *const u16,
12155 u: *mut u16,
12156 a: *mut SaSint,
12157 n: SaSint,
12158 freq: *const SaSint,
12159 i: SaSint,
12160 ) -> SaSint;
12161 fn probe_public_libsais16_unbwt_aux(
12162 t: *const u16,
12163 u: *mut u16,
12164 a: *mut SaSint,
12165 n: SaSint,
12166 r: SaSint,
12167 i: *const SaSint,
12168 ) -> SaSint;
12169 fn probe_public_libsais16_unbwt_aux_freq(
12170 t: *const u16,
12171 u: *mut u16,
12172 a: *mut SaSint,
12173 n: SaSint,
12174 freq: *const SaSint,
12175 r: SaSint,
12176 i: *const SaSint,
12177 ) -> SaSint;
12178 fn probe_public_libsais16_plcp(
12179 t: *const u16,
12180 sa: *const SaSint,
12181 plcp: *mut SaSint,
12182 n: SaSint,
12183 ) -> SaSint;
12184 fn probe_public_libsais16_plcp_gsa(
12185 t: *const u16,
12186 sa: *const SaSint,
12187 plcp: *mut SaSint,
12188 n: SaSint,
12189 ) -> SaSint;
12190 fn probe_public_libsais16_lcp(
12191 plcp: *const SaSint,
12192 sa: *const SaSint,
12193 lcp: *mut SaSint,
12194 n: SaSint,
12195 ) -> SaSint;
12196 fn probe_libsais16_gather_lms_suffixes_16u(
12197 t: *const u16,
12198 sa: *mut SaSint,
12199 n: SaSint,
12200 m: SaSint,
12201 omp_block_start: SaSint,
12202 omp_block_size: SaSint,
12203 );
12204 fn probe_libsais16_count_and_gather_lms_suffixes_16u(
12205 t: *const u16,
12206 sa: *mut SaSint,
12207 n: SaSint,
12208 buckets: *mut SaSint,
12209 omp_block_start: SaSint,
12210 omp_block_size: SaSint,
12211 ) -> SaSint;
12212 fn probe_libsais16_initialize_buckets_start_and_end_16u(
12213 buckets: *mut SaSint,
12214 freq: *mut SaSint,
12215 ) -> SaSint;
12216 fn probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_16u(
12217 t: *const u16,
12218 buckets: *mut SaSint,
12219 first_lms_suffix: SaSint,
12220 ) -> SaSint;
12221 fn probe_libsais16_radix_sort_lms_suffixes_16u(
12222 t: *const u16,
12223 sa: *mut SaSint,
12224 induction_bucket: *mut SaSint,
12225 omp_block_start: SaSint,
12226 omp_block_size: SaSint,
12227 );
12228 fn probe_libsais16_initialize_buckets_for_partial_sorting_16u(
12229 t: *const u16,
12230 buckets: *mut SaSint,
12231 first_lms_suffix: SaSint,
12232 left_suffixes_count: SaSint,
12233 );
12234 fn probe_libsais16_partial_sorting_scan_left_to_right_16u(
12235 t: *const u16,
12236 sa: *mut SaSint,
12237 buckets: *mut SaSint,
12238 d: SaSint,
12239 omp_block_start: SaSint,
12240 omp_block_size: SaSint,
12241 ) -> SaSint;
12242 fn probe_libsais16_partial_sorting_scan_right_to_left_16u(
12243 t: *const u16,
12244 sa: *mut SaSint,
12245 buckets: *mut SaSint,
12246 d: SaSint,
12247 omp_block_start: SaSint,
12248 omp_block_size: SaSint,
12249 ) -> SaSint;
12250 fn probe_libsais16_partial_gsa_scan_right_to_left_16u(
12251 t: *const u16,
12252 sa: *mut SaSint,
12253 buckets: *mut SaSint,
12254 d: SaSint,
12255 omp_block_start: SaSint,
12256 omp_block_size: SaSint,
12257 ) -> SaSint;
12258 fn probe_libsais16_partial_sorting_shift_markers_16u_omp(
12259 sa: *mut SaSint,
12260 n: SaSint,
12261 buckets: *const SaSint,
12262 threads: SaSint,
12263 );
12264 fn probe_libsais16_final_sorting_scan_left_to_right_16u(
12265 t: *const u16,
12266 sa: *mut SaSint,
12267 induction_bucket: *mut SaSint,
12268 omp_block_start: SaSint,
12269 omp_block_size: SaSint,
12270 );
12271 fn probe_libsais16_final_sorting_scan_right_to_left_16u(
12272 t: *const u16,
12273 sa: *mut SaSint,
12274 induction_bucket: *mut SaSint,
12275 omp_block_start: SaSint,
12276 omp_block_size: SaSint,
12277 );
12278 fn probe_libsais16_final_gsa_scan_right_to_left_16u(
12279 t: *const u16,
12280 sa: *mut SaSint,
12281 induction_bucket: *mut SaSint,
12282 omp_block_start: SaSint,
12283 omp_block_size: SaSint,
12284 );
12285 fn probe_libsais16_final_bwt_scan_left_to_right_16u(
12286 t: *const u16,
12287 sa: *mut SaSint,
12288 induction_bucket: *mut SaSint,
12289 omp_block_start: SaSint,
12290 omp_block_size: SaSint,
12291 );
12292 fn probe_libsais16_final_bwt_scan_right_to_left_16u(
12293 t: *const u16,
12294 sa: *mut SaSint,
12295 induction_bucket: *mut SaSint,
12296 omp_block_start: SaSint,
12297 omp_block_size: SaSint,
12298 ) -> SaSint;
12299 fn probe_libsais16_final_bwt_aux_scan_left_to_right_16u(
12300 t: *const u16,
12301 sa: *mut SaSint,
12302 rm: SaSint,
12303 i_sample: *mut SaSint,
12304 induction_bucket: *mut SaSint,
12305 omp_block_start: SaSint,
12306 omp_block_size: SaSint,
12307 );
12308 fn probe_libsais16_final_bwt_aux_scan_right_to_left_16u(
12309 t: *const u16,
12310 sa: *mut SaSint,
12311 rm: SaSint,
12312 i_sample: *mut SaSint,
12313 induction_bucket: *mut SaSint,
12314 omp_block_start: SaSint,
12315 omp_block_size: SaSint,
12316 );
12317 fn probe_libsais16_renumber_lms_suffixes_16u(
12318 sa: *mut SaSint,
12319 m: SaSint,
12320 name: SaSint,
12321 omp_block_start: SaSint,
12322 omp_block_size: SaSint,
12323 ) -> SaSint;
12324 fn probe_libsais16_place_lms_suffixes_interval_16u(
12325 sa: *mut SaSint,
12326 n: SaSint,
12327 m: SaSint,
12328 flags: SaSint,
12329 buckets: *mut SaSint,
12330 );
12331 fn probe_libsais16_bwt_copy_16u(u: *mut u16, a: *mut SaSint, n: SaSint);
12332 fn probe_libsais16_gather_lms_suffixes_16u_omp(
12333 t: *const u16,
12334 sa: *mut SaSint,
12335 n: SaSint,
12336 threads: SaSint,
12337 );
12338 fn probe_libsais16_count_and_gather_lms_suffixes_16u_omp(
12339 t: *const u16,
12340 sa: *mut SaSint,
12341 n: SaSint,
12342 buckets: *mut SaSint,
12343 threads: SaSint,
12344 ) -> SaSint;
12345 fn probe_libsais16_radix_sort_lms_suffixes_16u_omp(
12346 t: *const u16,
12347 sa: *mut SaSint,
12348 n: SaSint,
12349 m: SaSint,
12350 flags: SaSint,
12351 buckets: *mut SaSint,
12352 threads: SaSint,
12353 );
12354 fn probe_libsais16_partial_sorting_scan_left_to_right_16u_omp(
12355 t: *const u16,
12356 sa: *mut SaSint,
12357 n: SaSint,
12358 k: SaSint,
12359 buckets: *mut SaSint,
12360 left_suffixes_count: SaSint,
12361 d: SaSint,
12362 threads: SaSint,
12363 ) -> SaSint;
12364 fn probe_libsais16_partial_sorting_scan_right_to_left_16u_omp(
12365 t: *const u16,
12366 sa: *mut SaSint,
12367 n: SaSint,
12368 k: SaSint,
12369 buckets: *mut SaSint,
12370 first_lms_suffix: SaSint,
12371 left_suffixes_count: SaSint,
12372 d: SaSint,
12373 threads: SaSint,
12374 );
12375 fn probe_libsais16_partial_gsa_scan_right_to_left_16u_omp(
12376 t: *const u16,
12377 sa: *mut SaSint,
12378 n: SaSint,
12379 k: SaSint,
12380 buckets: *mut SaSint,
12381 first_lms_suffix: SaSint,
12382 left_suffixes_count: SaSint,
12383 d: SaSint,
12384 threads: SaSint,
12385 );
12386 fn probe_libsais16_renumber_lms_suffixes_16u_omp(
12387 sa: *mut SaSint,
12388 m: SaSint,
12389 threads: SaSint,
12390 ) -> SaSint;
12391 fn probe_libsais16_final_bwt_scan_left_to_right_16u_omp(
12392 t: *const u16,
12393 sa: *mut SaSint,
12394 n: SaSint,
12395 k: SaSint,
12396 induction_bucket: *mut SaSint,
12397 threads: SaSint,
12398 );
12399 fn probe_libsais16_final_bwt_aux_scan_left_to_right_16u_omp(
12400 t: *const u16,
12401 sa: *mut SaSint,
12402 n: SaSint,
12403 k: SaSint,
12404 rm: SaSint,
12405 i_sample: *mut SaSint,
12406 induction_bucket: *mut SaSint,
12407 threads: SaSint,
12408 );
12409 fn probe_libsais16_final_sorting_scan_left_to_right_16u_omp(
12410 t: *const u16,
12411 sa: *mut SaSint,
12412 n: SaSint,
12413 k: SaSint,
12414 induction_bucket: *mut SaSint,
12415 threads: SaSint,
12416 );
12417 fn probe_libsais16_final_bwt_scan_right_to_left_16u_omp(
12418 t: *const u16,
12419 sa: *mut SaSint,
12420 n: SaSint,
12421 k: SaSint,
12422 induction_bucket: *mut SaSint,
12423 threads: SaSint,
12424 ) -> SaSint;
12425 fn probe_libsais16_final_bwt_aux_scan_right_to_left_16u_omp(
12426 t: *const u16,
12427 sa: *mut SaSint,
12428 n: SaSint,
12429 k: SaSint,
12430 rm: SaSint,
12431 i_sample: *mut SaSint,
12432 induction_bucket: *mut SaSint,
12433 threads: SaSint,
12434 );
12435 fn probe_libsais16_final_sorting_scan_right_to_left_16u_omp(
12436 t: *const u16,
12437 sa: *mut SaSint,
12438 omp_block_start: SaSint,
12439 omp_block_size: SaSint,
12440 k: SaSint,
12441 induction_bucket: *mut SaSint,
12442 threads: SaSint,
12443 );
12444 fn probe_libsais16_final_gsa_scan_right_to_left_16u_omp(
12445 t: *const u16,
12446 sa: *mut SaSint,
12447 omp_block_start: SaSint,
12448 omp_block_size: SaSint,
12449 k: SaSint,
12450 induction_bucket: *mut SaSint,
12451 threads: SaSint,
12452 );
12453 fn probe_libsais16_bwt_copy_16u_omp(
12454 u: *mut u16,
12455 a: *mut SaSint,
12456 n: SaSint,
12457 threads: SaSint,
12458 );
12459 fn probe_libsais16_gather_marked_lms_suffixes(
12460 sa: *mut SaSint,
12461 m: SaSint,
12462 l: SaSint,
12463 omp_block_start: SaSint,
12464 omp_block_size: SaSint,
12465 ) -> SaSint;
12466 fn probe_libsais16_gather_marked_lms_suffixes_omp(
12467 sa: *mut SaSint,
12468 n: SaSint,
12469 m: SaSint,
12470 fs: SaSint,
12471 threads: SaSint,
12472 );
12473 fn probe_libsais16_renumber_and_gather_lms_suffixes_omp(
12474 sa: *mut SaSint,
12475 n: SaSint,
12476 m: SaSint,
12477 fs: SaSint,
12478 threads: SaSint,
12479 ) -> SaSint;
12480 fn probe_libsais16_reconstruct_lms_suffixes(
12481 sa: *mut SaSint,
12482 n: SaSint,
12483 m: SaSint,
12484 omp_block_start: SaSint,
12485 omp_block_size: SaSint,
12486 );
12487 fn probe_libsais16_reconstruct_lms_suffixes_omp(
12488 sa: *mut SaSint,
12489 n: SaSint,
12490 m: SaSint,
12491 threads: SaSint,
12492 );
12493 fn probe_libsais16_renumber_distinct_lms_suffixes_32s_4k(
12494 sa: *mut SaSint,
12495 m: SaSint,
12496 name: SaSint,
12497 omp_block_start: SaSint,
12498 omp_block_size: SaSint,
12499 ) -> SaSint;
12500 fn probe_libsais16_mark_distinct_lms_suffixes_32s(
12501 sa: *mut SaSint,
12502 m: SaSint,
12503 omp_block_start: SaSint,
12504 omp_block_size: SaSint,
12505 );
12506 fn probe_libsais16_clamp_lms_suffixes_length_32s(
12507 sa: *mut SaSint,
12508 m: SaSint,
12509 omp_block_start: SaSint,
12510 omp_block_size: SaSint,
12511 );
12512 fn probe_libsais16_renumber_distinct_lms_suffixes_32s_4k_omp(
12513 sa: *mut SaSint,
12514 m: SaSint,
12515 threads: SaSint,
12516 ) -> SaSint;
12517 fn probe_libsais16_mark_distinct_lms_suffixes_32s_omp(
12518 sa: *mut SaSint,
12519 n: SaSint,
12520 m: SaSint,
12521 threads: SaSint,
12522 );
12523 fn probe_libsais16_clamp_lms_suffixes_length_32s_omp(
12524 sa: *mut SaSint,
12525 n: SaSint,
12526 m: SaSint,
12527 threads: SaSint,
12528 );
12529 fn probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
12530 sa: *mut SaSint,
12531 n: SaSint,
12532 m: SaSint,
12533 threads: SaSint,
12534 ) -> SaSint;
12535 fn probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s(
12536 t: *mut SaSint,
12537 sa: *mut SaSint,
12538 m: SaSint,
12539 f: SaSint,
12540 omp_block_start: SaSint,
12541 omp_block_size: SaSint,
12542 ) -> SaSint;
12543 fn probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s(
12544 sa: *mut SaSint,
12545 m: SaSint,
12546 pl: *mut SaSint,
12547 pr: *mut SaSint,
12548 omp_block_start: SaSint,
12549 omp_block_size: SaSint,
12550 );
12551 fn probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
12552 t: *mut SaSint,
12553 sa: *mut SaSint,
12554 m: SaSint,
12555 threads: SaSint,
12556 ) -> SaSint;
12557 fn probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s_omp(
12558 sa: *mut SaSint,
12559 n: SaSint,
12560 m: SaSint,
12561 fs: SaSint,
12562 f: SaSint,
12563 threads: SaSint,
12564 );
12565 fn probe_libsais16_compact_lms_suffixes_32s_omp(
12566 t: *mut SaSint,
12567 sa: *mut SaSint,
12568 n: SaSint,
12569 m: SaSint,
12570 fs: SaSint,
12571 threads: SaSint,
12572 ) -> SaSint;
12573 fn probe_libsais16_merge_unique_lms_suffixes_32s(
12574 t: *mut SaSint,
12575 sa: *mut SaSint,
12576 n: SaSint,
12577 m: SaSint,
12578 l: SaSint,
12579 omp_block_start: SaSint,
12580 omp_block_size: SaSint,
12581 );
12582 fn probe_libsais16_merge_nonunique_lms_suffixes_32s(
12583 sa: *mut SaSint,
12584 n: SaSint,
12585 m: SaSint,
12586 l: SaSint,
12587 omp_block_start: SaSint,
12588 omp_block_size: SaSint,
12589 );
12590 fn probe_libsais16_merge_unique_lms_suffixes_32s_omp(
12591 t: *mut SaSint,
12592 sa: *mut SaSint,
12593 n: SaSint,
12594 m: SaSint,
12595 threads: SaSint,
12596 );
12597 fn probe_libsais16_merge_nonunique_lms_suffixes_32s_omp(
12598 sa: *mut SaSint,
12599 n: SaSint,
12600 m: SaSint,
12601 f: SaSint,
12602 threads: SaSint,
12603 );
12604 fn probe_libsais16_merge_compacted_lms_suffixes_32s_omp(
12605 t: *mut SaSint,
12606 sa: *mut SaSint,
12607 n: SaSint,
12608 m: SaSint,
12609 f: SaSint,
12610 threads: SaSint,
12611 );
12612 fn probe_libsais16_radix_sort_lms_suffixes_32s_6k(
12613 t: *const SaSint,
12614 sa: *mut SaSint,
12615 induction_bucket: *mut SaSint,
12616 omp_block_start: SaSint,
12617 omp_block_size: SaSint,
12618 );
12619 fn probe_libsais16_radix_sort_lms_suffixes_32s_2k(
12620 t: *const SaSint,
12621 sa: *mut SaSint,
12622 induction_bucket: *mut SaSint,
12623 omp_block_start: SaSint,
12624 omp_block_size: SaSint,
12625 );
12626 fn probe_libsais16_radix_sort_lms_suffixes_32s_6k_omp(
12627 t: *const SaSint,
12628 sa: *mut SaSint,
12629 n: SaSint,
12630 m: SaSint,
12631 induction_bucket: *mut SaSint,
12632 threads: SaSint,
12633 );
12634 fn probe_libsais16_radix_sort_lms_suffixes_32s_2k_omp(
12635 t: *const SaSint,
12636 sa: *mut SaSint,
12637 n: SaSint,
12638 m: SaSint,
12639 induction_bucket: *mut SaSint,
12640 threads: SaSint,
12641 );
12642 fn probe_libsais16_radix_sort_lms_suffixes_32s_1k(
12643 t: *const SaSint,
12644 sa: *mut SaSint,
12645 n: SaSint,
12646 buckets: *mut SaSint,
12647 ) -> SaSint;
12648 fn probe_libsais16_radix_sort_set_markers_32s_6k(
12649 sa: *mut SaSint,
12650 induction_bucket: *mut SaSint,
12651 omp_block_start: SaSint,
12652 omp_block_size: SaSint,
12653 );
12654 fn probe_libsais16_radix_sort_set_markers_32s_4k(
12655 sa: *mut SaSint,
12656 induction_bucket: *mut SaSint,
12657 omp_block_start: SaSint,
12658 omp_block_size: SaSint,
12659 );
12660 fn probe_libsais16_radix_sort_set_markers_32s_6k_omp(
12661 sa: *mut SaSint,
12662 k: SaSint,
12663 induction_bucket: *mut SaSint,
12664 threads: SaSint,
12665 );
12666 fn probe_libsais16_radix_sort_set_markers_32s_4k_omp(
12667 sa: *mut SaSint,
12668 k: SaSint,
12669 induction_bucket: *mut SaSint,
12670 threads: SaSint,
12671 );
12672 fn probe_libsais16_place_lms_suffixes_histogram_32s_6k(
12673 sa: *mut SaSint,
12674 n: SaSint,
12675 k: SaSint,
12676 m: SaSint,
12677 buckets: *const SaSint,
12678 );
12679 fn probe_libsais16_place_lms_suffixes_histogram_32s_4k(
12680 sa: *mut SaSint,
12681 n: SaSint,
12682 k: SaSint,
12683 m: SaSint,
12684 buckets: *const SaSint,
12685 );
12686 fn probe_libsais16_place_lms_suffixes_histogram_32s_2k(
12687 sa: *mut SaSint,
12688 n: SaSint,
12689 k: SaSint,
12690 m: SaSint,
12691 buckets: *const SaSint,
12692 );
12693 fn probe_libsais16_gather_lms_suffixes_32s(
12694 t: *const SaSint,
12695 sa: *mut SaSint,
12696 n: SaSint,
12697 ) -> SaSint;
12698 fn probe_libsais16_gather_compacted_lms_suffixes_32s(
12699 t: *const SaSint,
12700 sa: *mut SaSint,
12701 n: SaSint,
12702 ) -> SaSint;
12703 fn probe_libsais16_count_lms_suffixes_32s_2k(
12704 t: *const SaSint,
12705 n: SaSint,
12706 k: SaSint,
12707 buckets: *mut SaSint,
12708 );
12709 fn probe_libsais16_count_and_gather_lms_suffixes_32s_4k(
12710 t: *const SaSint,
12711 sa: *mut SaSint,
12712 n: SaSint,
12713 k: SaSint,
12714 buckets: *mut SaSint,
12715 omp_block_start: SaSint,
12716 omp_block_size: SaSint,
12717 ) -> SaSint;
12718 fn probe_libsais16_count_and_gather_lms_suffixes_32s_4k_omp(
12719 t: *const SaSint,
12720 sa: *mut SaSint,
12721 n: SaSint,
12722 k: SaSint,
12723 buckets: *mut SaSint,
12724 local_buckets: SaSint,
12725 threads: SaSint,
12726 ) -> SaSint;
12727 fn probe_libsais16_count_suffixes_32s(
12728 t: *const SaSint,
12729 n: SaSint,
12730 k: SaSint,
12731 buckets: *mut SaSint,
12732 );
12733 fn probe_libsais16_initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: *mut SaSint);
12734 fn probe_libsais16_initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: *mut SaSint);
12735 fn probe_libsais16_initialize_buckets_end_32s_2k(k: SaSint, buckets: *mut SaSint);
12736 fn probe_libsais16_initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: *mut SaSint);
12737 fn probe_libsais16_initialize_buckets_start_32s_1k(k: SaSint, buckets: *mut SaSint);
12738 fn probe_libsais16_initialize_buckets_end_32s_1k(k: SaSint, buckets: *mut SaSint);
12739 fn probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
12740 t: *const SaSint,
12741 k: SaSint,
12742 buckets: *mut SaSint,
12743 first_lms_suffix: SaSint,
12744 );
12745 fn probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
12746 t: *const SaSint,
12747 k: SaSint,
12748 buckets: *mut SaSint,
12749 first_lms_suffix: SaSint,
12750 ) -> SaSint;
12751 fn probe_libsais16_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
12752 t: *const SaSint,
12753 k: SaSint,
12754 buckets: *mut SaSint,
12755 first_lms_suffix: SaSint,
12756 );
12757 fn probe_libsais16_place_lms_suffixes_interval_32s_4k(
12758 sa: *mut SaSint,
12759 n: SaSint,
12760 k: SaSint,
12761 m: SaSint,
12762 buckets: *const SaSint,
12763 );
12764 fn probe_libsais16_place_lms_suffixes_interval_32s_2k(
12765 sa: *mut SaSint,
12766 n: SaSint,
12767 k: SaSint,
12768 m: SaSint,
12769 buckets: *const SaSint,
12770 );
12771 fn probe_libsais16_place_lms_suffixes_interval_32s_1k(
12772 t: *const SaSint,
12773 sa: *mut SaSint,
12774 k: SaSint,
12775 m: SaSint,
12776 buckets: *mut SaSint,
12777 );
12778 fn probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
12779 t: *mut SaSint,
12780 sa: *mut SaSint,
12781 n: SaSint,
12782 m: SaSint,
12783 threads: SaSint,
12784 ) -> SaSint;
12785 fn probe_libsais16_partial_sorting_shift_markers_32s_6k_omp(
12786 sa: *mut SaSint,
12787 k: SaSint,
12788 buckets: *const SaSint,
12789 threads: SaSint,
12790 );
12791 fn probe_libsais16_partial_sorting_shift_markers_32s_4k(sa: *mut SaSint, n: SaSint);
12792 fn probe_libsais16_partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: *mut SaSint);
12793 fn probe_libsais16_partial_sorting_scan_left_to_right_32s_6k(
12794 t: *const SaSint,
12795 sa: *mut SaSint,
12796 buckets: *mut SaSint,
12797 d: SaSint,
12798 omp_block_start: SaSint,
12799 omp_block_size: SaSint,
12800 ) -> SaSint;
12801 fn probe_libsais16_partial_sorting_scan_left_to_right_32s_4k(
12802 t: *const SaSint,
12803 sa: *mut SaSint,
12804 k: SaSint,
12805 buckets: *mut SaSint,
12806 d: SaSint,
12807 omp_block_start: SaSint,
12808 omp_block_size: SaSint,
12809 ) -> SaSint;
12810 fn probe_libsais16_partial_sorting_scan_left_to_right_32s_1k(
12811 t: *const SaSint,
12812 sa: *mut SaSint,
12813 buckets: *mut SaSint,
12814 omp_block_start: SaSint,
12815 omp_block_size: SaSint,
12816 );
12817 fn probe_libsais16_partial_sorting_scan_left_to_right_32s_6k_omp(
12818 t: *const SaSint,
12819 sa: *mut SaSint,
12820 n: SaSint,
12821 buckets: *mut SaSint,
12822 left_suffixes_count: SaSint,
12823 d: SaSint,
12824 threads: SaSint,
12825 ) -> SaSint;
12826 fn probe_libsais16_partial_sorting_scan_left_to_right_32s_4k_omp(
12827 t: *const SaSint,
12828 sa: *mut SaSint,
12829 n: SaSint,
12830 k: SaSint,
12831 buckets: *mut SaSint,
12832 d: SaSint,
12833 threads: SaSint,
12834 ) -> SaSint;
12835 fn probe_libsais16_partial_sorting_scan_left_to_right_32s_1k_omp(
12836 t: *const SaSint,
12837 sa: *mut SaSint,
12838 n: SaSint,
12839 buckets: *mut SaSint,
12840 threads: SaSint,
12841 );
12842 fn probe_libsais16_partial_sorting_scan_right_to_left_32s_6k(
12843 t: *const SaSint,
12844 sa: *mut SaSint,
12845 buckets: *mut SaSint,
12846 d: SaSint,
12847 omp_block_start: SaSint,
12848 omp_block_size: SaSint,
12849 ) -> SaSint;
12850 fn probe_libsais16_partial_sorting_scan_right_to_left_32s_4k(
12851 t: *const SaSint,
12852 sa: *mut SaSint,
12853 k: SaSint,
12854 buckets: *mut SaSint,
12855 d: SaSint,
12856 omp_block_start: SaSint,
12857 omp_block_size: SaSint,
12858 ) -> SaSint;
12859 fn probe_libsais16_partial_sorting_scan_right_to_left_32s_1k(
12860 t: *const SaSint,
12861 sa: *mut SaSint,
12862 buckets: *mut SaSint,
12863 omp_block_start: SaSint,
12864 omp_block_size: SaSint,
12865 );
12866 fn probe_libsais16_partial_sorting_scan_right_to_left_32s_6k_omp(
12867 t: *const SaSint,
12868 sa: *mut SaSint,
12869 n: SaSint,
12870 buckets: *mut SaSint,
12871 first_lms_suffix: SaSint,
12872 left_suffixes_count: SaSint,
12873 d: SaSint,
12874 threads: SaSint,
12875 ) -> SaSint;
12876 fn probe_libsais16_partial_sorting_scan_right_to_left_32s_4k_omp(
12877 t: *const SaSint,
12878 sa: *mut SaSint,
12879 n: SaSint,
12880 k: SaSint,
12881 buckets: *mut SaSint,
12882 d: SaSint,
12883 threads: SaSint,
12884 ) -> SaSint;
12885 fn probe_libsais16_partial_sorting_scan_right_to_left_32s_1k_omp(
12886 t: *const SaSint,
12887 sa: *mut SaSint,
12888 n: SaSint,
12889 buckets: *mut SaSint,
12890 threads: SaSint,
12891 );
12892 fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k(
12893 sa: *mut SaSint,
12894 omp_block_start: SaSint,
12895 omp_block_size: SaSint,
12896 ) -> SaSint;
12897 fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k(
12898 sa: *mut SaSint,
12899 omp_block_start: SaSint,
12900 omp_block_size: SaSint,
12901 ) -> SaSint;
12902 fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k_omp(
12903 sa: *mut SaSint,
12904 n: SaSint,
12905 threads: SaSint,
12906 );
12907 fn probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k_omp(
12908 sa: *mut SaSint,
12909 n: SaSint,
12910 threads: SaSint,
12911 );
12912 fn probe_libsais16_count_and_gather_lms_suffixes_32s_2k(
12913 t: *const SaSint,
12914 sa: *mut SaSint,
12915 n: SaSint,
12916 k: SaSint,
12917 buckets: *mut SaSint,
12918 omp_block_start: SaSint,
12919 omp_block_size: SaSint,
12920 ) -> SaSint;
12921 fn probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k(
12922 t: *const SaSint,
12923 sa: *mut SaSint,
12924 n: SaSint,
12925 k: SaSint,
12926 buckets: *mut SaSint,
12927 omp_block_start: SaSint,
12928 omp_block_size: SaSint,
12929 ) -> SaSint;
12930 fn probe_libsais16_count_and_gather_lms_suffixes_32s_2k_omp(
12931 t: *const SaSint,
12932 sa: *mut SaSint,
12933 n: SaSint,
12934 k: SaSint,
12935 buckets: *mut SaSint,
12936 local_buckets: SaSint,
12937 threads: SaSint,
12938 ) -> SaSint;
12939 fn probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
12940 t: *const SaSint,
12941 sa: *mut SaSint,
12942 n: SaSint,
12943 k: SaSint,
12944 buckets: *mut SaSint,
12945 local_buckets: SaSint,
12946 threads: SaSint,
12947 );
12948 fn probe_libsais16_reconstruct_compacted_lms_suffixes_32s_2k_omp(
12949 t: *mut SaSint,
12950 sa: *mut SaSint,
12951 n: SaSint,
12952 k: SaSint,
12953 m: SaSint,
12954 fs: SaSint,
12955 f: SaSint,
12956 buckets: *mut SaSint,
12957 local_buckets: SaSint,
12958 threads: SaSint,
12959 );
12960 fn probe_libsais16_reconstruct_compacted_lms_suffixes_32s_1k_omp(
12961 t: *mut SaSint,
12962 sa: *mut SaSint,
12963 n: SaSint,
12964 m: SaSint,
12965 fs: SaSint,
12966 f: SaSint,
12967 threads: SaSint,
12968 );
12969 }
12970
12971 fn brute_sa(t: &[u16]) -> Vec<SaSint> {
12972 let mut sa: Vec<_> = (0..t.len() as SaSint).collect();
12973 sa.sort_by(|&a, &b| t[a as usize..].cmp(&t[b as usize..]));
12974 sa
12975 }
12976
/// Runs the Rust `gather_lms_suffixes_16u` and its C probe on the same small
/// inputs and checks that both leave the suffix array in the same state.
#[test]
fn libsais16_gather_lms_suffixes_16u_matches_c() {
    const CASES: [&[u16]; 4] = [
        &[2, 1, 3, 1, 2, 0],
        &[7, 7, 7, 7, 0],
        &[3, 1, 2, 1, 0, 4, 1, 0],
        &[9, 1, 9, 1, 9, 0, 2, 2, 0],
    ];

    for text in CASES {
        let len = text.len();
        let n = len as SaSint;
        // Both sides start from the same -99 fill so untouched slots compare
        // equal too.
        let mut c_sa: Vec<SaSint> = vec![-99; len];
        let mut rust_sa = c_sa.clone();

        gather_lms_suffixes_16u(text, &mut rust_sa, n, n - 1, 0, n);
        // SAFETY: `text` and `c_sa` are live for the whole call and each holds
        // `n` elements; this assumes the C probe stays inside those ranges
        // (its body is not visible from here).
        unsafe {
            probe_libsais16_gather_lms_suffixes_16u(
                text.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                n - 1,
                0,
                n,
            );
        }

        assert_eq!(rust_sa, c_sa);
    }
}
13006
/// Compares the Rust `count_and_gather_lms_suffixes_16u` with its C probe:
/// the returned count, the suffix array and the bucket table must all match.
#[test]
fn libsais16_count_and_gather_lms_suffixes_16u_matches_c() {
    const CASES: [&[u16]; 4] = [
        &[2, 1, 3, 1, 2, 0],
        &[7, 7, 7, 7, 0],
        &[3, 1, 2, 1, 0, 4, 1, 0],
        &[9, 1, 9, 1, 9, 0, 2, 2, 0],
    ];

    for text in CASES {
        let len = text.len();
        let n = len as SaSint;

        // Identical starting state for both implementations.
        let mut c_sa: Vec<SaSint> = vec![-99; len];
        let mut rust_sa = c_sa.clone();
        let mut c_buckets: Vec<SaSint> = vec![-1; 4 * ALPHABET_SIZE];
        let mut rust_buckets = c_buckets.clone();

        let rust_m =
            count_and_gather_lms_suffixes_16u(text, &mut rust_sa, n, &mut rust_buckets, 0, n);
        // SAFETY: every pointer comes from a slice or Vec that outlives the
        // call (`n` text symbols, `n` SA slots, `4 * ALPHABET_SIZE` buckets);
        // this assumes the C probe stays inside those ranges.
        let c_m = unsafe {
            probe_libsais16_count_and_gather_lms_suffixes_16u(
                text.as_ptr(),
                c_sa.as_mut_ptr(),
                n,
                c_buckets.as_mut_ptr(),
                0,
                n,
            )
        };

        assert_eq!(rust_m, c_m);
        assert_eq!(rust_sa, c_sa);
        assert_eq!(rust_buckets, c_buckets);
    }
}
13041
/// Seeds a few per-symbol counters, then checks that the Rust
/// `initialize_buckets_start_and_end_16u` agrees with the C probe, first with
/// a frequency table supplied and then without one.
#[test]
fn libsais16_initialize_buckets_start_and_end_16u_matches_c() {
    // (symbol, counters for states 0..4), stored through `buckets_index4`.
    let seeded_counts = [
        (0usize, [1, 0, 0, 2]),
        (1, [0, 3, 1, 0]),
        (7, [2, 1, 0, 1]),
        (1024, [0, 0, 5, 0]),
    ];

    let mut rust_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for (symbol, counts) in seeded_counts {
        for (state, &count) in counts.iter().enumerate() {
            rust_buckets[buckets_index4(symbol, state)] = count;
        }
    }
    let mut c_buckets = rust_buckets.clone();
    let mut c_freq: Vec<SaSint> = vec![-1; ALPHABET_SIZE];
    let mut rust_freq = c_freq.clone();

    // Pass 1: frequency table supplied.
    let rust_k_with_freq =
        initialize_buckets_start_and_end_16u(&mut rust_buckets, Some(&mut rust_freq));
    // SAFETY: both pointers come from live Vecs (`8 * ALPHABET_SIZE` buckets,
    // `ALPHABET_SIZE` frequencies); this assumes the C probe stays inside
    // those ranges.
    let c_k_with_freq = unsafe {
        probe_libsais16_initialize_buckets_start_and_end_16u(
            c_buckets.as_mut_ptr(),
            c_freq.as_mut_ptr(),
        )
    };

    assert_eq!(rust_k_with_freq, c_k_with_freq);
    assert_eq!(rust_buckets, c_buckets);
    assert_eq!(rust_freq, c_freq);

    // Pass 2: no frequency table. The input reuses the first
    // `4 * ALPHABET_SIZE` entries of the bucket table as pass 1 left them and
    // zeroes the rest.
    let mut rust_buckets_no_freq = rust_buckets[..4 * ALPHABET_SIZE].to_vec();
    rust_buckets_no_freq.resize(8 * ALPHABET_SIZE, 0);
    let mut c_buckets_no_freq = rust_buckets_no_freq.clone();

    let rust_k_no_freq = initialize_buckets_start_and_end_16u(&mut rust_buckets_no_freq, None);
    // SAFETY: the bucket pointer comes from a live Vec of `8 * ALPHABET_SIZE`
    // entries; the null frequency pointer mirrors the `None` passed to the
    // Rust side and assumes the C probe accepts it.
    let c_k_no_freq = unsafe {
        probe_libsais16_initialize_buckets_start_and_end_16u(
            c_buckets_no_freq.as_mut_ptr(),
            std::ptr::null_mut(),
        )
    };

    assert_eq!(rust_k_no_freq, c_k_no_freq);
    assert_eq!(rust_buckets_no_freq, c_buckets_no_freq);
}
13087
/// Prepares buckets with the Rust pipeline, then checks that
/// `initialize_buckets_for_lms_suffixes_radix_sort_16u` and its C probe return
/// the same value and leave identical bucket tables.
#[test]
fn libsais16_lms_radix_bucket_initialization_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let mut rust_sa: Vec<SaSint> = vec![-99; text.len()];
    let mut rust_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];

    // Shared setup: only the first `4 * ALPHABET_SIZE` bucket slots are handed
    // to the counting step.
    let m = count_and_gather_lms_suffixes_16u(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = rust_sa[(n - m) as usize];

    // Snapshot the prepared state so the C side starts from the same input.
    let mut c_buckets = rust_buckets.clone();

    let rust_count = initialize_buckets_for_lms_suffixes_radix_sort_16u(
        &text,
        &mut rust_buckets,
        first_lms_suffix,
    );
    // SAFETY: `text` and `c_buckets` are live for the whole call; this assumes
    // the C probe stays inside the 8 text symbols and the
    // `8 * ALPHABET_SIZE` bucket entries.
    let c_count = unsafe {
        probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_16u(
            text.as_ptr(),
            c_buckets.as_mut_ptr(),
            first_lms_suffix,
        )
    };

    assert_eq!(rust_count, c_count);
    assert_eq!(rust_buckets, c_buckets);
}
13122
/// Prepares SA and buckets with the Rust pipeline, then checks that
/// `radix_sort_lms_suffixes_16u` and its C probe produce the same suffix array
/// and bucket table.
#[test]
fn libsais16_radix_sort_lms_suffixes_16u_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let mut rust_sa: Vec<SaSint> = vec![-99; text.len()];
    let mut rust_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];

    // Shared setup, done once on the Rust side.
    let m = count_and_gather_lms_suffixes_16u(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = rust_sa[(n - m) as usize];
    initialize_buckets_for_lms_suffixes_radix_sort_16u(
        &text,
        &mut rust_buckets,
        first_lms_suffix,
    );

    // Snapshot the prepared state for the C side.
    let mut c_sa = rust_sa.clone();
    let mut c_buckets = rust_buckets.clone();

    // Both calls get the same block and the bucket region starting at
    // `4 * ALPHABET_SIZE`.
    let (block_start, block_size) = (n - m + 1, m - 1);
    radix_sort_lms_suffixes_16u(
        &text,
        &mut rust_sa,
        &mut rust_buckets[4 * ALPHABET_SIZE..],
        block_start,
        block_size,
    );
    // SAFETY: all pointers come from live buffers (`text`, `c_sa`, and the
    // tail of `c_buckets` from `4 * ALPHABET_SIZE` on); this assumes the C
    // probe stays inside those ranges.
    unsafe {
        probe_libsais16_radix_sort_lms_suffixes_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_buckets[4 * ALPHABET_SIZE..].as_mut_ptr(),
            block_start,
            block_size,
        );
    }

    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
13164
/// Prepares buckets with the Rust pipeline, then checks that
/// `initialize_buckets_for_partial_sorting_16u` and its C probe leave
/// identical bucket tables.
#[test]
fn libsais16_initialize_buckets_for_partial_sorting_16u_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let mut rust_sa: Vec<SaSint> = vec![-99; text.len()];
    let mut rust_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];

    // Shared setup; the value returned by the radix-sort initialisation is
    // passed on as `left_suffixes_count`.
    let m = count_and_gather_lms_suffixes_16u(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = rust_sa[(n - m) as usize];
    let left_suffixes_count = initialize_buckets_for_lms_suffixes_radix_sort_16u(
        &text,
        &mut rust_buckets,
        first_lms_suffix,
    );

    // Snapshot the prepared state for the C side.
    let mut c_buckets = rust_buckets.clone();

    initialize_buckets_for_partial_sorting_16u(
        &text,
        &mut rust_buckets,
        first_lms_suffix,
        left_suffixes_count,
    );
    // SAFETY: `text` and `c_buckets` are live for the whole call; this assumes
    // the C probe stays inside the 8 text symbols and the
    // `8 * ALPHABET_SIZE` bucket entries.
    unsafe {
        probe_libsais16_initialize_buckets_for_partial_sorting_16u(
            text.as_ptr(),
            c_buckets.as_mut_ptr(),
            first_lms_suffix,
            left_suffixes_count,
        );
    }

    assert_eq!(rust_buckets, c_buckets);
}
13205
13206 fn partial_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13207 let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13208 let mut sa = vec![0; 128];
13209 sa[..5].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13210
13211 let mut buckets = vec![0; 6 * ALPHABET_SIZE];
13212 for v in 0..32 {
13213 buckets[v] = 80 + (v as SaSint) * 4;
13214 buckets[2 * ALPHABET_SIZE + v] = if v % 3 == 0 { 2 } else { 0 };
13215 buckets[4 * ALPHABET_SIZE + v] = 20 + (v as SaSint) * 4;
13216 }
13217
13218 (text, sa, buckets)
13219 }
13220
/// Runs `partial_sorting_scan_left_to_right_16u` and its C probe on the shared
/// fixture and checks the returned value, the suffix array and the buckets.
#[test]
fn libsais16_partial_sorting_scan_left_to_right_16u_matches_c() {
    let (text, sa_seed, buckets_seed) = partial_scan_fixture();
    // Each side works on its own copy of the same starting state.
    let (mut rust_sa, mut rust_buckets) = (sa_seed.clone(), buckets_seed.clone());
    let (mut c_sa, mut c_buckets) = (sa_seed, buckets_seed);

    let rust_d =
        partial_sorting_scan_left_to_right_16u(&text, &mut rust_sa, &mut rust_buckets, 3, 0, 5);
    // SAFETY: the pointers come from the fixture's text (10 symbols), a
    // 128-entry SA and a `6 * ALPHABET_SIZE` bucket table, all live for the
    // call; this assumes the C probe stays inside those ranges.
    let c_d = unsafe {
        probe_libsais16_partial_sorting_scan_left_to_right_16u(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            c_buckets.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(rust_d, c_d);
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
13244
/// Runs the scalar right-to-left partial-sorting scan on the shared fixture and
/// compares the returned value, suffix array and bucket table with the
/// `probe_libsais16_*` counterpart.
#[test]
fn libsais16_partial_sorting_scan_right_to_left_16u_matches_c() {
    let (text, seed_sa, seed_buckets) = partial_scan_fixture();

    // The Rust implementation works on private copies of the seeded state.
    let mut sa_rs = seed_sa.clone();
    let mut buckets_rs = seed_buckets.clone();
    let d_rs =
        partial_sorting_scan_right_to_left_16u(&text, &mut sa_rs, &mut buckets_rs, 3, 0, 5);

    // The probe consumes the original seeded buffers.
    let mut sa_c = seed_sa;
    let mut buckets_c = seed_buckets;
    // SAFETY: every pointer is derived from a live local buffer that outlives the call.
    // NOTE(review): assumes the probe only touches elements inside those buffers — confirm
    // against the C shim.
    let d_c = unsafe {
        probe_libsais16_partial_sorting_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            buckets_c.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(d_rs, d_c);
    assert_eq!(sa_rs, sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
13268
/// Runs the scalar right-to-left partial GSA scan on the shared fixture and
/// compares the returned value, suffix array and bucket table with the
/// `probe_libsais16_*` counterpart.
#[test]
fn libsais16_partial_gsa_scan_right_to_left_16u_matches_c() {
    let (text, seed_sa, seed_buckets) = partial_scan_fixture();

    // The Rust implementation works on private copies of the seeded state.
    let mut sa_rs = seed_sa.clone();
    let mut buckets_rs = seed_buckets.clone();
    let d_rs = partial_gsa_scan_right_to_left_16u(&text, &mut sa_rs, &mut buckets_rs, 3, 0, 5);

    // The probe consumes the original seeded buffers.
    let mut sa_c = seed_sa;
    let mut buckets_c = seed_buckets;
    // SAFETY: every pointer is derived from a live local buffer that outlives the call.
    // NOTE(review): assumes the probe only touches elements inside those buffers — confirm
    // against the C shim.
    let d_c = unsafe {
        probe_libsais16_partial_gsa_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            buckets_c.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(d_rs, d_c);
    assert_eq!(sa_rs, sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
13292
/// Applies the marker-shifting pass to a small hand-built suffix array and
/// checks that the Rust result equals what the `probe_libsais16_*` shim produces.
#[test]
fn libsais16_partial_sorting_shift_markers_16u_matches_c() {
    let marked = |value: SaSint| value | SAINT_MIN;

    // 16 slots: two seeded runs at 2..6 and 8..12, everything else zero.
    let seed_sa: Vec<SaSint> = vec![
        0,
        0,
        1,
        marked(2),
        marked(3),
        4,
        0,
        0,
        marked(5),
        6,
        marked(7),
        8,
        0,
        0,
        0,
        0,
    ];

    let mut buckets: Vec<SaSint> = vec![0; 6 * ALPHABET_SIZE];
    for (slot, value) in [
        (0, 2),
        (2, 8),
        (4 * ALPHABET_SIZE + 2, 6),
        (4 * ALPHABET_SIZE + 4, 12),
    ] {
        buckets[slot] = value;
    }

    let mut sa_rs = seed_sa.clone();
    let mut sa_c = seed_sa;

    let n = sa_rs.len() as SaSint;
    partial_sorting_shift_markers_16u_omp(&mut sa_rs, n, &buckets, 1);
    // SAFETY: both pointers come from live local vectors, and the length passed is `sa_c.len()`.
    // NOTE(review): assumes the probe only reads `buckets` within its allocation — confirm
    // against the C shim.
    unsafe {
        probe_libsais16_partial_sorting_shift_markers_16u_omp(
            sa_c.as_mut_ptr(),
            sa_c.len() as SaSint,
            buckets.as_ptr(),
            1,
        );
    }

    assert_eq!(sa_rs, sa_c);
}
13319
/// Feeds one 65 536-entry block through the scalar left-to-right partial scan
/// and through the `_block_omp` variant, then requires identical return value,
/// suffix array and bucket table.
#[test]
fn libsais16_partial_left_to_right_16u_block_omp_uses_cache_pipeline() {
    const BLOCK_SIZE: usize = 65_536;
    const K: usize = 512;

    let text: Vec<u16> = (0..BLOCK_SIZE + 2)
        .map(|i| 1 + ((i * 17 + i / 7) % (K - 1)) as u16)
        .collect();

    // The block holds positions 2, 3, ...; the tail is zeroed room for induced entries.
    let seed_sa: Vec<SaSint> = (0..BLOCK_SIZE + 2 * K * 100)
        .map(|i| if i < BLOCK_SIZE { (i + 2) as SaSint } else { 0 })
        .collect();

    let mut seed_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for (v, slot) in seed_buckets[4 * ALPHABET_SIZE..4 * ALPHABET_SIZE + 2 * K]
        .iter_mut()
        .enumerate()
    {
        *slot = (BLOCK_SIZE + v * 100) as SaSint;
    }

    let mut thread_state = alloc_thread_state(4).unwrap();

    let mut serial_sa = seed_sa.clone();
    let mut serial_buckets = seed_buckets.clone();
    let serial_d = partial_sorting_scan_left_to_right_16u(
        &text,
        &mut serial_sa,
        &mut serial_buckets,
        0,
        0,
        BLOCK_SIZE as SaSint,
    );

    let mut pooled_sa = seed_sa;
    let mut pooled_buckets = seed_buckets;
    let pooled_d = partial_sorting_scan_left_to_right_16u_block_omp(
        &text,
        &mut pooled_sa,
        K as SaSint,
        &mut pooled_buckets,
        0,
        0,
        BLOCK_SIZE as SaSint,
        4,
        &mut thread_state,
    );

    assert_eq!(pooled_d, serial_d);
    assert_eq!(pooled_sa, serial_sa);
    assert_eq!(pooled_buckets, serial_buckets);
}
13366
/// Calls the `_omp` left-to-right partial scan twice on identical input, with a
/// final argument of 1 and then 4 (presumably the thread count — confirm
/// against the callee), and requires identical results.
#[test]
fn libsais16_partial_left_to_right_16u_omp_uses_block_pipeline() {
    const BLOCK_SIZE: usize = 65_536;
    const K: usize = 512;

    let text: Vec<u16> = (0..BLOCK_SIZE + 2)
        .map(|i| 1 + ((i * 17 + i / 7) % (K - 1)) as u16)
        .collect();

    // Every 17th entry of the block carries the `SAINT_MIN` bit.
    let seed_sa: Vec<SaSint> = (0..BLOCK_SIZE + 2 * K * 100)
        .map(|i| {
            if i >= BLOCK_SIZE {
                return 0;
            }
            let value = (i + 2) as SaSint;
            if i % 17 == 0 {
                value | SAINT_MIN
            } else {
                value
            }
        })
        .collect();

    let mut seed_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for (v, slot) in seed_buckets[4 * ALPHABET_SIZE..4 * ALPHABET_SIZE + 2 * K]
        .iter_mut()
        .enumerate()
    {
        *slot = (BLOCK_SIZE + v * 100) as SaSint;
    }
    // The table starts zeroed, so only every fifth entry of this section needs a write.
    for slot in seed_buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + 2 * K]
        .iter_mut()
        .step_by(5)
    {
        *slot = 3;
    }

    let mut serial_sa = seed_sa.clone();
    let mut serial_buckets = seed_buckets.clone();
    let serial_d = partial_sorting_scan_left_to_right_16u_omp(
        &text,
        &mut serial_sa,
        text.len() as SaSint,
        K as SaSint,
        &mut serial_buckets,
        BLOCK_SIZE as SaSint,
        7,
        1,
    );

    let mut pooled_sa = seed_sa;
    let mut pooled_buckets = seed_buckets;
    let pooled_d = partial_sorting_scan_left_to_right_16u_omp(
        &text,
        &mut pooled_sa,
        text.len() as SaSint,
        K as SaSint,
        &mut pooled_buckets,
        BLOCK_SIZE as SaSint,
        7,
        4,
    );

    assert_eq!(pooled_d, serial_d);
    assert_eq!(pooled_sa, serial_sa);
    assert_eq!(pooled_buckets, serial_buckets);
}
13419
/// Compares the scalar right-to-left partial scans (plain and GSA) with their
/// `_block_omp` variants on one 65 536-entry block placed at a non-zero offset.
#[test]
fn libsais16_partial_right_to_left_16u_block_omp_uses_cache_pipeline() {
    const BLOCK_SIZE: usize = 65_536;
    const K: usize = 512;
    const WIDTH: usize = 2 * K;
    const BLOCK_START: usize = WIDTH * 200 + 1024;

    let text: Vec<u16> = (0..BLOCK_SIZE + 2)
        .map(|i| 1 + ((i * 17 + i / 7) % (K - 1)) as u16)
        .collect();

    // Only the block itself is seeded; every 17th entry carries the `SAINT_MIN` bit.
    let mut seed_sa: Vec<SaSint> = vec![0; BLOCK_START + BLOCK_SIZE + 1];
    for (i, slot) in seed_sa[BLOCK_START..BLOCK_START + BLOCK_SIZE]
        .iter_mut()
        .enumerate()
    {
        let value = (i + 2) as SaSint;
        *slot = if i % 17 == 0 { value | SAINT_MIN } else { value };
    }

    let mut seed_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for (v, slot) in seed_buckets[..WIDTH].iter_mut().enumerate() {
        *slot = ((v + 1) * 200) as SaSint;
    }
    // The table starts zeroed, so only every fifth entry of this section needs a write.
    for slot in seed_buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + WIDTH]
        .iter_mut()
        .step_by(5)
    {
        *slot = 3;
    }

    // One thread state is shared by both block-parallel calls below.
    let mut thread_state = alloc_thread_state(4).unwrap();

    // Plain partial sorting scan.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_buckets = seed_buckets.clone();
        let mut pooled_sa = seed_sa.clone();
        let mut pooled_buckets = seed_buckets.clone();

        let serial_d = partial_sorting_scan_right_to_left_16u(
            &text,
            &mut serial_sa,
            &mut serial_buckets,
            7,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
        );
        let pooled_d = partial_sorting_scan_right_to_left_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            &mut pooled_buckets,
            7,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_d, serial_d);
        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_buckets, serial_buckets);
    }

    // GSA variant on the same seeded input.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_buckets = seed_buckets.clone();
        let mut pooled_sa = seed_sa;
        let mut pooled_buckets = seed_buckets;

        let serial_d = partial_gsa_scan_right_to_left_16u(
            &text,
            &mut serial_sa,
            &mut serial_buckets,
            7,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
        );
        let pooled_d = partial_gsa_scan_right_to_left_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            &mut pooled_buckets,
            7,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_d, serial_d);
        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_buckets, serial_buckets);
    }
}
13500
/// Calls the `_omp` right-to-left partial scans (plain and GSA) twice on
/// identical input, with a final argument of 1 and then 4 (presumably the
/// thread count — confirm against the callee), and requires identical suffix
/// arrays and bucket tables.
#[test]
fn libsais16_partial_right_to_left_16u_omp_uses_block_pipeline() {
    const BLOCK_SIZE: usize = 65_536;
    const K: usize = 512;
    const WIDTH: usize = 2 * K;
    const BLOCK_START: usize = WIDTH * 200 + 1024;
    const SA_LEN: usize = BLOCK_START + BLOCK_SIZE + 1;

    let text: Vec<u16> = (0..BLOCK_SIZE + 2)
        .map(|i| 1 + ((i * 17 + i / 7) % (K - 1)) as u16)
        .collect();

    let n = SA_LEN as SaSint;
    let first_lms_suffix = n - (BLOCK_START + BLOCK_SIZE) as SaSint;
    let left_suffixes_count = BLOCK_START as SaSint - 1;

    // Only the block itself is seeded; every 17th entry carries the `SAINT_MIN` bit.
    let mut seed_sa: Vec<SaSint> = vec![0; SA_LEN];
    for (i, slot) in seed_sa[BLOCK_START..BLOCK_START + BLOCK_SIZE]
        .iter_mut()
        .enumerate()
    {
        let value = (i + 2) as SaSint;
        *slot = if i % 17 == 0 { value | SAINT_MIN } else { value };
    }

    let mut seed_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for (v, slot) in seed_buckets[..WIDTH].iter_mut().enumerate() {
        *slot = ((v + 1) * 200) as SaSint;
    }
    // The table starts zeroed, so only every fifth entry of this section needs a write.
    for slot in seed_buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + WIDTH]
        .iter_mut()
        .step_by(5)
    {
        *slot = 3;
    }

    // Plain partial sorting scan.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_buckets = seed_buckets.clone();
        let mut pooled_sa = seed_sa.clone();
        let mut pooled_buckets = seed_buckets.clone();

        partial_sorting_scan_right_to_left_16u_omp(
            &text,
            &mut serial_sa,
            n,
            K as SaSint,
            &mut serial_buckets,
            first_lms_suffix,
            left_suffixes_count,
            7,
            1,
        );
        partial_sorting_scan_right_to_left_16u_omp(
            &text,
            &mut pooled_sa,
            n,
            K as SaSint,
            &mut pooled_buckets,
            first_lms_suffix,
            left_suffixes_count,
            7,
            4,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_buckets, serial_buckets);
    }

    // GSA variant on the same seeded input.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_buckets = seed_buckets.clone();
        let mut pooled_sa = seed_sa;
        let mut pooled_buckets = seed_buckets;

        partial_gsa_scan_right_to_left_16u_omp(
            &text,
            &mut serial_sa,
            n,
            K as SaSint,
            &mut serial_buckets,
            first_lms_suffix,
            left_suffixes_count,
            7,
            1,
        );
        partial_gsa_scan_right_to_left_16u_omp(
            &text,
            &mut pooled_sa,
            n,
            K as SaSint,
            &mut pooled_buckets,
            first_lms_suffix,
            left_suffixes_count,
            7,
            4,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_buckets, serial_buckets);
    }
}
13587
13588 fn final_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13589 let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13590 let mut sa = vec![0; 96];
13591 sa[..6].copy_from_slice(&[3, 0, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13592
13593 let mut induction_bucket = vec![0; ALPHABET_SIZE];
13594 for c in 0..8 {
13595 induction_bucket[c] = 24 + (c as SaSint) * 6;
13596 }
13597
13598 (text, sa, induction_bucket)
13599 }
13600
13601 fn final_order_buckets(induction_bucket: &[SaSint]) -> Vec<SaSint> {
13602 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
13603 buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13604 buckets[7 * ALPHABET_SIZE..8 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13605 buckets
13606 }
13607
/// Runs the scalar left-to-right final sorting scan on the shared fixture and
/// compares the suffix array and induction bucket with the `probe_libsais16_*`
/// counterpart.
#[test]
fn libsais16_final_sorting_scan_left_to_right_16u_matches_c() {
    let (text, seed_sa, seed_bucket) = final_scan_fixture();

    let mut sa_rs = seed_sa.clone();
    let mut bucket_rs = seed_bucket.clone();
    final_sorting_scan_left_to_right_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);

    let mut sa_c = seed_sa;
    let mut bucket_c = seed_bucket;
    // SAFETY: every pointer is derived from a live local buffer that outlives the call.
    // NOTE(review): assumes the probe only touches elements inside those buffers — confirm
    // against the C shim.
    unsafe {
        probe_libsais16_final_sorting_scan_left_to_right_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
13628
/// Runs the scalar right-to-left final sorting scan on the shared fixture and
/// compares the suffix array and induction bucket with the `probe_libsais16_*`
/// counterpart.
#[test]
fn libsais16_final_sorting_scan_right_to_left_16u_matches_c() {
    let (text, seed_sa, seed_bucket) = final_scan_fixture();

    let mut sa_rs = seed_sa.clone();
    let mut bucket_rs = seed_bucket.clone();
    final_sorting_scan_right_to_left_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);

    let mut sa_c = seed_sa;
    let mut bucket_c = seed_bucket;
    // SAFETY: every pointer is derived from a live local buffer that outlives the call.
    // NOTE(review): assumes the probe only touches elements inside those buffers — confirm
    // against the C shim.
    unsafe {
        probe_libsais16_final_sorting_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
13649
/// Runs the scalar right-to-left final GSA scan on the shared fixture and
/// compares the suffix array and induction bucket with the `probe_libsais16_*`
/// counterpart.
#[test]
fn libsais16_final_gsa_scan_right_to_left_16u_matches_c() {
    let (text, seed_sa, seed_bucket) = final_scan_fixture();

    let mut sa_rs = seed_sa.clone();
    let mut bucket_rs = seed_bucket.clone();
    final_gsa_scan_right_to_left_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);

    let mut sa_c = seed_sa;
    let mut bucket_c = seed_bucket;
    // SAFETY: every pointer is derived from a live local buffer that outlives the call.
    // NOTE(review): assumes the probe only touches elements inside those buffers — confirm
    // against the C shim.
    unsafe {
        probe_libsais16_final_gsa_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
13670
/// Exercises the 32-bit final sorting helpers on tiny inputs: the two scalar
/// scans are compared against their `probe_libsais16_*` counterparts, and the
/// two `_block_omp` variants are checked against hard-coded expected values.
#[test]
fn libsais16_final_sorting_32s_helpers_behave_like_upstream_shapes() {
    let t: Vec<SaSint> = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];

    // Scalar left-to-right scan vs. the probe.
    {
        let mut sa_rs = vec![1, 0, 0];
        let mut bucket_rs = vec![0, 1, 3];
        let mut sa_c = sa_rs.clone();
        let mut bucket_c = bucket_rs.clone();

        final_sorting_scan_left_to_right_32s(&t, &mut sa_rs, &mut bucket_rs, 0, 1);
        // SAFETY: every pointer is derived from a live local vector that outlives the call.
        // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
        unsafe {
            probe_libsais16_final_sorting_scan_left_to_right_32s(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                0,
                1,
            );
        }

        assert_eq!(sa_rs, sa_c);
        assert_eq!(bucket_rs, bucket_c);
    }

    // Scalar right-to-left scan vs. the probe.
    {
        let mut sa_rs = vec![0, 2, 0];
        let mut bucket_rs = vec![1, 2, 3];
        let mut sa_c = sa_rs.clone();
        let mut bucket_c = bucket_rs.clone();

        final_sorting_scan_right_to_left_32s(&t, &mut sa_rs, &mut bucket_rs, 0, 2);
        // SAFETY: every pointer is derived from a live local vector that outlives the call.
        // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
        unsafe {
            probe_libsais16_final_sorting_scan_right_to_left_32s(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                0,
                2,
            );
        }

        assert_eq!(sa_rs, sa_c);
        assert_eq!(bucket_rs, bucket_c);
    }

    // Block left-to-right variant, checked against fixed expectations.
    {
        let mut sa = vec![1, 2, 0, 0];
        let mut induction_bucket = vec![0, 1, 3];
        let mut cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];

        final_sorting_scan_left_to_right_32s_block_omp(
            &t,
            &mut sa,
            &mut induction_bucket,
            &mut cache,
            0,
            2,
            2,
        );

        // Only the low 31 bits are compared; the top bit is masked off.
        assert_eq!(sa[0] & SAINT_MAX, 0);
        assert_eq!(sa[1] & SAINT_MAX, 1);
        assert_eq!(induction_bucket[0], 1);
        assert_eq!(induction_bucket[1], 2);
    }

    // Block right-to-left variant, checked against fixed expectations.
    {
        let mut sa = vec![0, 2, 0, 0];
        let mut induction_bucket = vec![1, 2, 3];
        let mut cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];

        final_sorting_scan_right_to_left_32s_block_omp(
            &t,
            &mut sa,
            &mut induction_bucket,
            &mut cache,
            0,
            2,
            2,
        );

        assert_eq!(sa[1] & SAINT_MAX, 1);
        assert_eq!(induction_bucket[1], 1);
    }
}
13741
/// Compares three scalar left-to-right final scans (BWT, BWT with auxiliary
/// indexes, plain sorting) with their `_block_omp` variants on one
/// 65 536-entry block.
#[test]
fn libsais16_final_left_to_right_16u_block_omp_uses_cache_pipeline() {
    const BLOCK_SIZE: usize = 65_536;
    const K: usize = 512;

    let text: Vec<u16> = (0..=BLOCK_SIZE)
        .map(|i| 1 + (i % (K - 1)) as u16)
        .collect();

    // The block holds positions 1, 2, ...; the tail is zeroed room for induced entries.
    let seed_sa: Vec<SaSint> = (0..BLOCK_SIZE + K * 200)
        .map(|i| if i < BLOCK_SIZE { (i + 1) as SaSint } else { 0 })
        .collect();
    let seed_bucket: Vec<SaSint> = (0..K)
        .map(|c| (BLOCK_SIZE + c * 200) as SaSint)
        .collect();

    // One thread state is shared by all three block-parallel calls below.
    let mut thread_state = alloc_thread_state(4).unwrap();

    // BWT scan.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_bucket = seed_bucket.clone();
        let mut pooled_sa = seed_sa.clone();
        let mut pooled_bucket = seed_bucket.clone();

        final_bwt_scan_left_to_right_16u(
            &text,
            &mut serial_sa,
            &mut serial_bucket,
            0,
            BLOCK_SIZE as SaSint,
        );
        final_bwt_scan_left_to_right_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            &mut pooled_bucket,
            0,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_bucket, serial_bucket);
    }

    // BWT scan with auxiliary index collection.
    {
        let rm = 3;
        let mut serial_sa = seed_sa.clone();
        let mut serial_bucket = seed_bucket.clone();
        let mut pooled_sa = seed_sa.clone();
        let mut pooled_bucket = seed_bucket.clone();
        let mut serial_i = vec![-1; (BLOCK_SIZE / (rm as usize + 1)) + 2];
        let mut pooled_i = serial_i.clone();

        final_bwt_aux_scan_left_to_right_16u(
            &text,
            &mut serial_sa,
            rm,
            &mut serial_i,
            &mut serial_bucket,
            0,
            BLOCK_SIZE as SaSint,
        );
        final_bwt_aux_scan_left_to_right_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            rm,
            &mut pooled_i,
            &mut pooled_bucket,
            0,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_i, serial_i);
        assert_eq!(pooled_bucket, serial_bucket);
    }

    // Plain final sorting scan.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_bucket = seed_bucket.clone();
        let mut pooled_sa = seed_sa;
        let mut pooled_bucket = seed_bucket;

        final_sorting_scan_left_to_right_16u(
            &text,
            &mut serial_sa,
            &mut serial_bucket,
            0,
            BLOCK_SIZE as SaSint,
        );
        final_sorting_scan_left_to_right_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            &mut pooled_bucket,
            0,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_bucket, serial_bucket);
    }
}
13838
/// Compares four scalar right-to-left final scans (BWT, BWT with auxiliary
/// indexes, plain sorting, GSA) with their `_block_omp` variants on one
/// 65 536-entry block placed at a non-zero offset.
#[test]
fn libsais16_final_right_to_left_16u_block_omp_uses_cache_pipeline() {
    const BLOCK_SIZE: usize = 65_536;
    const K: usize = 512;
    const BLOCK_START: usize = K * 200 + 1024;

    let text: Vec<u16> = (0..=BLOCK_SIZE + 1)
        .map(|i| 1 + (i % (K - 1)) as u16)
        .collect();

    // Only the block itself is seeded, with positions 1, 2, ...
    let mut seed_sa: Vec<SaSint> = vec![0; BLOCK_START + BLOCK_SIZE + 1];
    for (i, slot) in seed_sa[BLOCK_START..BLOCK_START + BLOCK_SIZE]
        .iter_mut()
        .enumerate()
    {
        *slot = (i + 1) as SaSint;
    }
    let seed_bucket: Vec<SaSint> = (0..K).map(|c| ((c + 1) * 200) as SaSint).collect();

    // One thread state is shared by all four block-parallel calls below.
    let mut thread_state = alloc_thread_state(4).unwrap();

    // BWT scan.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_bucket = seed_bucket.clone();
        let mut pooled_sa = seed_sa.clone();
        let mut pooled_bucket = seed_bucket.clone();

        final_bwt_scan_right_to_left_16u(
            &text,
            &mut serial_sa,
            &mut serial_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
        );
        final_bwt_scan_right_to_left_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            &mut pooled_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_bucket, serial_bucket);
    }

    // BWT scan with auxiliary index collection.
    {
        let rm = 3;
        let mut serial_sa = seed_sa.clone();
        let mut serial_bucket = seed_bucket.clone();
        let mut pooled_sa = seed_sa.clone();
        let mut pooled_bucket = seed_bucket.clone();
        let mut serial_i = vec![-1; (BLOCK_SIZE / (rm as usize + 1)) + 2];
        let mut pooled_i = serial_i.clone();

        final_bwt_aux_scan_right_to_left_16u(
            &text,
            &mut serial_sa,
            rm,
            &mut serial_i,
            &mut serial_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
        );
        final_bwt_aux_scan_right_to_left_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            rm,
            &mut pooled_i,
            &mut pooled_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_i, serial_i);
        assert_eq!(pooled_bucket, serial_bucket);
    }

    // Plain final sorting scan.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_bucket = seed_bucket.clone();
        let mut pooled_sa = seed_sa.clone();
        let mut pooled_bucket = seed_bucket.clone();

        final_sorting_scan_right_to_left_16u(
            &text,
            &mut serial_sa,
            &mut serial_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
        );
        final_sorting_scan_right_to_left_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            &mut pooled_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_bucket, serial_bucket);
    }

    // GSA variant.
    {
        let mut serial_sa = seed_sa.clone();
        let mut serial_bucket = seed_bucket.clone();
        let mut pooled_sa = seed_sa;
        let mut pooled_bucket = seed_bucket;

        final_gsa_scan_right_to_left_16u(
            &text,
            &mut serial_sa,
            &mut serial_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
        );
        final_gsa_scan_right_to_left_16u_block_omp(
            &text,
            &mut pooled_sa,
            K as SaSint,
            &mut pooled_bucket,
            BLOCK_START as SaSint,
            BLOCK_SIZE as SaSint,
            4,
            &mut thread_state,
        );

        assert_eq!(pooled_sa, serial_sa);
        assert_eq!(pooled_bucket, serial_bucket);
    }
}
13962
/// Runs `clear_lms_suffixes_omp` on a six-entry suffix array with three
/// start/end bucket pairs and checks the result against the
/// `probe_libsais16_*` counterpart.
#[test]
fn libsais16_clear_lms_suffixes_omp_zeroes_requested_bucket_ranges() {
    let seed_sa = vec![5, 4, 3, 2, 1, 9];
    let n = seed_sa.len() as SaSint;

    // Declared `mut` only because the probe takes mutable pointers to both tables.
    let mut range_starts = vec![1, 4, 5];
    let mut range_ends = vec![3, 5, 5];

    let mut sa_rs = seed_sa.clone();
    clear_lms_suffixes_omp(&mut sa_rs, n, 3, &range_starts, &range_ends, 2);

    let mut sa_c = seed_sa;
    // SAFETY: every pointer is derived from a live local vector that outlives the call,
    // and `n` equals the suffix array's length.
    // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
    unsafe {
        probe_libsais16_clear_lms_suffixes_omp(
            sa_c.as_mut_ptr(),
            n,
            3,
            range_starts.as_mut_ptr(),
            range_ends.as_mut_ptr(),
            2,
        );
    }

    assert_eq!(sa_rs, sa_c);
}
13985
/// Two checks in one test: `flip_suffix_markers_omp` against its probe, and
/// `induce_partial_order_32s_1k_omp` against the same steps issued by hand.
#[test]
fn libsais16_partial_order_wrapper_helpers_match_manual_sequence() {
    // Part 1: marker flipping vs. the probe.
    {
        let mut sa_rs = vec![1, 2, 3, 4];
        let mut sa_c = sa_rs.clone();

        flip_suffix_markers_omp(&mut sa_rs, 3, 2);
        // SAFETY: the pointer comes from a live local vector that outlives the call.
        // NOTE(review): assumes the probe stays inside that buffer — confirm against the C shim.
        unsafe {
            probe_libsais16_flip_suffix_markers_omp(sa_c.as_mut_ptr(), 3, 2);
        }

        assert_eq!(sa_rs, sa_c);
    }

    // Part 2: the 1k wrapper vs. the manual call sequence.
    let t = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];
    let n = t.len() as SaSint;
    let k = 3;

    let mut wrapper_sa = vec![0; t.len()];
    let mut wrapper_buckets = vec![0; k as usize];
    let mut wrapper_state = alloc_thread_state(1).unwrap();
    induce_partial_order_32s_1k_omp(
        &t,
        &mut wrapper_sa,
        n,
        k,
        &mut wrapper_buckets,
        1,
        &mut wrapper_state,
    );

    let mut by_hand_sa = vec![0; t.len()];
    let mut by_hand_buckets = vec![0; k as usize];
    let mut by_hand_state = alloc_thread_state(1).unwrap();

    // Left-to-right pass: count, turn counts into bucket starts, scan.
    count_suffixes_32s(&t, n, k, &mut by_hand_buckets);
    initialize_buckets_start_32s_1k(k, &mut by_hand_buckets);
    partial_sorting_scan_left_to_right_32s_1k_omp(
        &t,
        &mut by_hand_sa,
        n,
        &mut by_hand_buckets,
        1,
        &mut by_hand_state,
    );

    // Right-to-left pass: recount, turn counts into bucket ends, scan.
    count_suffixes_32s(&t, n, k, &mut by_hand_buckets);
    initialize_buckets_end_32s_1k(k, &mut by_hand_buckets);
    partial_sorting_scan_right_to_left_32s_1k_omp(
        &t,
        &mut by_hand_sa,
        n,
        &mut by_hand_buckets,
        1,
        &mut by_hand_state,
    );

    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut by_hand_sa, n, 1, &mut by_hand_state);

    assert_eq!(wrapper_sa, by_hand_sa);
    assert_eq!(wrapper_buckets, by_hand_buckets);
}
14040
/// Prepares suffix-array and bucket state for each of the 6k, 4k, 2k and 1k
/// bucket layouts, then checks that the corresponding
/// `induce_partial_order_32s_*_omp` wrapper leaves the same suffix array and
/// bucket table as its `probe_libsais16_*` counterpart.
#[test]
fn libsais16_induce_partial_order_32s_wrappers_match_c() {
    let t = make_main_32s_stress_text(128, 24);
    let n = t.len() as SaSint;
    let k = 24;
    let threads = 1;

    // 6k layout.
    {
        let mut sa_rs: Vec<SaSint> = vec![0; t.len()];
        let mut buckets_rs: Vec<SaSint> = vec![0; 6 * k as usize];
        let mut state = alloc_thread_state(threads).unwrap();

        let m = count_and_gather_lms_suffixes_32s_4k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            1,
            threads,
            &mut state,
        );
        assert!(m > 1);

        // Zero everything in front of the gathered entries, which start at index `n - m`.
        sa_rs[..(n - m) as usize].fill(0);
        let first_lms_suffix = sa_rs[(n - m) as usize];
        let left_suffixes_count = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
            &t,
            k,
            &mut buckets_rs,
            first_lms_suffix,
        );

        let induction_bucket = &mut buckets_rs[4 * k as usize..];
        radix_sort_lms_suffixes_32s_6k_omp(&t, &mut sa_rs, n, m, induction_bucket, threads);
        radix_sort_set_markers_32s_6k_omp(&mut sa_rs, k, induction_bucket, threads);
        initialize_buckets_for_partial_sorting_32s_6k(
            &t,
            k,
            &mut buckets_rs,
            first_lms_suffix,
            left_suffixes_count,
        );

        // Snapshot the prepared state for the probe before the Rust wrapper mutates it.
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();

        induce_partial_order_32s_6k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            first_lms_suffix,
            left_suffixes_count,
            threads,
            &mut state,
        );
        // SAFETY: every pointer is derived from a live local buffer that outlives the call.
        // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
        unsafe {
            probe_libsais16_induce_partial_order_32s_6k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                first_lms_suffix,
                left_suffixes_count,
                threads,
            );
        }

        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 4k layout.
    {
        let mut sa_rs: Vec<SaSint> = vec![0; t.len()];
        let mut buckets_rs: Vec<SaSint> = vec![0; 4 * k as usize];
        let mut state = alloc_thread_state(threads).unwrap();

        let m = count_and_gather_lms_suffixes_32s_2k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            1,
            threads,
            &mut state,
        );
        assert!(m > 1);

        let first_lms_suffix = sa_rs[(n - m) as usize];
        initialize_buckets_for_radix_and_partial_sorting_32s_4k(
            &t,
            k,
            &mut buckets_rs,
            first_lms_suffix,
        );

        // The radix helpers receive the table starting at element 1.
        let induction_bucket = &mut buckets_rs[1..];
        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut sa_rs, n, m, induction_bucket, threads);
        radix_sort_set_markers_32s_4k_omp(&mut sa_rs, k, induction_bucket, threads);
        place_lms_suffixes_interval_32s_4k(&mut sa_rs, n, k, m - 1, &buckets_rs);

        // Snapshot the prepared state for the probe before the Rust wrapper mutates it.
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();

        induce_partial_order_32s_4k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            threads,
            &mut state,
        );
        // SAFETY: every pointer is derived from a live local buffer that outlives the call.
        // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
        unsafe {
            probe_libsais16_induce_partial_order_32s_4k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                threads,
            );
        }

        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 2k layout.
    {
        let mut sa_rs: Vec<SaSint> = vec![0; t.len()];
        let mut buckets_rs: Vec<SaSint> = vec![0; 2 * k as usize];
        let mut state = alloc_thread_state(threads).unwrap();

        let m = count_and_gather_lms_suffixes_32s_2k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            1,
            threads,
            &mut state,
        );
        assert!(m > 1);

        let first_lms_suffix = sa_rs[(n - m) as usize];
        initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
            &t,
            k,
            &mut buckets_rs,
            first_lms_suffix,
        );

        // The radix helper receives the table starting at element 1.
        let induction_bucket = &mut buckets_rs[1..];
        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut sa_rs, n, m, induction_bucket, threads);
        place_lms_suffixes_interval_32s_2k(&mut sa_rs, n, k, m - 1, &buckets_rs);
        initialize_buckets_start_and_end_32s_2k(k, &mut buckets_rs);

        // Snapshot the prepared state for the probe before the Rust wrapper mutates it.
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();

        induce_partial_order_32s_2k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            threads,
            &mut state,
        );
        // SAFETY: every pointer is derived from a live local buffer that outlives the call.
        // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
        unsafe {
            probe_libsais16_induce_partial_order_32s_2k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                threads,
            );
        }

        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 1k layout.
    {
        let mut sa_rs: Vec<SaSint> = vec![0; t.len()];
        let mut buckets_rs: Vec<SaSint> = vec![0; k as usize];
        let mut state = alloc_thread_state(threads).unwrap();

        count_suffixes_32s(&t, n, k, &mut buckets_rs);
        initialize_buckets_end_32s_1k(k, &mut buckets_rs);
        let m = radix_sort_lms_suffixes_32s_1k(&t, &mut sa_rs, n, &mut buckets_rs);
        assert!(m > 1);

        // Snapshot the prepared state for the probe before the Rust wrapper mutates it.
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();

        induce_partial_order_32s_1k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            threads,
            &mut state,
        );
        // SAFETY: every pointer is derived from a live local buffer that outlives the call.
        // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
        unsafe {
            probe_libsais16_induce_partial_order_32s_1k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                threads,
            );
        }

        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }
}
14237
/// Prepares suffix-array and bucket state for an eight-symbol 16-bit text, then
/// checks that `induce_partial_order_16u_omp` leaves the same suffix array and
/// bucket table as its `probe_libsais16_*` counterpart.
#[test]
fn libsais16_induce_partial_order_16u_omp_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let flags = 0;
    let threads = 1;

    let mut sa_rs: Vec<SaSint> = vec![0; text.len()];
    let mut buckets_rs: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];

    // Preparation: gather, set bucket bounds, radix-sort, then set up partial sorting.
    let m = count_and_gather_lms_suffixes_16u_omp(
        &text,
        &mut sa_rs,
        n,
        &mut buckets_rs[..4 * ALPHABET_SIZE],
        threads,
        &mut [],
    );
    let k = initialize_buckets_start_and_end_16u(&mut buckets_rs, None);
    assert!(m > 0);

    let first_lms_suffix = sa_rs[(n - m) as usize];
    let left_suffixes_count =
        initialize_buckets_for_lms_suffixes_radix_sort_16u(&text, &mut buckets_rs, first_lms_suffix);
    radix_sort_lms_suffixes_16u_omp(
        &text,
        &mut sa_rs,
        n,
        m,
        flags,
        &mut buckets_rs,
        threads,
        &mut [],
    );
    initialize_buckets_for_partial_sorting_16u(
        &text,
        &mut buckets_rs,
        first_lms_suffix,
        left_suffixes_count,
    );

    // Snapshot the prepared state for the probe before the Rust wrapper mutates it.
    let mut sa_c = sa_rs.clone();
    let mut buckets_c = buckets_rs.clone();

    induce_partial_order_16u_omp(
        &text,
        &mut sa_rs,
        n,
        k,
        flags,
        &mut buckets_rs,
        first_lms_suffix,
        left_suffixes_count,
        threads,
    );
    // SAFETY: every pointer is derived from a live local buffer that outlives the call.
    // NOTE(review): assumes the probe stays inside those buffers — confirm against the C shim.
    unsafe {
        probe_libsais16_induce_partial_order_16u_omp(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            n,
            k,
            flags,
            buckets_c.as_mut_ptr(),
            first_lms_suffix,
            left_suffixes_count,
            threads,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
14310
14311 fn final_order_32s_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14312 (
14313 vec![0, 1, 2, 1, 0, 1, 2, 1, 0],
14314 vec![1, 0, 2, 0, 0, 0, 0, 0, 0],
14315 )
14316 }
14317
14318 fn seed_final_order_bucket_sections(buckets: &mut [SaSint], k: usize, branch_k: usize) {
14319 let left = [0, 1, 3];
14320 let right = [1, 2, 3];
14321 let left_section = match branch_k {
14322 6 => 4 * k,
14323 4 => 2 * k,
14324 2 => k,
14325 _ => 0,
14326 };
14327 let right_section = match branch_k {
14328 6 => 5 * k,
14329 4 => 3 * k,
14330 2 => 0,
14331 _ => 0,
14332 };
14333 buckets[left_section..left_section + k].copy_from_slice(&left);
14334 buckets[right_section..right_section + k].copy_from_slice(&right);
14335 }
14336
/// Runs the 6k, 4k, 2k and 1k final-order wrappers on the shared fixture and
/// checks that the suffix array and buckets equal what the C probes produce.
#[test]
fn libsais16_induce_final_order_32s_wrappers_match_c() {
    let (text, seed_sa) = final_order_32s_fixture();
    let len = text.len() as SaSint;
    let alphabet = 3;
    let workers = 1;

    // 6k bucket layout.
    {
        let mut sa_rs = seed_sa.clone();
        let mut buckets_rs = vec![0; 6 * alphabet as usize];
        seed_final_order_bucket_sections(&mut buckets_rs, alphabet as usize, 6);
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        let mut state = alloc_thread_state(workers).unwrap();
        induce_final_order_32s_6k(
            &text,
            &mut sa_rs,
            len,
            alphabet,
            &mut buckets_rs,
            workers,
            &mut state,
        );
        // SAFETY: all pointers come from live buffers owned by this test;
        // assumes the probe stays within `len` / the bucket length.
        unsafe {
            probe_libsais16_induce_final_order_32s_6k(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                len,
                alphabet,
                buckets_c.as_mut_ptr(),
                workers,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 4k bucket layout.
    {
        let mut sa_rs = seed_sa.clone();
        let mut buckets_rs = vec![0; 4 * alphabet as usize];
        seed_final_order_bucket_sections(&mut buckets_rs, alphabet as usize, 4);
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        let mut state = alloc_thread_state(workers).unwrap();
        induce_final_order_32s_4k(
            &text,
            &mut sa_rs,
            len,
            alphabet,
            &mut buckets_rs,
            workers,
            &mut state,
        );
        // SAFETY: same buffer ownership argument as for the 6k probe above.
        unsafe {
            probe_libsais16_induce_final_order_32s_4k(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                len,
                alphabet,
                buckets_c.as_mut_ptr(),
                workers,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 2k bucket layout.
    {
        let mut sa_rs = seed_sa.clone();
        let mut buckets_rs = vec![0; 2 * alphabet as usize];
        seed_final_order_bucket_sections(&mut buckets_rs, alphabet as usize, 2);
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        let mut state = alloc_thread_state(workers).unwrap();
        induce_final_order_32s_2k(
            &text,
            &mut sa_rs,
            len,
            alphabet,
            &mut buckets_rs,
            workers,
            &mut state,
        );
        // SAFETY: same buffer ownership argument as for the 6k probe above.
        unsafe {
            probe_libsais16_induce_final_order_32s_2k(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                len,
                alphabet,
                buckets_c.as_mut_ptr(),
                workers,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 1k bucket layout: the buckets start zeroed and are not seeded.
    {
        let mut sa_rs = seed_sa;
        let mut buckets_rs = vec![0; alphabet as usize];
        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        let mut state = alloc_thread_state(workers).unwrap();
        induce_final_order_32s_1k(
            &text,
            &mut sa_rs,
            len,
            alphabet,
            &mut buckets_rs,
            workers,
            &mut state,
        );
        // SAFETY: same buffer ownership argument as for the 6k probe above.
        unsafe {
            probe_libsais16_induce_final_order_32s_1k(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                len,
                alphabet,
                buckets_c.as_mut_ptr(),
                workers,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }
}
14455
/// Checks `induce_final_order_16u_omp` three ways (plain sorting, BWT, and BWT
/// with auxiliary indexes): against the C probe and against calling the
/// left-to-right and right-to-left scans by hand on a fresh fixture.
#[test]
fn libsais16_induce_final_order_16u_omp_matches_manual_sequence() {
    // --- Plain suffix sorting (flags == 0) ---------------------------------
    {
        let (text, mut via_wrapper_sa, induction) = final_scan_fixture();
        let len = text.len() as SaSint;
        let mut via_wrapper_buckets = final_order_buckets(&induction);
        let mut via_c_sa = via_wrapper_sa.clone();
        let mut via_c_buckets = via_wrapper_buckets.clone();
        let mut state = alloc_thread_state(1).unwrap();
        let via_wrapper_index = induce_final_order_16u_omp(
            &text,
            &mut via_wrapper_sa,
            len,
            8,
            0,
            0,
            None,
            &mut via_wrapper_buckets,
            1,
            &mut state,
        );
        // SAFETY: pointers come from live buffers owned by this test; the
        // auxiliary-index pointer is null, mirroring `None` on the Rust side.
        let via_c_index = unsafe {
            probe_libsais16_induce_final_order_16u_omp(
                text.as_ptr(),
                via_c_sa.as_mut_ptr(),
                len,
                8,
                0,
                0,
                std::ptr::null_mut(),
                via_c_buckets.as_mut_ptr(),
                1,
            )
        };

        let (hand_text, mut by_hand_sa, hand_induction) = final_scan_fixture();
        let hand_len = hand_text.len() as SaSint;
        let mut by_hand_buckets = final_order_buckets(&hand_induction);
        {
            // The left scan uses section 6 and the right scan section 7.
            let (head, tail) = by_hand_buckets.split_at_mut(7 * ALPHABET_SIZE);
            let left_section = &mut head[6 * ALPHABET_SIZE..];
            let right_section = &mut tail[..ALPHABET_SIZE];
            final_sorting_scan_left_to_right_16u_omp(
                &hand_text,
                &mut by_hand_sa,
                hand_len,
                8,
                left_section,
                1,
            );
            final_sorting_scan_right_to_left_16u_omp(
                &hand_text,
                &mut by_hand_sa,
                0,
                hand_len,
                8,
                right_section,
                1,
            );
        }

        assert_eq!(via_wrapper_index, 0);
        assert_eq!(via_wrapper_index, via_c_index);
        assert_eq!(via_wrapper_sa, by_hand_sa);
        assert_eq!(via_wrapper_sa, via_c_sa);
        assert_eq!(via_wrapper_buckets, by_hand_buckets);
        assert_eq!(via_wrapper_buckets, via_c_buckets);
    }

    // --- BWT without auxiliary indexes -------------------------------------
    {
        let (text, mut via_wrapper_sa, induction) = final_scan_fixture();
        let len = text.len() as SaSint;
        let mut via_wrapper_buckets = final_order_buckets(&induction);
        let mut via_c_sa = via_wrapper_sa.clone();
        let mut via_c_buckets = via_wrapper_buckets.clone();
        let mut state = alloc_thread_state(1).unwrap();
        let via_wrapper_index = induce_final_order_16u_omp(
            &text,
            &mut via_wrapper_sa,
            len,
            8,
            LIBSAIS_FLAGS_BWT,
            0,
            None,
            &mut via_wrapper_buckets,
            1,
            &mut state,
        );
        // SAFETY: same buffer ownership argument as in the first scenario.
        let via_c_index = unsafe {
            probe_libsais16_induce_final_order_16u_omp(
                text.as_ptr(),
                via_c_sa.as_mut_ptr(),
                len,
                8,
                LIBSAIS_FLAGS_BWT,
                0,
                std::ptr::null_mut(),
                via_c_buckets.as_mut_ptr(),
                1,
            )
        };

        let (hand_text, mut by_hand_sa, hand_induction) = final_scan_fixture();
        let hand_len = hand_text.len() as SaSint;
        let mut by_hand_buckets = final_order_buckets(&hand_induction);
        let by_hand_index = {
            let (head, tail) = by_hand_buckets.split_at_mut(7 * ALPHABET_SIZE);
            let left_section = &mut head[6 * ALPHABET_SIZE..];
            let right_section = &mut tail[..ALPHABET_SIZE];
            final_bwt_scan_left_to_right_16u_omp(
                &hand_text,
                &mut by_hand_sa,
                hand_len,
                8,
                left_section,
                1,
            );
            // The right-to-left scan's return value is the expected index.
            final_bwt_scan_right_to_left_16u_omp(
                &hand_text,
                &mut by_hand_sa,
                hand_len,
                8,
                right_section,
                1,
            )
        };

        assert_eq!(via_wrapper_index, by_hand_index);
        assert_eq!(via_wrapper_index, via_c_index);
        assert_eq!(via_wrapper_sa, by_hand_sa);
        assert_eq!(via_wrapper_sa, via_c_sa);
        assert_eq!(via_wrapper_buckets, by_hand_buckets);
        assert_eq!(via_wrapper_buckets, via_c_buckets);
    }

    // --- BWT with auxiliary indexes ----------------------------------------
    {
        let (text, mut via_wrapper_sa, induction) = final_scan_fixture();
        let len = text.len() as SaSint;
        let mut via_wrapper_buckets = final_order_buckets(&induction);
        let mut via_c_sa = via_wrapper_sa.clone();
        let mut via_c_buckets = via_wrapper_buckets.clone();
        let mut state = alloc_thread_state(1).unwrap();
        let mut via_wrapper_aux = vec![-1; 8];
        let mut via_c_aux = via_wrapper_aux.clone();
        let via_wrapper_index = induce_final_order_16u_omp(
            &text,
            &mut via_wrapper_sa,
            len,
            8,
            LIBSAIS_FLAGS_BWT,
            2,
            Some(&mut via_wrapper_aux),
            &mut via_wrapper_buckets,
            1,
            &mut state,
        );
        // SAFETY: same buffer ownership argument as above; the auxiliary
        // pointer refers to the eight-entry `via_c_aux` vector.
        let via_c_index = unsafe {
            probe_libsais16_induce_final_order_16u_omp(
                text.as_ptr(),
                via_c_sa.as_mut_ptr(),
                len,
                8,
                LIBSAIS_FLAGS_BWT,
                2,
                via_c_aux.as_mut_ptr(),
                via_c_buckets.as_mut_ptr(),
                1,
            )
        };

        let (hand_text, mut by_hand_sa, hand_induction) = final_scan_fixture();
        let hand_len = hand_text.len() as SaSint;
        let mut by_hand_buckets = final_order_buckets(&hand_induction);
        let mut by_hand_aux = vec![-1; 8];
        {
            let (head, tail) = by_hand_buckets.split_at_mut(7 * ALPHABET_SIZE);
            let left_section = &mut head[6 * ALPHABET_SIZE..];
            let right_section = &mut tail[..ALPHABET_SIZE];
            final_bwt_aux_scan_left_to_right_16u_omp(
                &hand_text,
                &mut by_hand_sa,
                hand_len,
                8,
                1,
                &mut by_hand_aux,
                left_section,
                1,
            );
            final_bwt_aux_scan_right_to_left_16u_omp(
                &hand_text,
                &mut by_hand_sa,
                hand_len,
                8,
                1,
                &mut by_hand_aux,
                right_section,
                1,
            );
        }

        assert_eq!(via_wrapper_index, 0);
        assert_eq!(via_wrapper_index, via_c_index);
        assert_eq!(via_wrapper_sa, by_hand_sa);
        assert_eq!(via_wrapper_sa, via_c_sa);
        assert_eq!(via_wrapper_buckets, by_hand_buckets);
        assert_eq!(via_wrapper_buckets, via_c_buckets);
        assert_eq!(via_wrapper_aux, by_hand_aux);
        assert_eq!(via_wrapper_aux, via_c_aux);
    }
}
14648
/// Compares `main_16u` with the public C entry points, once without flags and
/// once with `LIBSAIS_FLAGS_GSA`, checking the returned index, the first
/// `text.len()` suffix-array entries and the frequency table.
#[test]
fn libsais16_main_16u_matches_public_c_suffix_array_paths() {
    // Plain suffix array with a frequency table.
    {
        let input = [3, 1, 4, 1, 5, 9, 0, 2];
        let len = input.len() as SaSint;
        let free_space = 32;
        let mut sa_rs = vec![0; input.len() + free_space as usize];
        let mut buckets_rs = vec![0; 8 * ALPHABET_SIZE];
        let mut freq_rs = vec![0; ALPHABET_SIZE];
        let mut state = alloc_thread_state(1).unwrap();
        let index_rs = main_16u(
            &input,
            &mut sa_rs,
            len,
            &mut buckets_rs,
            0,
            0,
            None,
            free_space,
            Some(&mut freq_rs),
            1,
            &mut state,
        );

        let mut sa_c = vec![0; input.len() + free_space as usize];
        let mut freq_c = vec![0; ALPHABET_SIZE];
        // SAFETY: pointers come from live buffers owned by this test; `sa_c`
        // is sized `len + free_space` and `freq_c` holds `ALPHABET_SIZE` entries.
        let index_c = unsafe {
            probe_public_libsais16_freq(
                input.as_ptr(),
                sa_c.as_mut_ptr(),
                len,
                free_space,
                freq_c.as_mut_ptr(),
            )
        };

        assert_eq!(index_rs, index_c);
        // Only the first `input.len()` entries are compared.
        assert_eq!(&sa_rs[..input.len()], &sa_c[..input.len()]);
        assert_eq!(freq_rs, freq_c);
    }

    // Generalized suffix array with a frequency table.
    {
        let input = [2, 1, 0, 2, 0];
        let len = input.len() as SaSint;
        let free_space = 24;
        let mut sa_rs = vec![0; input.len() + free_space as usize];
        let mut buckets_rs = vec![0; 8 * ALPHABET_SIZE];
        let mut freq_rs = vec![0; ALPHABET_SIZE];
        let mut state = alloc_thread_state(1).unwrap();
        let index_rs = main_16u(
            &input,
            &mut sa_rs,
            len,
            &mut buckets_rs,
            LIBSAIS_FLAGS_GSA,
            0,
            None,
            free_space,
            Some(&mut freq_rs),
            1,
            &mut state,
        );

        let mut sa_c = vec![0; input.len() + free_space as usize];
        let mut freq_c = vec![0; ALPHABET_SIZE];
        // SAFETY: same buffer ownership argument as in the first scenario.
        let index_c = unsafe {
            probe_public_libsais16_gsa_freq(
                input.as_ptr(),
                sa_c.as_mut_ptr(),
                len,
                free_space,
                freq_c.as_mut_ptr(),
            )
        };

        assert_eq!(index_rs, index_c);
        assert_eq!(&sa_rs[..input.len()], &sa_c[..input.len()]);
        assert_eq!(freq_rs, freq_c);
    }
}
14725
14726 fn make_main_32s_stress_text(len: usize, alphabet: SaSint) -> Vec<SaSint> {
14727 let mut state: u32 = 0x1357_9bdf;
14728 let mut t = Vec::with_capacity(len + 1);
14729
14730 for i in 0..len {
14731 state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
14732 let mut value = ((state >> 16) % (alphabet as u32 - 1)) as SaSint + 1;
14733 if i % 17 < 8 {
14734 value = ((i / 17) as SaSint % 11) + 1;
14735 }
14736 if i % 29 < 10 {
14737 value = (((i / 29) as SaSint * 3) % 19) + 1;
14738 }
14739 if i % 64 >= 48 {
14740 value = t[i - 48];
14741 }
14742 t.push(value);
14743 }
14744
14745 t.push(0);
14746 t
14747 }
14748
14749 fn make_recursive_main_32s_text(repeats: usize) -> Vec<SaSint> {
14750 let motif = [9, 4, 9, 2, 9, 4, 9, 1];
14751 let mut t = Vec::with_capacity(repeats * motif.len() + 1);
14752 for _ in 0..repeats {
14753 t.extend_from_slice(&motif);
14754 }
14755 t.push(0);
14756 t
14757 }
14758
14759 fn assert_main_32s_entry_matches_c(mut t: Vec<SaSint>, k: SaSint, fs: SaSint) {
14760 let n = t.len() as SaSint;
14761 let threads = 1;
14762 let mut sa = vec![0; t.len() + fs as usize];
14763 let initial_t = t.clone();
14764 let initial_sa = sa.clone();
14765
14766 let c_result = unsafe {
14767 probe_libsais16_main_32s_entry(t.as_mut_ptr(), sa.as_mut_ptr(), n, k, fs, threads)
14768 };
14769 let c_t = t.clone();
14770 let c_sa = sa.clone();
14771
14772 t.copy_from_slice(&initial_t);
14773 sa.copy_from_slice(&initial_sa);
14774
14775 let mut thread_state = alloc_thread_state(threads).unwrap();
14776 let rust_result = main_32s_entry(
14777 t.as_mut_ptr(),
14778 &mut sa,
14779 n,
14780 k,
14781 fs,
14782 threads,
14783 &mut thread_state,
14784 );
14785
14786 assert_eq!(rust_result, c_result);
14787 assert_eq!(t, c_t);
14788 assert_eq!(sa, c_sa);
14789 }
14790
/// Drives `main_32s_entry` against the C probe for several alphabet sizes on
/// the stress text (with 2048 spare entries) and on the repetitive text (with
/// no spare entries).
#[test]
fn libsais16_main_32s_entry_matches_c_for_local_32s_paths() {
    // Stress texts: the alphabet size is used both to generate the text and as `k`.
    for alphabet in [300, 400, 700, 1501] {
        assert_main_32s_entry_matches_c(make_main_32s_stress_text(1024, alphabet), alphabet, 2048);
    }

    // Repetitive texts with no free space.
    for alphabet in [300, 1501] {
        assert_main_32s_entry_matches_c(make_recursive_main_32s_text(24), alphabet, 0);
    }
}
14800
/// The Rust left-to-right BWT scan over positions `0..6` of the shared fixture
/// must leave the suffix array and bucket exactly as the C probe does.
#[test]
fn libsais16_final_bwt_scan_left_to_right_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = final_scan_fixture();
    let (mut sa_rs, mut sa_c) = (sa_seed.clone(), sa_seed);
    let (mut bucket_rs, mut bucket_c) = (bucket_seed.clone(), bucket_seed);

    final_bwt_scan_left_to_right_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);
    // SAFETY: pointers come from live buffers owned by this test; assumes the
    // probe only touches the scanned range and the bucket table.
    unsafe {
        probe_libsais16_final_bwt_scan_left_to_right_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
14821
/// The Rust right-to-left BWT scan over positions `0..6` of the shared fixture
/// must return the same index and leave the same buffers as the C probe.
#[test]
fn libsais16_final_bwt_scan_right_to_left_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = final_scan_fixture();
    let (mut sa_rs, mut sa_c) = (sa_seed.clone(), sa_seed);
    let (mut bucket_rs, mut bucket_c) = (bucket_seed.clone(), bucket_seed);

    let index_rs = final_bwt_scan_right_to_left_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);
    // SAFETY: pointers come from live buffers owned by this test; assumes the
    // probe only touches the scanned range and the bucket table.
    let index_c = unsafe {
        probe_libsais16_final_bwt_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        )
    };

    assert_eq!(index_rs, index_c);
    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
14844
/// The Rust left-to-right BWT scan with auxiliary indexes must leave the
/// suffix array, bucket and eight-entry index table exactly as the C probe does.
#[test]
fn libsais16_final_bwt_aux_scan_left_to_right_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = final_scan_fixture();
    let (mut sa_rs, mut sa_c) = (sa_seed.clone(), sa_seed);
    let (mut bucket_rs, mut bucket_c) = (bucket_seed.clone(), bucket_seed);
    // Both index tables start filled with -1.
    let mut aux_rs = vec![-1; 8];
    let mut aux_c = aux_rs.clone();

    final_bwt_aux_scan_left_to_right_16u(&text, &mut sa_rs, 1, &mut aux_rs, &mut bucket_rs, 0, 6);
    // SAFETY: pointers come from live buffers owned by this test; assumes the
    // probe stays within the scanned range, the index table and the bucket.
    unsafe {
        probe_libsais16_final_bwt_aux_scan_left_to_right_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            1,
            aux_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
    assert_eq!(aux_rs, aux_c);
}
14878
/// The Rust right-to-left BWT scan with auxiliary indexes must leave the
/// suffix array, bucket and eight-entry index table exactly as the C probe does.
#[test]
fn libsais16_final_bwt_aux_scan_right_to_left_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = final_scan_fixture();
    let (mut sa_rs, mut sa_c) = (sa_seed.clone(), sa_seed);
    let (mut bucket_rs, mut bucket_c) = (bucket_seed.clone(), bucket_seed);
    // Both index tables start filled with -1.
    let mut aux_rs = vec![-1; 8];
    let mut aux_c = aux_rs.clone();

    final_bwt_aux_scan_right_to_left_16u(&text, &mut sa_rs, 1, &mut aux_rs, &mut bucket_rs, 0, 6);
    // SAFETY: pointers come from live buffers owned by this test; assumes the
    // probe stays within the scanned range, the index table and the bucket.
    unsafe {
        probe_libsais16_final_bwt_aux_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            1,
            aux_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
    assert_eq!(aux_rs, aux_c);
}
14912
/// Renumbers six LMS entries (every second one carrying the `SAINT_MIN` bit)
/// and checks the returned name and the array against the C probe.
#[test]
fn libsais16_renumber_lms_suffixes_16u_matches_c() {
    let entries = [2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN];
    let count = entries.len();

    let mut sa_rs = vec![0; 20];
    sa_rs[..count].copy_from_slice(&entries);
    let mut sa_c = sa_rs.clone();

    let name_rs = renumber_lms_suffixes_16u(&mut sa_rs, count as SaSint, 5, 0, count as SaSint);
    // SAFETY: the pointer comes from the live 20-entry `sa_c` vector; assumes
    // the probe stays within it.
    let name_c = unsafe {
        probe_libsais16_renumber_lms_suffixes_16u(
            sa_c.as_mut_ptr(),
            count as SaSint,
            5,
            0,
            count as SaSint,
        )
    };

    assert_eq!(name_rs, name_c);
    assert_eq!(sa_rs, sa_c);
}
14934
14935 fn lms_interval_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14936 let mut sa = vec![-7; 16];
14937 sa[4..8].copy_from_slice(&[41, 42, 61, 62]);
14938
14939 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
14940 buckets[buckets_index2(2, 1)] = 0;
14941 buckets[buckets_index2(3, 1)] = 2;
14942 buckets[buckets_index2(4, 1)] = 2;
14943 buckets[buckets_index2(5, 1)] = 2;
14944 buckets[buckets_index2(6, 1)] = 4;
14945 buckets[buckets_index2(7, 1)] = 4;
14946 buckets[7 * ALPHABET_SIZE + 2] = 6;
14947 buckets[7 * ALPHABET_SIZE + 5] = 12;
14948
14949 (sa, buckets)
14950 }
14951
/// Places LMS suffixes by interval, with and without `LIBSAIS_FLAGS_GSA`, and
/// checks the suffix array and buckets against the C probe each time.
#[test]
fn libsais16_place_lms_suffixes_interval_16u_matches_c() {
    for mode in [0, LIBSAIS_FLAGS_GSA] {
        let (sa_seed, bucket_seed) = lms_interval_fixture();
        let (mut sa_rs, mut sa_c) = (sa_seed.clone(), sa_seed);
        let (mut buckets_rs, mut buckets_c) = (bucket_seed.clone(), bucket_seed);

        place_lms_suffixes_interval_16u(&mut sa_rs, 16, 8, mode, &mut buckets_rs);
        // SAFETY: pointers come from live buffers owned by this test; assumes
        // the probe stays within the 16-entry array and the bucket table.
        unsafe {
            probe_libsais16_place_lms_suffixes_interval_16u(
                sa_c.as_mut_ptr(),
                16,
                8,
                mode,
                buckets_c.as_mut_ptr(),
            );
        }

        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }
}
14974
/// Copies a mix of in-range, out-of-range and negative 32-bit values into a
/// 16-bit buffer and checks the result against the C probe.
#[test]
fn libsais16_bwt_copy_16u_matches_c() {
    // `mut` because the C probe takes a mutable pointer to the source.
    let mut source = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
    let count = source.len() as SaSint;
    // Pre-fill the destinations with a sentinel.
    let mut out_rs = vec![999; source.len()];
    let mut out_c = out_rs.clone();

    bwt_copy_16u(&mut out_rs, &source, count);
    // SAFETY: both pointers come from live vectors of `count` entries.
    unsafe {
        probe_libsais16_bwt_copy_16u(out_c.as_mut_ptr(), source.as_mut_ptr(), count);
    }

    assert_eq!(out_rs, out_c);
}
14988
/// Checks three single-threaded "omp" wrappers against their C probes on one
/// eight-symbol text: LMS gathering, counting plus gathering, and the LMS
/// radix sort.
#[test]
fn libsais16_early_omp_wrappers_match_c() {
    let input = [3, 1, 2, 1, 0, 4, 1, 0];
    let len = input.len() as SaSint;

    // 1. gather_lms_suffixes_16u_omp
    let mut gathered_rs = vec![-99; input.len()];
    let mut gathered_c = gathered_rs.clone();
    gather_lms_suffixes_16u_omp(&input, &mut gathered_rs, len, 1, &mut []);
    // SAFETY: pointers come from live buffers of `len` entries.
    unsafe {
        probe_libsais16_gather_lms_suffixes_16u_omp(input.as_ptr(), gathered_c.as_mut_ptr(), len, 1);
    }
    assert_eq!(gathered_rs, gathered_c);

    // 2. count_and_gather_lms_suffixes_16u_omp
    let mut sa_rs = vec![-99; input.len()];
    let mut counted_sa_c = sa_rs.clone();
    let mut counts_rs = vec![-1; 4 * ALPHABET_SIZE];
    let mut counts_c = counts_rs.clone();
    let lms_rs = count_and_gather_lms_suffixes_16u_omp(
        &input,
        &mut sa_rs,
        len,
        &mut counts_rs,
        1,
        &mut [],
    );
    // SAFETY: pointers come from live buffers; the bucket table holds
    // `4 * ALPHABET_SIZE` entries.
    let lms_c = unsafe {
        probe_libsais16_count_and_gather_lms_suffixes_16u_omp(
            input.as_ptr(),
            counted_sa_c.as_mut_ptr(),
            len,
            counts_c.as_mut_ptr(),
            1,
        )
    };
    assert_eq!(lms_rs, lms_c);
    assert_eq!(sa_rs, counted_sa_c);
    assert_eq!(counts_rs, counts_c);

    // 3. radix_sort_lms_suffixes_16u_omp; `sa_rs` is carried over from step 2.
    let mut buckets_rs = vec![0; 8 * ALPHABET_SIZE];
    let lms_count = count_and_gather_lms_suffixes_16u(
        &input,
        &mut sa_rs,
        len,
        &mut buckets_rs[..4 * ALPHABET_SIZE],
        0,
        len,
    );
    initialize_buckets_start_and_end_16u(&mut buckets_rs, None);
    let first_lms = sa_rs[(len - lms_count) as usize];
    initialize_buckets_for_lms_suffixes_radix_sort_16u(&input, &mut buckets_rs, first_lms);
    let mut sorted_sa_c = sa_rs.clone();
    let mut buckets_c = buckets_rs.clone();
    radix_sort_lms_suffixes_16u_omp(
        &input,
        &mut sa_rs,
        len,
        lms_count,
        0,
        &mut buckets_rs,
        1,
        &mut [],
    );
    // SAFETY: pointers come from live buffers; the bucket table holds
    // `8 * ALPHABET_SIZE` entries.
    unsafe {
        probe_libsais16_radix_sort_lms_suffixes_16u_omp(
            input.as_ptr(),
            sorted_sa_c.as_mut_ptr(),
            len,
            lms_count,
            0,
            buckets_c.as_mut_ptr(),
            1,
        );
    }
    assert_eq!(sa_rs, sorted_sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
15069
/// On a 65,600-symbol text, the four-thread gather and count-and-gather
/// wrappers must produce the same results as the scalar routines run over the
/// whole range.
#[test]
fn libsais16_early_omp_wrappers_use_block_partition_for_large_inputs() {
    let len = 65_600usize;
    let len_sa = len as SaSint;
    let text: Vec<u16> = {
        let mut symbols = Vec::with_capacity(len);
        for i in 0..len {
            symbols.push(1 + ((i * 37 + i / 17) % 509) as u16);
        }
        symbols
    };

    // --- gather: threaded wrapper vs. scalar routine -----------------------
    let mut gathered_mt = vec![-99; len];
    let mut gathered_st = vec![-99; len];
    let mut state = alloc_thread_state(4).unwrap();
    let mut scratch_sa = vec![-99; len];
    let mut scratch_buckets = vec![0; 4 * ALPHABET_SIZE];
    // This call runs before the threaded gather on the same thread state; its
    // own outputs are not inspected.
    count_and_gather_lms_suffixes_16u_omp(
        &text,
        &mut scratch_sa,
        len_sa,
        &mut scratch_buckets,
        4,
        &mut state,
    );
    gather_lms_suffixes_16u_omp(&text, &mut gathered_mt, len_sa, 4, &mut state);
    gather_lms_suffixes_16u(&text, &mut gathered_st, len_sa, len_sa - 1, 0, len_sa);
    assert_eq!(gathered_mt, gathered_st);

    // --- count and gather: threaded wrapper vs. scalar routine -------------
    let mut sa_mt = vec![-99; len];
    let mut sa_st = vec![-99; len];
    let mut buckets_mt = vec![0; 4 * ALPHABET_SIZE];
    let mut buckets_st = vec![0; 4 * ALPHABET_SIZE];
    let lms_mt = count_and_gather_lms_suffixes_16u_omp(
        &text,
        &mut sa_mt,
        len_sa,
        &mut buckets_mt,
        4,
        &mut state,
    );
    let lms_st =
        count_and_gather_lms_suffixes_16u(&text, &mut sa_st, len_sa, &mut buckets_st, 0, len_sa);
    assert_eq!(lms_mt, lms_st);
    // Only the trailing entries (one per counted LMS suffix) are compared.
    assert_eq!(
        &sa_mt[len - lms_mt as usize..],
        &sa_st[len - lms_st as usize..]
    );
    assert_eq!(buckets_mt, buckets_st);
}
15134
/// Checks the single-threaded `renumber_lms_suffixes_16u_omp` and
/// `bwt_copy_16u_omp` wrappers against their C probes.
#[test]
fn libsais16_late_omp_wrappers_match_c() {
    // --- renumbering -------------------------------------------------------
    {
        let entries = [2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN];
        let count = entries.len();
        let mut sa_rs = vec![0; 20];
        sa_rs[..count].copy_from_slice(&entries);
        let mut sa_c = sa_rs.clone();
        let mut state = alloc_thread_state(1).unwrap();
        let name_rs = renumber_lms_suffixes_16u_omp(&mut sa_rs, count as SaSint, 1, &mut state);
        // SAFETY: the pointer comes from the live 20-entry `sa_c` vector.
        let name_c = unsafe {
            probe_libsais16_renumber_lms_suffixes_16u_omp(sa_c.as_mut_ptr(), count as SaSint, 1)
        };
        assert_eq!(name_rs, name_c);
        assert_eq!(sa_rs, sa_c);
    }

    // --- 32-bit to 16-bit copy ---------------------------------------------
    {
        // `mut` because the C probe takes a mutable pointer to the source.
        let mut source = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
        let count = source.len() as SaSint;
        let mut out_rs = vec![999; source.len()];
        let mut out_c = out_rs.clone();
        bwt_copy_16u_omp(&mut out_rs, &source, count, 1);
        // SAFETY: both pointers come from live vectors of `count` entries.
        unsafe {
            probe_libsais16_bwt_copy_16u_omp(out_c.as_mut_ptr(), source.as_mut_ptr(), count, 1);
        }
        assert_eq!(out_rs, out_c);
    }
}
15164
/// Gathers the marked entries of an eight-element array and compares both the
/// returned position and the array with the C probe.
#[test]
fn libsais16_gather_marked_lms_suffixes_matches_c() {
    let seed = vec![0, 0, 3 | SAINT_MIN, 4, 5 | SAINT_MIN, 6, -7, 8];
    let mut sa_rs = seed.clone();
    let mut sa_c = seed;

    let end_rs = gather_marked_lms_suffixes(&mut sa_rs, 2, 8, 0, 4) as SaSint;
    // SAFETY: the pointer comes from the live eight-entry `sa_c` vector;
    // assumes the probe stays within it.
    let end_c = unsafe { probe_libsais16_gather_marked_lms_suffixes(sa_c.as_mut_ptr(), 2, 8, 0, 4) };

    assert_eq!(end_rs, end_c);
    assert_eq!(sa_rs, sa_c);
}
15177
/// Runs the single-threaded `gather_marked_lms_suffixes_omp` wrapper on a
/// ten-entry array and compares the result with the C probe.
#[test]
fn libsais16_gather_marked_lms_suffixes_omp_matches_c() {
    let mut sa_rs = vec![0; 10];
    // Positions 4..8 hold the entries; two of them carry the `SAINT_MIN` bit.
    sa_rs[4..8].copy_from_slice(&[2 | SAINT_MIN, 4, 6 | SAINT_MIN, 8]);
    let mut sa_c = sa_rs.clone();

    let mut state = alloc_thread_state(1).unwrap();
    gather_marked_lms_suffixes_omp(&mut sa_rs, 8, 4, 2, 1, &mut state);
    // SAFETY: the pointer comes from the live ten-entry `sa_c` vector;
    // assumes the probe stays within it.
    unsafe {
        probe_libsais16_gather_marked_lms_suffixes_omp(sa_c.as_mut_ptr(), 8, 4, 2, 1);
    }

    assert_eq!(sa_rs, sa_c);
}
15192
/// Runs the single-threaded `renumber_and_gather_lms_suffixes_omp` wrapper on
/// a ten-entry array and compares the returned name and the array with the
/// C probe.
#[test]
fn libsais16_renumber_and_gather_lms_suffixes_omp_matches_c() {
    let mut sa_rs = vec![0; 10];
    // The first four positions hold the entries; every second one is marked.
    sa_rs[..4].copy_from_slice(&[2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN]);
    let mut sa_c = sa_rs.clone();

    let mut state = alloc_thread_state(1).unwrap();
    let name_rs = renumber_and_gather_lms_suffixes_omp(&mut sa_rs, 8, 4, 2, 1, &mut state);
    // SAFETY: the pointer comes from the live ten-entry `sa_c` vector;
    // assumes the probe stays within it.
    let name_c = unsafe {
        probe_libsais16_renumber_and_gather_lms_suffixes_omp(sa_c.as_mut_ptr(), 8, 4, 2, 1)
    };

    assert_eq!(name_rs, name_c);
    assert_eq!(sa_rs, sa_c);
}
15209
/// Checks both `reconstruct_lms_suffixes` and its single-threaded "omp"
/// wrapper against the C probes, starting from the same eight-entry array.
#[test]
fn libsais16_reconstruct_lms_suffixes_matches_c() {
    let seed = vec![2, 0, 1, 77, 88, 10, 11, 12];

    // Range-based routine over entries 0..3.
    {
        let mut sa_rs = seed.clone();
        let mut sa_c = seed.clone();
        reconstruct_lms_suffixes(&mut sa_rs, 8, 3, 0, 3);
        // SAFETY: the pointer comes from the live eight-entry `sa_c` vector.
        unsafe {
            probe_libsais16_reconstruct_lms_suffixes(sa_c.as_mut_ptr(), 8, 3, 0, 3);
        }
        assert_eq!(sa_rs, sa_c);
    }

    // Wrapper with one thread.
    {
        let mut sa_rs = seed.clone();
        let mut sa_c = seed.clone();
        reconstruct_lms_suffixes_omp(&mut sa_rs, 8, 3, 1);
        // SAFETY: the pointer comes from the live eight-entry `sa_c` vector.
        unsafe {
            probe_libsais16_reconstruct_lms_suffixes_omp(sa_c.as_mut_ptr(), 8, 3, 1);
        }
        assert_eq!(sa_rs, sa_c);
    }
}
15231
/// For inputs of 65,536 LMS entries, the four-thread renumber, gather-marked
/// and reconstruct wrappers must agree with their one-thread runs.
#[test]
fn libsais16_lms_late_omp_wrappers_use_block_partition() {
    // --- renumber_lms_suffixes_16u_omp -------------------------------------
    {
        let lms = 65_536usize;
        let mut single = vec![0; 2 * lms + 8];
        for (i, slot) in single.iter_mut().take(lms).enumerate() {
            let value = (2 * i) as SaSint;
            // Every seventh entry carries the `SAINT_MIN` bit.
            *slot = if i % 7 == 0 { value | SAINT_MIN } else { value };
        }
        let mut multi = single.clone();

        let mut single_state = alloc_thread_state(1).unwrap();
        let mut multi_state = alloc_thread_state(4).unwrap();
        let single_name =
            renumber_lms_suffixes_16u_omp(&mut single, lms as SaSint, 1, &mut single_state);
        let multi_name =
            renumber_lms_suffixes_16u_omp(&mut multi, lms as SaSint, 4, &mut multi_state);
        assert_eq!(multi_name, single_name);
        assert_eq!(multi, single);
    }

    // --- gather_marked_lms_suffixes_omp ------------------------------------
    {
        let len = 131_072usize;
        let lms = 65_536usize;
        let spare = 128usize;
        let half = len >> 1;
        let mut single = vec![0; len + spare];
        for (i, slot) in single[lms..lms + half].iter_mut().enumerate() {
            let value = (i as SaSint + 1) & SAINT_MAX;
            *slot = if i % 7 == 0 { value | SAINT_MIN } else { value };
        }
        // Number of indexes in `0..half` that are multiples of seven.
        let marked = (0..half).step_by(7).count();
        let mut multi = single.clone();

        let mut single_state = alloc_thread_state(1).unwrap();
        let mut multi_state = alloc_thread_state(4).unwrap();
        gather_marked_lms_suffixes_omp(
            &mut single,
            len as SaSint,
            lms as SaSint,
            spare as SaSint,
            1,
            &mut single_state,
        );
        gather_marked_lms_suffixes_omp(
            &mut multi,
            len as SaSint,
            lms as SaSint,
            spare as SaSint,
            4,
            &mut multi_state,
        );
        // Only the last `marked` entries of the buffer are compared.
        let tail = len + spare - marked..len + spare;
        assert_eq!(&multi[tail.clone()], &single[tail]);
    }

    // --- reconstruct_lms_suffixes_omp --------------------------------------
    {
        let lms = 65_536usize;
        let len = 2 * lms;
        let mut single = vec![0; len];
        {
            let (front, back) = single.split_at_mut(len - lms);
            for (i, (lo, hi)) in front.iter_mut().zip(back.iter_mut()).enumerate() {
                *lo = i as SaSint;
                *hi = 1_000_000 + i as SaSint;
            }
        }
        let mut multi = single.clone();

        reconstruct_lms_suffixes_omp(&mut single, len as SaSint, lms as SaSint, 1);
        reconstruct_lms_suffixes_omp(&mut multi, len as SaSint, lms as SaSint, 4);
        assert_eq!(multi, single);
    }
}
15298
/// Checks three range-based helpers against their C probes:
/// `renumber_distinct_lms_suffixes_32s_4k`, `mark_distinct_lms_suffixes_32s`
/// and `clamp_lms_suffixes_length_32s`.
#[test]
fn libsais16_distinct_lms_helpers_match_c() {
    let lms = 6;

    // --- renumbering -------------------------------------------------------
    {
        let mut sa_rs = vec![0; 18];
        sa_rs[..lms].copy_from_slice(&[
            2 | SAINT_MIN,
            4 | SAINT_MIN,
            6,
            8 | SAINT_MIN,
            10,
            12 | SAINT_MIN,
        ]);
        let mut sa_c = sa_rs.clone();
        let name_rs =
            renumber_distinct_lms_suffixes_32s_4k(&mut sa_rs, lms as SaSint, 1, 0, lms as isize);
        // SAFETY: the pointer comes from the live 18-entry `sa_c` vector.
        let name_c = unsafe {
            probe_libsais16_renumber_distinct_lms_suffixes_32s_4k(
                sa_c.as_mut_ptr(),
                lms as SaSint,
                1,
                0,
                lms as SaSint,
            )
        };
        assert_eq!(name_rs, name_c);
        assert_eq!(sa_rs, sa_c);
    }

    // --- marking -----------------------------------------------------------
    {
        let mut sa_rs = vec![0; 12];
        sa_rs[lms..lms + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
        let mut sa_c = sa_rs.clone();
        mark_distinct_lms_suffixes_32s(&mut sa_rs, lms as SaSint, 0, 6);
        // SAFETY: the pointer comes from the live 12-entry `sa_c` vector.
        unsafe {
            probe_libsais16_mark_distinct_lms_suffixes_32s(sa_c.as_mut_ptr(), lms as SaSint, 0, 6);
        }
        assert_eq!(sa_rs, sa_c);
    }

    // --- clamping ----------------------------------------------------------
    {
        let mut sa_rs = vec![0; 12];
        sa_rs[lms..lms + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
        let mut sa_c = sa_rs.clone();
        clamp_lms_suffixes_length_32s(&mut sa_rs, lms as SaSint, 0, 6);
        // SAFETY: the pointer comes from the live 12-entry `sa_c` vector.
        unsafe {
            probe_libsais16_clamp_lms_suffixes_length_32s(sa_c.as_mut_ptr(), lms as SaSint, 0, 6);
        }
        assert_eq!(sa_rs, sa_c);
    }
}
15344
/// Checks the single-threaded "omp" wrappers for renumbering, marking,
/// clamping and the combined renumber-and-mark step against their C probes.
#[test]
fn libsais16_distinct_lms_omp_wrappers_match_c() {
    let len = 12;
    let lms = 6;

    // --- renumber_distinct_lms_suffixes_32s_4k_omp -------------------------
    {
        let mut sa_rs = vec![0; 18];
        sa_rs[..lms].copy_from_slice(&[
            2 | SAINT_MIN,
            4 | SAINT_MIN,
            6,
            8 | SAINT_MIN,
            10,
            12 | SAINT_MIN,
        ]);
        let mut sa_c = sa_rs.clone();
        let mut state = alloc_thread_state(1).unwrap();
        let name_rs =
            renumber_distinct_lms_suffixes_32s_4k_omp(&mut sa_rs, lms as SaSint, 1, &mut state);
        // SAFETY: the pointer comes from the live 18-entry `sa_c` vector.
        let name_c = unsafe {
            probe_libsais16_renumber_distinct_lms_suffixes_32s_4k_omp(
                sa_c.as_mut_ptr(),
                lms as SaSint,
                1,
            )
        };
        assert_eq!(name_rs, name_c);
        assert_eq!(sa_rs, sa_c);
    }

    // --- mark_distinct_lms_suffixes_32s_omp --------------------------------
    {
        let mut sa_rs = vec![0; 18];
        sa_rs[lms..lms + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
        let mut sa_c = sa_rs.clone();
        mark_distinct_lms_suffixes_32s_omp(&mut sa_rs, len, lms as SaSint, 1);
        // SAFETY: the pointer comes from the live 18-entry `sa_c` vector.
        unsafe {
            probe_libsais16_mark_distinct_lms_suffixes_32s_omp(
                sa_c.as_mut_ptr(),
                len,
                lms as SaSint,
                1,
            );
        }
        assert_eq!(sa_rs, sa_c);
    }

    // --- clamp_lms_suffixes_length_32s_omp ---------------------------------
    {
        let mut sa_rs = vec![0; 18];
        sa_rs[lms..lms + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
        let mut sa_c = sa_rs.clone();
        clamp_lms_suffixes_length_32s_omp(&mut sa_rs, len, lms as SaSint, 1);
        // SAFETY: the pointer comes from the live 18-entry `sa_c` vector.
        unsafe {
            probe_libsais16_clamp_lms_suffixes_length_32s_omp(
                sa_c.as_mut_ptr(),
                len,
                lms as SaSint,
                1,
            );
        }
        assert_eq!(sa_rs, sa_c);
    }

    // --- renumber_and_mark_distinct_lms_suffixes_32s_4k_omp ----------------
    {
        let mut sa_rs = vec![0; 18];
        sa_rs[..lms].copy_from_slice(&[
            2 | SAINT_MIN,
            4 | SAINT_MIN,
            6,
            8 | SAINT_MIN,
            10,
            12 | SAINT_MIN,
        ]);
        let mut sa_c = sa_rs.clone();
        let mut state = alloc_thread_state(1).unwrap();
        let name_rs = renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
            &mut sa_rs,
            len,
            lms as SaSint,
            1,
            &mut state,
        );
        // SAFETY: the pointer comes from the live 18-entry `sa_c` vector.
        let name_c = unsafe {
            probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
                sa_c.as_mut_ptr(),
                len,
                lms as SaSint,
                1,
            )
        };
        assert_eq!(name_rs, name_c);
        assert_eq!(sa_rs, sa_c);
    }
}
15428
/// For inputs of 65,536 LMS entries, the four-thread distinct-LMS renumber,
/// mark and clamp wrappers must agree with their one-thread runs.
#[test]
fn libsais16_distinct_lms_omp_wrappers_use_block_partition() {
    // --- renumber_distinct_lms_suffixes_32s_4k_omp -------------------------
    {
        let lms = 65_536usize;
        let mut single = vec![0; 2 * lms];
        for (i, slot) in single.iter_mut().take(lms).enumerate() {
            let value = (2 * i) as SaSint;
            // Every seventh entry carries the `SAINT_MIN` bit.
            *slot = if i % 7 == 0 { value | SAINT_MIN } else { value };
        }
        let mut multi = single.clone();

        let mut single_state = alloc_thread_state(1).unwrap();
        let mut multi_state = alloc_thread_state(4).unwrap();
        let single_name = renumber_distinct_lms_suffixes_32s_4k_omp(
            &mut single,
            lms as SaSint,
            1,
            &mut single_state,
        );
        let multi_name = renumber_distinct_lms_suffixes_32s_4k_omp(
            &mut multi,
            lms as SaSint,
            4,
            &mut multi_state,
        );
        assert_eq!(multi_name, single_name);
        assert_eq!(multi, single);
    }

    let len = 131_072usize;
    let lms = 65_536usize;
    let half = len >> 1;

    // --- mark_distinct_lms_suffixes_32s_omp --------------------------------
    {
        let mut single = vec![0; len];
        for (i, slot) in single[lms..lms + half].iter_mut().enumerate() {
            // Multiples of five are marked; of the rest, multiples of eleven are zero.
            *slot = if i % 5 == 0 {
                SAINT_MIN | (i as SaSint + 1)
            } else if i % 11 == 0 {
                0
            } else {
                i as SaSint + 1
            };
        }
        let mut multi = single.clone();
        mark_distinct_lms_suffixes_32s_omp(&mut single, len as SaSint, lms as SaSint, 1);
        mark_distinct_lms_suffixes_32s_omp(&mut multi, len as SaSint, lms as SaSint, 4);
        assert_eq!(&multi[lms..len], &single[lms..len]);
    }

    // --- clamp_lms_suffixes_length_32s_omp ---------------------------------
    {
        let mut single = vec![0; len];
        for (i, slot) in single[lms..lms + half].iter_mut().enumerate() {
            *slot = if i % 5 == 0 {
                SAINT_MIN | (i as SaSint + 1)
            } else {
                i as SaSint + 1
            };
        }
        let mut multi = single.clone();
        clamp_lms_suffixes_length_32s_omp(&mut single, len as SaSint, lms as SaSint, 1);
        clamp_lms_suffixes_length_32s_omp(&mut multi, len as SaSint, lms as SaSint, 4);
        assert_eq!(&multi[lms..len], &single[lms..len]);
    }
}
15486
// Feeds identical small inputs to the scalar renumber/compact helpers and to the
// matching `probe_libsais16_*` FFI calls, then compares every output.
#[test]
fn libsais16_unique_nonunique_lms_helpers_match_c() {
    const M: usize = 4;

    // Renumbering of unique and non-unique LMS suffixes.
    {
        let mut ours_t: Vec<SaSint> = vec![0; 12];
        let mut ours_sa: Vec<SaSint> = vec![
            2,
            4,
            6,
            8,
            0,
            SAINT_MIN | 11,
            22,
            SAINT_MIN | 33,
            44,
            0,
            0,
            0,
        ];
        let mut theirs_t = ours_t.clone();
        let mut theirs_sa = ours_sa.clone();

        let ours_f = renumber_unique_and_nonunique_lms_suffixes_32s(
            &mut ours_t,
            &mut ours_sa,
            M as SaSint,
            0,
            0,
            M as isize,
        );
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them (its contract is not visible from this test).
        let theirs_f = unsafe {
            probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s(
                theirs_t.as_mut_ptr(),
                theirs_sa.as_mut_ptr(),
                M as SaSint,
                0,
                0,
                M as SaSint,
            )
        };
        assert_eq!(ours_f, theirs_f);
        assert_eq!(ours_t, theirs_t);
        assert_eq!(ours_sa, theirs_sa);
    }

    // Compaction, which also updates the left/right cursors in place.
    {
        let mut ours_sa: Vec<SaSint> =
            vec![0, 0, 0, 0, SAINT_MIN | 3, 4, SAINT_MIN | 5, 6, 0, 0];
        let mut theirs_sa = ours_sa.clone();
        let (mut ours_l, mut ours_r) = (M as isize, 10isize);
        let (mut theirs_l, mut theirs_r) = (ours_l as SaSint, ours_r as SaSint);

        compact_unique_and_nonunique_lms_suffixes_32s(
            &mut ours_sa,
            M as SaSint,
            &mut ours_l,
            &mut ours_r,
            0,
            4,
        );
        // SAFETY: the array pointer and both cursor references refer to live locals; the
        // probe is assumed to stay inside the array.
        unsafe {
            probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s(
                theirs_sa.as_mut_ptr(),
                M as SaSint,
                &mut theirs_l,
                &mut theirs_r,
                0,
                4,
            );
        }
        assert_eq!(ours_l as SaSint, theirs_l);
        assert_eq!(ours_r as SaSint, theirs_r);
        assert_eq!(ours_sa, theirs_sa);
    }
}
15551
// Compares the single-threaded `_omp` renumber/compact wrappers with the matching
// `probe_libsais16_*` FFI calls on identical small inputs.
#[test]
fn libsais16_unique_nonunique_lms_omp_wrappers_match_c() {
    let n = 8;
    let m = 4usize;
    let fs = 4;

    // Fixture shared by the renumber and compact_lms scenarios.
    let seeded_sa = || -> Vec<SaSint> {
        vec![
            2,
            4,
            6,
            8,
            0,
            SAINT_MIN | 11,
            22,
            SAINT_MIN | 33,
            44,
            0,
            0,
            0,
        ]
    };

    // Renumbering wrapper.
    {
        let mut ours_t: Vec<SaSint> = vec![0; 12];
        let mut ours_sa = seeded_sa();
        let mut theirs_t = ours_t.clone();
        let mut theirs_sa = ours_sa.clone();

        let ours_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut ours_t,
            &mut ours_sa,
            m as SaSint,
            1,
        );
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        let theirs_f = unsafe {
            probe_libsais16_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
                theirs_t.as_mut_ptr(),
                theirs_sa.as_mut_ptr(),
                m as SaSint,
                1,
            )
        };
        assert_eq!(ours_f, theirs_f);
        assert_eq!(ours_t, theirs_t);
        assert_eq!(ours_sa, theirs_sa);
    }

    // Unique/non-unique compaction wrapper.
    {
        let mut ours_sa: Vec<SaSint> = vec![
            0,
            0,
            101,
            102,
            SAINT_MIN | 3,
            4,
            SAINT_MIN | 5,
            6,
            0,
            0,
            0,
            0,
        ];
        let mut theirs_sa = ours_sa.clone();

        compact_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut ours_sa,
            n,
            m as SaSint,
            fs,
            2,
            1,
        );
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        unsafe {
            probe_libsais16_compact_unique_and_nonunique_lms_suffixes_32s_omp(
                theirs_sa.as_mut_ptr(),
                n,
                m as SaSint,
                fs,
                2,
                1,
            );
        }
        assert_eq!(ours_sa, theirs_sa);
    }

    // Combined compact_lms wrapper.
    {
        let mut ours_t: Vec<SaSint> = vec![0; 12];
        let mut ours_sa = seeded_sa();
        let mut theirs_t = ours_t.clone();
        let mut theirs_sa = ours_sa.clone();

        let ours_f =
            compact_lms_suffixes_32s_omp(&mut ours_t, &mut ours_sa, n, m as SaSint, fs, 1);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        let theirs_f = unsafe {
            probe_libsais16_compact_lms_suffixes_32s_omp(
                theirs_t.as_mut_ptr(),
                theirs_sa.as_mut_ptr(),
                n,
                m as SaSint,
                fs,
                1,
            )
        };
        assert_eq!(ours_f, theirs_f);
        assert_eq!(ours_t, theirs_t);
        assert_eq!(ours_sa, theirs_sa);
    }
}
15626
// Runs the renumber and compact `_omp` wrappers with 1 and with 4 threads on the
// same large inputs and requires matching results.
#[test]
fn libsais16_unique_nonunique_lms_omp_wrappers_use_block_partition() {
    // Renumbering: lower half holds 2*i, upper half holds i + 3 with every 5th tagged.
    {
        let m = 65_536usize;
        let mut single_t: Vec<SaSint> = vec![0; 2 * m];
        let mut single_sa: Vec<SaSint> = vec![0; 2 * m];
        {
            let (lower, upper) = single_sa.split_at_mut(m);
            for (i, (lo, hi)) in lower.iter_mut().zip(upper.iter_mut()).enumerate() {
                *lo = (2 * i) as SaSint;
                let shifted = i as SaSint + 3;
                *hi = if i % 5 == 0 {
                    SAINT_MIN | shifted
                } else {
                    shifted
                };
            }
        }
        let mut multi_t = single_t.clone();
        let mut multi_sa = single_sa.clone();

        let single_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut single_t,
            &mut single_sa,
            m as SaSint,
            1,
        );
        let multi_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut multi_t,
            &mut multi_sa,
            m as SaSint,
            4,
        );
        assert_eq!(multi_f, single_f);
        assert_eq!(multi_t, single_t);
        assert_eq!(multi_sa, single_sa);
    }

    // Compaction: only the first m entries and the last m entries are compared.
    {
        let n = 131_072usize;
        let m = 4_096usize;
        let fs = 8_192usize;
        let f = 1_024usize;

        let mut single_sa: Vec<SaSint> = vec![0; n + fs];
        for (i, slot) in single_sa[m..m + (n >> 1)].iter_mut().enumerate() {
            let rank = i as SaSint + 1;
            *slot = if i % 32 == 0 { SAINT_MIN | rank } else { rank };
        }
        for (i, slot) in single_sa[m - f..m].iter_mut().enumerate() {
            *slot = 1_000_000 + i as SaSint;
        }
        let mut multi_sa = single_sa.clone();

        compact_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut single_sa,
            n as SaSint,
            m as SaSint,
            fs as SaSint,
            f as SaSint,
            1,
        );
        compact_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut multi_sa,
            n as SaSint,
            m as SaSint,
            fs as SaSint,
            f as SaSint,
            4,
        );

        let tail = n + fs - m..n + fs;
        assert_eq!(&multi_sa[..m], &single_sa[..m]);
        assert_eq!(&multi_sa[tail.clone()], &single_sa[tail]);
    }
}
15698
// Compares the scalar unique / non-unique merge helpers with the matching
// `probe_libsais16_*` FFI calls on identical small inputs.
#[test]
fn libsais16_merge_lms_helpers_match_c() {
    // Unique merge: three sign-tagged text positions, four candidates at the SA tail.
    {
        let n = 10;
        let m = 3;
        let mut ours_t: Vec<SaSint> = vec![
            0,
            SAINT_MIN | 11,
            0,
            SAINT_MIN | 22,
            0,
            0,
            0,
            SAINT_MIN | 33,
            0,
            0,
        ];
        let mut ours_sa: Vec<SaSint> = vec![0, 0, 0, 0, 0, 0, 2, 5, 8, 9];
        let mut theirs_t = ours_t.clone();
        let mut theirs_sa = ours_sa.clone();

        merge_unique_lms_suffixes_32s(&mut ours_t, &mut ours_sa, n, m, 0, 0, n as isize);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_merge_unique_lms_suffixes_32s(
                theirs_t.as_mut_ptr(),
                theirs_sa.as_mut_ptr(),
                n,
                m,
                0,
                0,
                n,
            );
        }
        assert_eq!(ours_t, theirs_t);
        assert_eq!(ours_sa, theirs_sa);
    }

    // Non-unique merge over a ten-entry array containing zero slots.
    {
        let n = 10;
        let m = 5;
        let mut ours_sa = vec![9, 0, 8, 0, 0, 7, 31, 32, 33, 34];
        let mut theirs_sa = ours_sa.clone();

        merge_nonunique_lms_suffixes_32s(&mut ours_sa, n, m, 2, 0, m as isize);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        unsafe {
            probe_libsais16_merge_nonunique_lms_suffixes_32s(
                theirs_sa.as_mut_ptr(),
                n,
                m,
                2,
                0,
                m,
            );
        }
        assert_eq!(ours_sa, theirs_sa);
    }
}
15736
// Compares the single-threaded `_omp` merge wrappers with the matching
// `probe_libsais16_*` FFI calls on identical small inputs.
#[test]
fn libsais16_merge_lms_omp_wrappers_match_c() {
    let n = 12;
    let m = 4;
    let f = 2;

    // Fixtures shared by the unique-merge and compacted-merge scenarios.
    let seeded_t = || -> Vec<SaSint> {
        vec![0, SAINT_MIN | 11, 0, 0, 0, SAINT_MIN | 22, 0, 0, 0, 0, 0, 0]
    };
    let seeded_sa = || -> Vec<SaSint> { vec![0, 41, 0, 0, 0, 0, 0, 2, 6, 21, 22, 23] };

    // Unique merge wrapper.
    {
        let mut ours_t = seeded_t();
        let mut ours_sa = seeded_sa();
        let mut theirs_t = ours_t.clone();
        let mut theirs_sa = ours_sa.clone();

        merge_unique_lms_suffixes_32s_omp(&mut ours_t, &mut ours_sa, n, m, 1);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_merge_unique_lms_suffixes_32s_omp(
                theirs_t.as_mut_ptr(),
                theirs_sa.as_mut_ptr(),
                n,
                m,
                1,
            );
        }
        assert_eq!(ours_t, theirs_t);
        assert_eq!(ours_sa, theirs_sa);
    }

    // Non-unique merge wrapper.
    {
        let mut ours_sa = vec![0, 41, 1, 0, 55, 66, 77, 2, 6, 21, 22, 23];
        let mut theirs_sa = ours_sa.clone();

        merge_nonunique_lms_suffixes_32s_omp(&mut ours_sa, n, m, f, 1);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        unsafe {
            probe_libsais16_merge_nonunique_lms_suffixes_32s_omp(
                theirs_sa.as_mut_ptr(),
                n,
                m,
                f,
                1,
            );
        }
        assert_eq!(ours_sa, theirs_sa);
    }

    // Combined compacted-merge wrapper.
    {
        let mut ours_t = seeded_t();
        let mut ours_sa = seeded_sa();
        let mut theirs_t = ours_t.clone();
        let mut theirs_sa = ours_sa.clone();

        merge_compacted_lms_suffixes_32s_omp(&mut ours_t, &mut ours_sa, n, m, f, 1);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_merge_compacted_lms_suffixes_32s_omp(
                theirs_t.as_mut_ptr(),
                theirs_sa.as_mut_ptr(),
                n,
                m,
                f,
                1,
            );
        }
        assert_eq!(ours_t, theirs_t);
        assert_eq!(ours_sa, theirs_sa);
    }
}
15793
// Runs the unique and non-unique merge `_omp` wrappers with 1 and with 4 threads on
// the same large inputs and requires identical results.
#[test]
fn libsais16_merge_lms_omp_wrappers_use_block_partition() {
    // Unique merge: every 17th text position is sign-tagged.
    {
        let n = 65_536usize;
        let m = 10_000usize;

        let mut single_t: Vec<SaSint> = vec![0; n];
        for (i, slot) in single_t.iter_mut().enumerate().step_by(17) {
            *slot = SAINT_MIN | (i as SaSint + 1);
        }
        let tagged = single_t.iter().filter(|&&value| value < 0).count();

        // One more source entry than there are tagged positions, starting at n - m - 1.
        let mut single_sa: Vec<SaSint> = vec![0; n];
        let first = n - m - 1;
        for (i, slot) in single_sa[first..=first + tagged].iter_mut().enumerate() {
            *slot = ((i * 13 + 7) % n) as SaSint;
        }

        let mut multi_t = single_t.clone();
        let mut multi_sa = single_sa.clone();

        merge_unique_lms_suffixes_32s_omp(
            &mut single_t,
            &mut single_sa,
            n as SaSint,
            m as SaSint,
            1,
        );
        merge_unique_lms_suffixes_32s_omp(
            &mut multi_t,
            &mut multi_sa,
            n as SaSint,
            m as SaSint,
            4,
        );
        assert_eq!(multi_t, single_t);
        assert_eq!(multi_sa, single_sa);
    }

    // Non-unique merge: every 9th slot of the first m entries is zero.
    {
        let n = 131_072usize;
        let m = 65_536usize;
        let f = 100usize;

        let mut single_sa: Vec<SaSint> = vec![1; n];
        for slot in single_sa[..m].iter_mut().step_by(9) {
            *slot = 0;
        }
        let zeros = single_sa[..m].iter().filter(|&&value| value == 0).count();

        // One more source entry than there are zero slots, starting at n - m - 1 + f.
        let first = n - m - 1 + f;
        for (i, slot) in single_sa[first..=first + zeros].iter_mut().enumerate() {
            *slot = 2_000_000 + i as SaSint;
        }
        let mut multi_sa = single_sa.clone();

        merge_nonunique_lms_suffixes_32s_omp(
            &mut single_sa,
            n as SaSint,
            m as SaSint,
            f as SaSint,
            1,
        );
        merge_nonunique_lms_suffixes_32s_omp(
            &mut multi_sa,
            n as SaSint,
            m as SaSint,
            f as SaSint,
            4,
        );
        assert_eq!(multi_sa, single_sa);
    }
}
15858
// Cross-checks the 32-bit LMS radix-sort routines (6k, 2k, 1k and their `_omp`
// wrappers) against the `probe_libsais16_*` FFI calls, and pins the expected output
// of the block gather / block sort helpers with literal values.
#[test]
fn libsais16_radix_sort_lms_suffixes_32s_match_c() {
    let text: Vec<SaSint> = vec![0, 1, 2, 3, 1, 2, 3, 0];
    let seed_sa = || -> Vec<SaSint> { vec![0, 0, 0, 0, 0, 1, 2, 3] };
    let seed_bucket_6k = || -> Vec<SaSint> { vec![0, 6, 7, 8] };
    let seed_bucket_2k = || -> Vec<SaSint> { vec![0, 0, 6, 0, 7, 0, 8, 0] };

    // Scalar 6k variant.
    {
        let mut ours_sa = seed_sa();
        let mut theirs_sa = ours_sa.clone();
        let mut ours_bucket = seed_bucket_6k();
        let mut theirs_bucket = ours_bucket.clone();

        radix_sort_lms_suffixes_32s_6k(&text, &mut ours_sa, &mut ours_bucket, 5, 3);
        // SAFETY: all pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_lms_suffixes_32s_6k(
                text.as_ptr(),
                theirs_sa.as_mut_ptr(),
                theirs_bucket.as_mut_ptr(),
                5,
                3,
            );
        }
        assert_eq!(ours_sa, theirs_sa);
        assert_eq!(ours_bucket, theirs_bucket);
    }

    // Scalar 2k variant.
    {
        let mut ours_sa = seed_sa();
        let mut theirs_sa = ours_sa.clone();
        let mut ours_bucket = seed_bucket_2k();
        let mut theirs_bucket = ours_bucket.clone();

        radix_sort_lms_suffixes_32s_2k(&text, &mut ours_sa, &mut ours_bucket, 5, 3);
        // SAFETY: all pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_lms_suffixes_32s_2k(
                text.as_ptr(),
                theirs_sa.as_mut_ptr(),
                theirs_bucket.as_mut_ptr(),
                5,
                3,
            );
        }
        assert_eq!(ours_sa, theirs_sa);
        assert_eq!(ours_bucket, theirs_bucket);
    }

    // Block gather followed by the 6k and 2k block sorts; expectations are literals.
    {
        let source_sa = seed_sa();

        let mut cache = vec![ThreadCache::default(); 8];
        radix_sort_lms_suffixes_32s_block_gather(&text, &source_sa, &mut cache, 5, 3);
        for (entry, expected) in cache[5..8].iter().zip(1..) {
            assert_eq!(entry.index, expected);
            assert_eq!(entry.symbol, expected);
        }

        // The 6k block sort consumes the cache filled by the gather above.
        let mut bucket = seed_bucket_6k();
        radix_sort_lms_suffixes_32s_6k_block_sort(&mut bucket, &mut cache, 5, 3);
        assert_eq!(bucket, vec![0, 5, 6, 7]);
        for (entry, expected) in cache[5..8].iter().zip(5..) {
            assert_eq!(entry.symbol, expected);
        }

        // Fresh cache and gather before the 2k block sort.
        let mut cache = vec![ThreadCache::default(); 8];
        radix_sort_lms_suffixes_32s_block_gather(&text, &source_sa, &mut cache, 5, 3);
        let mut bucket = seed_bucket_2k();
        radix_sort_lms_suffixes_32s_2k_block_sort(&mut bucket, &mut cache, 5, 3);
        assert_eq!(bucket, vec![0, 0, 5, 0, 6, 0, 7, 0]);
        for (entry, expected) in cache[5..8].iter().zip(5..) {
            assert_eq!(entry.symbol, expected);
        }
    }

    // 6k `_omp` wrapper with one thread.
    {
        let mut ours_sa = seed_sa();
        let mut theirs_sa = ours_sa.clone();
        let mut ours_bucket = seed_bucket_6k();
        let mut theirs_bucket = ours_bucket.clone();

        radix_sort_lms_suffixes_32s_6k_omp(&text, &mut ours_sa, 8, 4, &mut ours_bucket, 1);
        // SAFETY: all pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_lms_suffixes_32s_6k_omp(
                text.as_ptr(),
                theirs_sa.as_mut_ptr(),
                8,
                4,
                theirs_bucket.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(ours_sa, theirs_sa);
        assert_eq!(ours_bucket, theirs_bucket);
    }

    // 2k `_omp` wrapper with one thread.
    {
        let mut ours_sa = seed_sa();
        let mut theirs_sa = ours_sa.clone();
        let mut ours_bucket = seed_bucket_2k();
        let mut theirs_bucket = ours_bucket.clone();

        radix_sort_lms_suffixes_32s_2k_omp(&text, &mut ours_sa, 8, 4, &mut ours_bucket, 1);
        // SAFETY: all pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_lms_suffixes_32s_2k_omp(
                text.as_ptr(),
                theirs_sa.as_mut_ptr(),
                8,
                4,
                theirs_bucket.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(ours_sa, theirs_sa);
        assert_eq!(ours_bucket, theirs_bucket);
    }

    // 1k variant on a different five-symbol text; it also returns a count.
    {
        let text: Vec<SaSint> = vec![2, 1, 3, 1, 0];
        let mut ours_sa: Vec<SaSint> = vec![0; text.len()];
        let mut theirs_sa = ours_sa.clone();
        let mut ours_bucket: Vec<SaSint> = vec![0, 2, 4, 5];
        let mut theirs_bucket = ours_bucket.clone();

        let ours_m = radix_sort_lms_suffixes_32s_1k(
            &text,
            &mut ours_sa,
            text.len() as SaSint,
            &mut ours_bucket,
        );
        // SAFETY: all pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        let theirs_m = unsafe {
            probe_libsais16_radix_sort_lms_suffixes_32s_1k(
                text.as_ptr(),
                theirs_sa.as_mut_ptr(),
                text.len() as SaSint,
                theirs_bucket.as_mut_ptr(),
            )
        };
        assert_eq!(ours_m, theirs_m);
        assert_eq!(ours_sa, theirs_sa);
        assert_eq!(ours_bucket, theirs_bucket);
    }
}
15977
// Compares the 6k and 4k marker-setting routines (scalar and single-threaded `_omp`)
// with the matching `probe_libsais16_*` FFI calls on an all-zero eight-entry array.
#[test]
fn libsais16_radix_sort_set_markers_32s_match_c() {
    let blank_sa = || -> Vec<SaSint> { vec![0; 8] };

    // 6k layout: one bucket entry per symbol.
    let mut bucket_6k: Vec<SaSint> = vec![1, 3, 5, 7];
    {
        let mut ours = blank_sa();
        let mut theirs = ours.clone();
        radix_sort_set_markers_32s_6k(&mut ours, &bucket_6k, 0, 4);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_set_markers_32s_6k(
                theirs.as_mut_ptr(),
                bucket_6k.as_mut_ptr(),
                0,
                4,
            );
        }
        assert_eq!(ours, theirs);
    }
    {
        let mut ours = blank_sa();
        let mut theirs = ours.clone();
        radix_sort_set_markers_32s_6k_omp(&mut ours, 5, &bucket_6k, 1);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_set_markers_32s_6k_omp(
                theirs.as_mut_ptr(),
                5,
                bucket_6k.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(ours, theirs);
    }

    // 4k layout: the same values placed at every other slot.
    let mut bucket_4k: Vec<SaSint> = vec![1, 0, 3, 0, 5, 0, 7, 0];
    {
        let mut ours = blank_sa();
        let mut theirs = ours.clone();
        radix_sort_set_markers_32s_4k(&mut ours, &bucket_4k, 0, 4);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_set_markers_32s_4k(
                theirs.as_mut_ptr(),
                bucket_4k.as_mut_ptr(),
                0,
                4,
            );
        }
        assert_eq!(ours, theirs);
    }
    {
        let mut ours = blank_sa();
        let mut theirs = ours.clone();
        radix_sort_set_markers_32s_4k_omp(&mut ours, 5, &bucket_4k, 1);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_radix_sort_set_markers_32s_4k_omp(
                theirs.as_mut_ptr(),
                5,
                bucket_4k.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(ours, theirs);
    }
}
16034
// Runs the 6k and 4k marker-setting `_omp` wrappers with 1 and with 4 threads over
// 65,600 buckets and requires identical output arrays.
#[test]
fn libsais16_radix_sort_set_markers_32s_omp_partitions_large_inputs() {
    let k = 65_600usize;

    // 6k layout: bucket i holds i.
    {
        let bucket: Vec<SaSint> = (0..k).map(|i| i as SaSint).collect();
        let mut one_thread = vec![0; k];
        let mut four_threads = vec![0; k];
        radix_sort_set_markers_32s_6k_omp(&mut one_thread, k as SaSint, &bucket, 1);
        radix_sort_set_markers_32s_6k_omp(&mut four_threads, k as SaSint, &bucket, 4);
        assert_eq!(four_threads, one_thread);
    }

    // 4k layout: the same values stored at the `buckets_index2(i, 0)` slots.
    {
        let mut bucket = vec![0; 2 * k];
        (0..k).for_each(|i| bucket[buckets_index2(i, 0)] = i as SaSint);
        let mut one_thread = vec![0; k];
        let mut four_threads = vec![0; k];
        radix_sort_set_markers_32s_4k_omp(&mut one_thread, k as SaSint, &bucket, 1);
        radix_sort_set_markers_32s_4k_omp(&mut four_threads, k as SaSint, &bucket, 4);
        assert_eq!(four_threads, one_thread);
    }
}
16055
// Compares the partial-sorting marker/bucket shift and LMS gather helpers with the
// matching `probe_libsais16_*` FFI calls on small hand-built inputs.
#[test]
fn libsais16_partial_sorting_32s_helpers_match_c() {
    let k = 3;

    // 6k marker shift driven by a read-only bucket table.
    {
        let mut ours = vec![0, SAINT_MIN, 2, SAINT_MIN, 4, SAINT_MIN];
        let mut theirs = ours.clone();
        let mut table = vec![0; 6 * k as usize];
        table[buckets_index4(1, 0)] = 3;
        table[buckets_index4(2, 0)] = 6;
        table[4 * k as usize + buckets_index2(0, 0)] = 0;
        table[4 * k as usize + buckets_index2(1, 0)] = 1;

        partial_sorting_shift_markers_32s_6k_omp(&mut ours, k, &table, 1);
        // SAFETY: both pointers come from live Vecs that outlive the call; the probe is
        // assumed to stay inside them.
        unsafe {
            probe_libsais16_partial_sorting_shift_markers_32s_6k_omp(
                theirs.as_mut_ptr(),
                k,
                table.as_ptr(),
                1,
            );
        }
        assert_eq!(ours, theirs);
    }

    // 4k marker shift.
    {
        let mut ours = vec![
            1 | SUFFIX_GROUP_MARKER,
            2,
            3 | SUFFIX_GROUP_MARKER,
            4 | SUFFIX_GROUP_MARKER,
            5,
            6,
        ];
        let mut theirs = ours.clone();
        partial_sorting_shift_markers_32s_4k(&mut ours, 6);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        unsafe { probe_libsais16_partial_sorting_shift_markers_32s_4k(theirs.as_mut_ptr(), 6) };
        assert_eq!(ours, theirs);
    }

    // 6k bucket shift: entries from 4*k onwards are seeded with 100, 101, ...
    {
        let mut ours = vec![0; 6 * k as usize];
        for (slot, seed) in ours[4 * k as usize..].iter_mut().zip(100..) {
            *slot = seed;
        }
        let mut theirs = ours.clone();
        partial_sorting_shift_buckets_32s_6k(k, &mut ours);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        unsafe { probe_libsais16_partial_sorting_shift_buckets_32s_6k(k, theirs.as_mut_ptr()) };
        assert_eq!(ours, theirs);
    }

    // Scalar 4k gather.
    {
        let mut ours = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
        let mut theirs = ours.clone();
        let ours_l = partial_sorting_gather_lms_suffixes_32s_4k(&mut ours, 0, 4);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        let theirs_l = unsafe {
            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k(theirs.as_mut_ptr(), 0, 4)
        };
        assert_eq!(ours_l, theirs_l);
        assert_eq!(ours, theirs);
    }

    // Scalar 1k gather.
    {
        let mut ours = vec![1, -3, 5, -7];
        let mut theirs = ours.clone();
        let ours_l = partial_sorting_gather_lms_suffixes_32s_1k(&mut ours, 0, 4);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        let theirs_l = unsafe {
            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k(theirs.as_mut_ptr(), 0, 4)
        };
        assert_eq!(ours_l, theirs_l);
        assert_eq!(ours, theirs);
    }

    // 4k gather `_omp` wrapper with a freshly allocated one-thread state.
    {
        let mut state = alloc_thread_state(1).unwrap();
        let mut ours = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
        let mut theirs = ours.clone();
        partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut ours, 4, 1, &mut state);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        unsafe {
            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_4k_omp(
                theirs.as_mut_ptr(),
                4,
                1,
            );
        }
        assert_eq!(ours, theirs);
    }

    // 1k gather `_omp` wrapper with a freshly allocated one-thread state.
    {
        let mut state = alloc_thread_state(1).unwrap();
        let mut ours = vec![1, -3, 5, -7];
        let mut theirs = ours.clone();
        partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut ours, 4, 1, &mut state);
        // SAFETY: the pointer comes from a live Vec that outlives the call; the probe is
        // assumed to stay inside it.
        unsafe {
            probe_libsais16_partial_sorting_gather_lms_suffixes_32s_1k_omp(
                theirs.as_mut_ptr(),
                4,
                1,
            );
        }
        assert_eq!(ours, theirs);
    }
}
16135
// Runs the 4k and 1k gather `_omp` wrappers with 1 and with 4 threads on the same
// 65,536-entry inputs and compares the leading `lms_count` entries of each result.
#[test]
fn libsais16_partial_sorting_gather_lms_suffixes_32s_omp_uses_block_partition() {
    let n = 65_536usize;
    let masked = |i: usize| (i as SaSint + 1) & SAINT_MAX;

    // 4k input: every 7th entry carries the sign bit and the group marker; remaining
    // multiples of 11 carry only the group marker.
    let input_4k: Vec<SaSint> = (0..n)
        .map(|i| {
            let value = masked(i);
            if i % 7 == 0 {
                value | SAINT_MIN | SUFFIX_GROUP_MARKER
            } else if i % 11 == 0 {
                value | SUFFIX_GROUP_MARKER
            } else {
                value
            }
        })
        .collect();
    // 1k input: every 7th entry carries the sign bit.
    let input_1k: Vec<SaSint> = (0..n)
        .map(|i| {
            let value = masked(i);
            if i % 7 == 0 {
                value | SAINT_MIN
            } else {
                value
            }
        })
        .collect();
    let lms_count = input_1k.iter().filter(|&&v| v < 0).count();

    // The same two thread states serve the 4k runs first and the 1k runs afterwards.
    let mut one_state = alloc_thread_state(1).unwrap();
    let mut four_state = alloc_thread_state(4).unwrap();

    let mut one_thread = input_4k.clone();
    let mut four_threads = input_4k;
    partial_sorting_gather_lms_suffixes_32s_4k_omp(
        &mut one_thread,
        n as SaSint,
        1,
        &mut one_state,
    );
    partial_sorting_gather_lms_suffixes_32s_4k_omp(
        &mut four_threads,
        n as SaSint,
        4,
        &mut four_state,
    );
    assert_eq!(&four_threads[..lms_count], &one_thread[..lms_count]);

    let mut one_thread = input_1k.clone();
    let mut four_threads = input_1k;
    partial_sorting_gather_lms_suffixes_32s_1k_omp(
        &mut one_thread,
        n as SaSint,
        1,
        &mut one_state,
    );
    partial_sorting_gather_lms_suffixes_32s_1k_omp(
        &mut four_threads,
        n as SaSint,
        4,
        &mut four_state,
    );
    assert_eq!(&four_threads[..lms_count], &one_thread[..lms_count]);
}
16188
// Pins the observable effect of each partial-sorting block gather / block sort helper
// on a single cache entry, using literal expectations (no FFI comparison here).
#[test]
fn libsais16_partial_sorting_32s_block_helpers_behave_like_upstream_shapes() {
    let text: Vec<SaSint> = vec![0, 1, 2, 1, 0];
    let k = 3usize;

    // Three-slot cache that is default everywhere except for one populated entry.
    let cache_with = |slot: usize, index: SaSint, symbol: SaSint| -> Vec<ThreadCache> {
        let mut cache = vec![ThreadCache::default(); 3];
        cache[slot] = ThreadCache { symbol, index };
        cache
    };

    // ---- Right-to-left block gathers (entry at position 1) ----
    {
        let mut sa = vec![0, 4 | SAINT_MIN, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_right_to_left_32s_6k_block_gather(&text, &mut sa, &mut cache, 1, 1);
        assert_eq!(cache[1].index, 4 | SAINT_MIN);
        assert_eq!(cache[1].symbol, buckets_index4(1, 1) as SaSint);
    }
    {
        let mut sa = vec![0, 4 | SUFFIX_GROUP_MARKER, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_right_to_left_32s_4k_block_gather(&text, &mut sa, &mut cache, 1, 1);
        assert_eq!(sa[1], 0);
        assert_eq!(cache[1].index, 4 | SUFFIX_GROUP_MARKER);
        assert_eq!(cache[1].symbol, buckets_index2(1, 1) as SaSint);
    }
    {
        let mut sa = vec![0, 4, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_right_to_left_32s_1k_block_gather(&text, &mut sa, &mut cache, 1, 1);
        assert_eq!(sa[1], 0);
        assert_eq!(cache[1].index, 3 | SAINT_MIN);
        assert_eq!(cache[1].symbol, 1);
    }

    // ---- Left-to-right block gathers (entry at position 0) ----
    {
        let mut sa = vec![4 | SAINT_MIN, 0, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_left_to_right_32s_6k_block_gather(&text, &mut sa, &mut cache, 0, 1);
        assert_eq!(cache[0].index, 4 | SAINT_MIN);
        assert_eq!(cache[0].symbol, buckets_index4(1, 1) as SaSint);
    }
    {
        let mut sa = vec![4 | SUFFIX_GROUP_MARKER, 0, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_left_to_right_32s_4k_block_gather(&text, &mut sa, &mut cache, 0, 1);
        assert_eq!(sa[0], 0);
        assert_eq!(cache[0].index, 4 | SUFFIX_GROUP_MARKER);
        assert_eq!(cache[0].symbol, buckets_index2(1, 0) as SaSint);
    }
    {
        let mut sa = vec![4, 0, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_left_to_right_32s_1k_block_gather(&text, &mut sa, &mut cache, 0, 1);
        assert_eq!(sa[0], 0);
        assert_eq!(cache[0].index, 3);
        assert_eq!(cache[0].symbol, 1);
    }

    // ---- 6k block sorts ----
    {
        let slot = buckets_index4(1, 1);
        let mut cache = cache_with(1, 4 | SAINT_MIN, slot as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[slot] = 2;
        let d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
            &text,
            &mut buckets,
            0,
            &mut cache,
            1,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[1].index, 3 | SAINT_MIN);
        assert_eq!(buckets[slot], 1);
        assert_eq!(buckets[slot + 2], 1);
    }
    {
        let slot = buckets_index4(1, 1);
        let mut cache = cache_with(0, 4 | SAINT_MIN, slot as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[slot] = 1;
        let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
            &text,
            &mut buckets,
            0,
            &mut cache,
            0,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[0].index, 3 | SAINT_MIN);
        assert_eq!(buckets[slot], 2);
        assert_eq!(buckets[slot + 2], 1);
    }

    // ---- 4k block sorts ----
    {
        let mut cache =
            cache_with(1, 4 | SUFFIX_GROUP_MARKER, buckets_index2(1, 1) as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[3 * k + 1] = 2;
        let d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
            &text,
            k as SaSint,
            &mut buckets,
            0,
            &mut cache,
            1,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[1].symbol, 1);
        assert_eq!(buckets[3 * k + 1], 1);
    }
    {
        let mut cache =
            cache_with(0, 4 | SUFFIX_GROUP_MARKER, buckets_index2(1, 0) as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[2 * k + 1] = 1;
        let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
            &text,
            k as SaSint,
            &mut buckets,
            0,
            &mut cache,
            0,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[0].symbol, 1);
        assert_eq!(buckets[2 * k + 1], 2);
    }

    // ---- 1k block sorts ----
    {
        let mut cache = cache_with(1, 4, 1);
        let mut buckets = vec![0; k];
        buckets[1] = 2;
        partial_sorting_scan_right_to_left_32s_1k_block_sort(
            &text,
            &mut buckets,
            &mut cache,
            1,
            1,
        );
        assert_eq!(cache[1].symbol, 1);
        assert_eq!(buckets[1], 1);
    }
    {
        let mut cache = cache_with(0, 4, 1);
        let mut buckets = vec![0; k];
        buckets[1] = 1;
        partial_sorting_scan_left_to_right_32s_1k_block_sort(
            &text,
            &mut buckets,
            &mut cache,
            0,
            1,
        );
        assert_eq!(cache[0].symbol, 1);
        assert_eq!(buckets[1], 2);
    }
}
16324
16325 #[test]
16326 fn libsais16_partial_sorting_scan_32s_match_c() {
16327 let t = vec![0, 1, 2, 1, 3, 0];
16328 let k = 4;
16329
16330 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16331 let mut c_sa = rust_sa.clone();
16332 let mut rust_buckets = vec![0; 6 * k as usize];
16333 rust_buckets[buckets_index4(2, 0)] = 4;
16334 rust_buckets[buckets_index4(1, 1)] = 5;
16335 let mut c_buckets = rust_buckets.clone();
16336 let rust_d =
16337 partial_sorting_scan_left_to_right_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16338 let c_d = unsafe {
16339 probe_libsais16_partial_sorting_scan_left_to_right_32s_6k(
16340 t.as_ptr(),
16341 c_sa.as_mut_ptr(),
16342 c_buckets.as_mut_ptr(),
16343 0,
16344 0,
16345 2,
16346 )
16347 };
16348 assert_eq!(rust_d, c_d);
16349 assert_eq!(rust_sa, c_sa);
16350 assert_eq!(rust_buckets, c_buckets);
16351
16352 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16353 let mut c_sa = rust_sa.clone();
16354 let mut rust_buckets = vec![0; 4 * k as usize];
16355 rust_buckets[2 * k as usize + 2] = 4;
16356 rust_buckets[2 * k as usize + 1] = 5;
16357 let mut c_buckets = rust_buckets.clone();
16358 let rust_d = partial_sorting_scan_left_to_right_32s_4k(
16359 &t,
16360 &mut rust_sa,
16361 k,
16362 &mut rust_buckets,
16363 0,
16364 0,
16365 2,
16366 );
16367 let c_d = unsafe {
16368 probe_libsais16_partial_sorting_scan_left_to_right_32s_4k(
16369 t.as_ptr(),
16370 c_sa.as_mut_ptr(),
16371 k,
16372 c_buckets.as_mut_ptr(),
16373 0,
16374 0,
16375 2,
16376 )
16377 };
16378 assert_eq!(rust_d, c_d);
16379 assert_eq!(rust_sa, c_sa);
16380 assert_eq!(rust_buckets, c_buckets);
16381
16382 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16383 let mut c_sa = rust_sa.clone();
16384 let mut rust_buckets = vec![0, 5, 4, 0];
16385 let mut c_buckets = rust_buckets.clone();
16386 partial_sorting_scan_left_to_right_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16387 unsafe {
16388 probe_libsais16_partial_sorting_scan_left_to_right_32s_1k(
16389 t.as_ptr(),
16390 c_sa.as_mut_ptr(),
16391 c_buckets.as_mut_ptr(),
16392 0,
16393 2,
16394 );
16395 }
16396 assert_eq!(rust_sa, c_sa);
16397 assert_eq!(rust_buckets, c_buckets);
16398
16399 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16400 let mut c_sa = rust_sa.clone();
16401 let mut rust_buckets = vec![0; 6 * k as usize];
16402 rust_buckets[buckets_index4(2, 0)] = 7;
16403 rust_buckets[buckets_index4(1, 1)] = 6;
16404 let mut c_buckets = rust_buckets.clone();
16405 let rust_d =
16406 partial_sorting_scan_right_to_left_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16407 let c_d = unsafe {
16408 probe_libsais16_partial_sorting_scan_right_to_left_32s_6k(
16409 t.as_ptr(),
16410 c_sa.as_mut_ptr(),
16411 c_buckets.as_mut_ptr(),
16412 0,
16413 0,
16414 2,
16415 )
16416 };
16417 assert_eq!(rust_d, c_d);
16418 assert_eq!(rust_sa, c_sa);
16419 assert_eq!(rust_buckets, c_buckets);
16420
16421 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16422 let mut c_sa = rust_sa.clone();
16423 let mut rust_buckets = vec![0; 4 * k as usize];
16424 rust_buckets[3 * k as usize + 2] = 7;
16425 rust_buckets[3 * k as usize + 1] = 6;
16426 let mut c_buckets = rust_buckets.clone();
16427 let rust_d = partial_sorting_scan_right_to_left_32s_4k(
16428 &t,
16429 &mut rust_sa,
16430 k,
16431 &mut rust_buckets,
16432 0,
16433 0,
16434 2,
16435 );
16436 let c_d = unsafe {
16437 probe_libsais16_partial_sorting_scan_right_to_left_32s_4k(
16438 t.as_ptr(),
16439 c_sa.as_mut_ptr(),
16440 k,
16441 c_buckets.as_mut_ptr(),
16442 0,
16443 0,
16444 2,
16445 )
16446 };
16447 assert_eq!(rust_d, c_d);
16448 assert_eq!(rust_sa, c_sa);
16449 assert_eq!(rust_buckets, c_buckets);
16450
16451 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16452 let mut c_sa = rust_sa.clone();
16453 let mut rust_buckets = vec![0, 6, 7, 0];
16454 let mut c_buckets = rust_buckets.clone();
16455 partial_sorting_scan_right_to_left_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16456 unsafe {
16457 probe_libsais16_partial_sorting_scan_right_to_left_32s_1k(
16458 t.as_ptr(),
16459 c_sa.as_mut_ptr(),
16460 c_buckets.as_mut_ptr(),
16461 0,
16462 2,
16463 );
16464 }
16465 assert_eq!(rust_sa, c_sa);
16466 assert_eq!(rust_buckets, c_buckets);
16467
16468 let mut state = alloc_thread_state(1).unwrap();
16469 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16470 let mut c_sa = rust_sa.clone();
16471 let mut rust_buckets = vec![0; 6 * k as usize];
16472 rust_buckets[buckets_index4(2, 0)] = 4;
16473 rust_buckets[buckets_index4(1, 1)] = 5;
16474 rust_buckets[buckets_index4(3, 0)] = 6;
16475 let mut c_buckets = rust_buckets.clone();
16476 let rust_d = partial_sorting_scan_left_to_right_32s_6k_omp(
16477 &t,
16478 &mut rust_sa,
16479 5,
16480 &mut rust_buckets,
16481 2,
16482 0,
16483 1,
16484 &mut state,
16485 );
16486 let c_d = unsafe {
16487 probe_libsais16_partial_sorting_scan_left_to_right_32s_6k_omp(
16488 t.as_ptr(),
16489 c_sa.as_mut_ptr(),
16490 5,
16491 c_buckets.as_mut_ptr(),
16492 2,
16493 0,
16494 1,
16495 )
16496 };
16497 assert_eq!(rust_d, c_d);
16498 assert_eq!(rust_sa, c_sa);
16499 assert_eq!(rust_buckets, c_buckets);
16500
16501 let mut state = alloc_thread_state(1).unwrap();
16502 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16503 let mut c_sa = rust_sa.clone();
16504 let mut rust_buckets = vec![0; 4 * k as usize];
16505 rust_buckets[2 * k as usize + 2] = 4;
16506 rust_buckets[2 * k as usize + 1] = 5;
16507 rust_buckets[2 * k as usize + 3] = 6;
16508 let mut c_buckets = rust_buckets.clone();
16509 let rust_d = partial_sorting_scan_left_to_right_32s_4k_omp(
16510 &t,
16511 &mut rust_sa,
16512 5,
16513 k,
16514 &mut rust_buckets,
16515 0,
16516 1,
16517 &mut state,
16518 );
16519 let c_d = unsafe {
16520 probe_libsais16_partial_sorting_scan_left_to_right_32s_4k_omp(
16521 t.as_ptr(),
16522 c_sa.as_mut_ptr(),
16523 5,
16524 k,
16525 c_buckets.as_mut_ptr(),
16526 0,
16527 1,
16528 )
16529 };
16530 assert_eq!(rust_d, c_d);
16531 assert_eq!(rust_sa, c_sa);
16532 assert_eq!(rust_buckets, c_buckets);
16533
16534 let mut state = alloc_thread_state(1).unwrap();
16535 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16536 let mut c_sa = rust_sa.clone();
16537 let mut rust_buckets = vec![0, 5, 4, 6];
16538 let mut c_buckets = rust_buckets.clone();
16539 partial_sorting_scan_left_to_right_32s_1k_omp(
16540 &t,
16541 &mut rust_sa,
16542 5,
16543 &mut rust_buckets,
16544 1,
16545 &mut state,
16546 );
16547 unsafe {
16548 probe_libsais16_partial_sorting_scan_left_to_right_32s_1k_omp(
16549 t.as_ptr(),
16550 c_sa.as_mut_ptr(),
16551 5,
16552 c_buckets.as_mut_ptr(),
16553 1,
16554 );
16555 }
16556 assert_eq!(rust_sa, c_sa);
16557 assert_eq!(rust_buckets, c_buckets);
16558
16559 let mut state = alloc_thread_state(1).unwrap();
16560 let mut rust_sa = vec![0, 0, 3, 4, 9, 9, 9, 9];
16561 let mut c_sa = rust_sa.clone();
16562 let mut rust_buckets = vec![0; 6 * k as usize];
16563 rust_buckets[buckets_index4(2, 0)] = 7;
16564 rust_buckets[buckets_index4(1, 1)] = 6;
16565 let mut c_buckets = rust_buckets.clone();
16566 let rust_d = partial_sorting_scan_right_to_left_32s_6k_omp(
16567 &t,
16568 &mut rust_sa,
16569 5,
16570 &mut rust_buckets,
16571 1,
16572 1,
16573 0,
16574 1,
16575 &mut state,
16576 );
16577 let c_d = unsafe {
16578 probe_libsais16_partial_sorting_scan_right_to_left_32s_6k_omp(
16579 t.as_ptr(),
16580 c_sa.as_mut_ptr(),
16581 5,
16582 c_buckets.as_mut_ptr(),
16583 1,
16584 1,
16585 0,
16586 1,
16587 )
16588 };
16589 assert_eq!(rust_d, c_d);
16590 assert_eq!(rust_sa, c_sa);
16591 assert_eq!(rust_buckets, c_buckets);
16592
16593 let mut state = alloc_thread_state(1).unwrap();
16594 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16595 let mut c_sa = rust_sa.clone();
16596 let mut rust_buckets = vec![0; 4 * k as usize];
16597 rust_buckets[3 * k as usize + 2] = 7;
16598 rust_buckets[3 * k as usize + 1] = 6;
16599 let mut c_buckets = rust_buckets.clone();
16600 let rust_d = partial_sorting_scan_right_to_left_32s_4k_omp(
16601 &t,
16602 &mut rust_sa,
16603 2,
16604 k,
16605 &mut rust_buckets,
16606 0,
16607 1,
16608 &mut state,
16609 );
16610 let c_d = unsafe {
16611 probe_libsais16_partial_sorting_scan_right_to_left_32s_4k_omp(
16612 t.as_ptr(),
16613 c_sa.as_mut_ptr(),
16614 2,
16615 k,
16616 c_buckets.as_mut_ptr(),
16617 0,
16618 1,
16619 )
16620 };
16621 assert_eq!(rust_d, c_d);
16622 assert_eq!(rust_sa, c_sa);
16623 assert_eq!(rust_buckets, c_buckets);
16624
16625 let mut state = alloc_thread_state(1).unwrap();
16626 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16627 let mut c_sa = rust_sa.clone();
16628 let mut rust_buckets = vec![0, 6, 7, 0];
16629 let mut c_buckets = rust_buckets.clone();
16630 partial_sorting_scan_right_to_left_32s_1k_omp(
16631 &t,
16632 &mut rust_sa,
16633 2,
16634 &mut rust_buckets,
16635 1,
16636 &mut state,
16637 );
16638 unsafe {
16639 probe_libsais16_partial_sorting_scan_right_to_left_32s_1k_omp(
16640 t.as_ptr(),
16641 c_sa.as_mut_ptr(),
16642 2,
16643 c_buckets.as_mut_ptr(),
16644 1,
16645 );
16646 }
16647 assert_eq!(rust_sa, c_sa);
16648 assert_eq!(rust_buckets, c_buckets);
16649 }
16650
16651 #[test]
16652 fn libsais16_place_lms_suffixes_histogram_32s_match_c() {
16653 let n = 12;
16654 let k = 4;
16655 let m = 4;
16656 let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16657 let mut c_sa = rust_sa.clone();
16658 let mut buckets = vec![0; 2 * k as usize];
16659 buckets[buckets_index2(1, 0)] = 7;
16660 buckets[buckets_index2(1, 1)] = 2;
16661 buckets[buckets_index2(2, 0)] = 10;
16662 buckets[buckets_index2(2, 1)] = 1;
16663 place_lms_suffixes_histogram_32s_2k(&mut rust_sa, n, k, m, &buckets);
16664 unsafe {
16665 probe_libsais16_place_lms_suffixes_histogram_32s_2k(
16666 c_sa.as_mut_ptr(),
16667 n,
16668 k,
16669 m,
16670 buckets.as_ptr(),
16671 );
16672 }
16673 assert_eq!(rust_sa, c_sa);
16674
16675 let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16676 let mut c_sa = rust_sa.clone();
16677 let mut buckets = vec![0; 4 * k as usize];
16678 buckets[buckets_index2(1, 1)] = 2;
16679 buckets[buckets_index2(2, 1)] = 1;
16680 buckets[3 * k as usize + 1] = 7;
16681 buckets[3 * k as usize + 2] = 10;
16682 place_lms_suffixes_histogram_32s_4k(&mut rust_sa, n, k, m, &buckets);
16683 unsafe {
16684 probe_libsais16_place_lms_suffixes_histogram_32s_4k(
16685 c_sa.as_mut_ptr(),
16686 n,
16687 k,
16688 m,
16689 buckets.as_ptr(),
16690 );
16691 }
16692 assert_eq!(rust_sa, c_sa);
16693
16694 let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
16695 let mut c_sa = rust_sa.clone();
16696 let mut buckets = vec![0; 6 * k as usize];
16697 buckets[buckets_index4(1, 1)] = 2;
16698 buckets[buckets_index4(2, 1)] = 1;
16699 buckets[5 * k as usize + 1] = 7;
16700 buckets[5 * k as usize + 2] = 10;
16701 place_lms_suffixes_histogram_32s_6k(&mut rust_sa, n, k, m, &buckets);
16702 unsafe {
16703 probe_libsais16_place_lms_suffixes_histogram_32s_6k(
16704 c_sa.as_mut_ptr(),
16705 n,
16706 k,
16707 m,
16708 buckets.as_ptr(),
16709 );
16710 }
16711 assert_eq!(rust_sa, c_sa);
16712 }
16713
16714 #[test]
16715 fn libsais16_count_gather_lms_suffixes_32s_match_c() {
16716 let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16717 let n = t.len() as SaSint;
16718 let k = 4;
16719
16720 let mut rust_sa = vec![0; t.len()];
16721 let mut c_sa = rust_sa.clone();
16722 let rust_m = gather_lms_suffixes_32s(&t, &mut rust_sa, n);
16723 let c_m =
16724 unsafe { probe_libsais16_gather_lms_suffixes_32s(t.as_ptr(), c_sa.as_mut_ptr(), n) };
16725 assert_eq!(rust_m, c_m);
16726 assert_eq!(rust_sa, c_sa);
16727
16728 let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
16729 let mut rust_sa = vec![0; compact_t.len()];
16730 let mut c_sa = rust_sa.clone();
16731 let rust_m = gather_compacted_lms_suffixes_32s(&compact_t, &mut rust_sa, n);
16732 let c_m = unsafe {
16733 probe_libsais16_gather_compacted_lms_suffixes_32s(
16734 compact_t.as_ptr(),
16735 c_sa.as_mut_ptr(),
16736 n,
16737 )
16738 };
16739 assert_eq!(rust_m, c_m);
16740 assert_eq!(rust_sa, c_sa);
16741
16742 let mut rust_buckets = vec![99; 2 * k as usize];
16743 let mut c_buckets = rust_buckets.clone();
16744 count_lms_suffixes_32s_2k(&t, n, k, &mut rust_buckets);
16745 unsafe {
16746 probe_libsais16_count_lms_suffixes_32s_2k(t.as_ptr(), n, k, c_buckets.as_mut_ptr());
16747 }
16748 assert_eq!(rust_buckets, c_buckets);
16749
16750 let mut rust_sa = vec![0; t.len()];
16751 let mut c_sa = rust_sa.clone();
16752 let mut rust_buckets = vec![0; 2 * k as usize];
16753 let mut c_buckets = rust_buckets.clone();
16754 let rust_m = count_and_gather_lms_suffixes_32s_2k(
16755 &t,
16756 &mut rust_sa,
16757 n,
16758 k,
16759 &mut rust_buckets,
16760 0,
16761 n as isize,
16762 );
16763 let c_m = unsafe {
16764 probe_libsais16_count_and_gather_lms_suffixes_32s_2k(
16765 t.as_ptr(),
16766 c_sa.as_mut_ptr(),
16767 n,
16768 k,
16769 c_buckets.as_mut_ptr(),
16770 0,
16771 n,
16772 )
16773 };
16774 assert_eq!(rust_m, c_m);
16775 assert_eq!(rust_sa, c_sa);
16776 assert_eq!(rust_buckets, c_buckets);
16777
16778 let mut rust_sa = vec![0; compact_t.len()];
16779 let mut c_sa = rust_sa.clone();
16780 let mut rust_buckets = vec![0; 2 * k as usize];
16781 let mut c_buckets = rust_buckets.clone();
16782 let rust_m = count_and_gather_compacted_lms_suffixes_32s_2k(
16783 &compact_t,
16784 &mut rust_sa,
16785 n,
16786 k,
16787 &mut rust_buckets,
16788 0,
16789 n as isize,
16790 );
16791 let c_m = unsafe {
16792 probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k(
16793 compact_t.as_ptr(),
16794 c_sa.as_mut_ptr(),
16795 n,
16796 k,
16797 c_buckets.as_mut_ptr(),
16798 0,
16799 n,
16800 )
16801 };
16802 assert_eq!(rust_m, c_m);
16803 assert_eq!(rust_sa, c_sa);
16804 assert_eq!(rust_buckets, c_buckets);
16805 }
16806
16807 #[test]
16808 fn libsais16_small_openmp_leaf_helpers_match_upstream_shapes() {
16809 let sa = [-1, 0, 3, SAINT_MIN, 0, 7, -5];
16810 assert_eq!(count_negative_marked_suffixes(&sa, 1, 5), 1);
16811 assert_eq!(count_zero_marked_suffixes(&sa, 1, 5), 2);
16812
16813 let mut buckets = vec![1, 2, 3, 0, 4, 5, 6, 0, 7, 8, 9, 0, 10, 11, 12, 0];
16814 accumulate_counts_s32_4(&mut buckets, 12, 3, 4);
16815 assert_eq!(&buckets[12..15], &[22, 26, 30]);
16816
16817 let mut many = Vec::new();
16818 for bucket in 0..10 {
16819 many.extend([bucket, bucket + 1, bucket + 2, 0]);
16820 }
16821 accumulate_counts_s32(&mut many, 36, 3, 4, 10);
16822 assert_eq!(&many[36..39], &[45, 55, 65]);
16823
16824 let t = [1, SAINT_MIN | 2, 0];
16825 let mut compacted_buckets = vec![0; 6];
16826 count_compacted_lms_suffixes_32s_2k(&t, t.len() as SaSint, 3, &mut compacted_buckets);
16827 assert_eq!(compacted_buckets, vec![1, 0, 1, 0, 0, 1]);
16828
16829 let unique_sa = [0, 2, 4, 6, 0, -10, 20, -30];
16830 assert_eq!(count_unique_suffixes(&unique_sa, 4, 0, 4), 2);
16831
16832 assert_eq!(get_bucket_stride(20_000, 1000, 4), 1024);
16833 assert_eq!(get_bucket_stride(3024, 1001, 4), 1008);
16834 assert_eq!(get_bucket_stride(3000, 1001, 4), 1001);
16835 }
16836
16837 #[test]
16838 fn libsais16_count_gather_lms_suffixes_32s_omp_wrappers_match_c() {
16839 let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16840 let n = t.len() as SaSint;
16841 let k = 4;
16842 let mut rust_sa = vec![0; t.len()];
16843 let mut c_sa = rust_sa.clone();
16844 let mut rust_buckets = vec![0; 2 * k as usize];
16845 let mut c_buckets = rust_buckets.clone();
16846 let mut rust_state = alloc_thread_state(1).unwrap();
16847 let rust_m = count_and_gather_lms_suffixes_32s_2k_omp(
16848 &t,
16849 &mut rust_sa,
16850 n,
16851 k,
16852 &mut rust_buckets,
16853 0,
16854 1,
16855 &mut rust_state,
16856 );
16857 let c_m = unsafe {
16858 probe_libsais16_count_and_gather_lms_suffixes_32s_2k_omp(
16859 t.as_ptr(),
16860 c_sa.as_mut_ptr(),
16861 n,
16862 k,
16863 c_buckets.as_mut_ptr(),
16864 0,
16865 1,
16866 )
16867 };
16868 assert_eq!(rust_m, c_m);
16869 assert_eq!(rust_sa, c_sa);
16870 assert_eq!(rust_buckets, c_buckets);
16871
16872 let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
16873 let mut rust_sa = vec![0; compact_t.len()];
16874 let mut c_sa = rust_sa.clone();
16875 let mut rust_buckets = vec![0; 2 * k as usize];
16876 let mut c_buckets = rust_buckets.clone();
16877 let mut rust_state = alloc_thread_state(1).unwrap();
16878 count_and_gather_compacted_lms_suffixes_32s_2k_omp(
16879 &compact_t,
16880 &mut rust_sa,
16881 n,
16882 k,
16883 &mut rust_buckets,
16884 0,
16885 1,
16886 &mut rust_state,
16887 );
16888 unsafe {
16889 probe_libsais16_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
16890 compact_t.as_ptr(),
16891 c_sa.as_mut_ptr(),
16892 n,
16893 k,
16894 c_buckets.as_mut_ptr(),
16895 0,
16896 1,
16897 );
16898 }
16899 assert_eq!(rust_sa, c_sa);
16900 assert_eq!(rust_buckets, c_buckets);
16901 }
16902
16903 #[test]
16904 fn libsais16_count_gather_lms_suffixes_32s_4k_match_c() {
16905 let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
16906 let n = t.len() as SaSint;
16907 let k = 4;
16908
16909 let mut rust_buckets = vec![77; 4 * k as usize];
16910 let mut c_buckets = vec![0; 4 * k as usize];
16911 let mut c_sa_for_count = vec![0; t.len()];
16912 count_lms_suffixes_32s_4k(&t, n, k, &mut rust_buckets);
16913 unsafe {
16914 probe_libsais16_count_and_gather_lms_suffixes_32s_4k(
16915 t.as_ptr(),
16916 c_sa_for_count.as_mut_ptr(),
16917 n,
16918 k,
16919 c_buckets.as_mut_ptr(),
16920 0,
16921 n,
16922 );
16923 }
16924 assert_eq!(rust_buckets, c_buckets);
16925
16926 let mut rust_sa = vec![0; t.len()];
16927 let mut c_sa = rust_sa.clone();
16928 let mut rust_buckets = vec![0; 4 * k as usize];
16929 let mut c_buckets = rust_buckets.clone();
16930 let rust_m = count_and_gather_lms_suffixes_32s_4k(
16931 &t,
16932 &mut rust_sa,
16933 n,
16934 k,
16935 &mut rust_buckets,
16936 0,
16937 n as isize,
16938 );
16939 let c_m = unsafe {
16940 probe_libsais16_count_and_gather_lms_suffixes_32s_4k(
16941 t.as_ptr(),
16942 c_sa.as_mut_ptr(),
16943 n,
16944 k,
16945 c_buckets.as_mut_ptr(),
16946 0,
16947 n,
16948 )
16949 };
16950 assert_eq!(rust_m, c_m);
16951 assert_eq!(rust_sa, c_sa);
16952 assert_eq!(rust_buckets, c_buckets);
16953
16954 let mut rust_sa = vec![0; t.len()];
16955 let mut c_sa = rust_sa.clone();
16956 let mut rust_buckets = vec![0; 4 * k as usize];
16957 let mut c_buckets = rust_buckets.clone();
16958 let mut rust_state = alloc_thread_state(1).unwrap();
16959 let rust_m = count_and_gather_lms_suffixes_32s_4k_omp(
16960 &t,
16961 &mut rust_sa,
16962 n,
16963 k,
16964 &mut rust_buckets,
16965 0,
16966 1,
16967 &mut rust_state,
16968 );
16969 let c_m = unsafe {
16970 probe_libsais16_count_and_gather_lms_suffixes_32s_4k_omp(
16971 t.as_ptr(),
16972 c_sa.as_mut_ptr(),
16973 n,
16974 k,
16975 c_buckets.as_mut_ptr(),
16976 0,
16977 1,
16978 )
16979 };
16980 assert_eq!(rust_m, c_m);
16981 assert_eq!(rust_sa, c_sa);
16982 assert_eq!(rust_buckets, c_buckets);
16983
16984 let mut rust_buckets = vec![91; k as usize];
16985 let mut c_buckets = rust_buckets.clone();
16986 count_suffixes_32s(&t, n, k, &mut rust_buckets);
16987 unsafe {
16988 probe_libsais16_count_suffixes_32s(t.as_ptr(), n, k, c_buckets.as_mut_ptr());
16989 }
16990 assert_eq!(rust_buckets, c_buckets);
16991 }
16992
16993 #[test]
16994 fn libsais16_initialize_buckets_32s_match_c() {
16995 let k = 4;
16996
16997 let base_6k = vec![
16998 1, 2, 0, 1, 0, 1, 2, 0, 3, 0, 1, 1, 2, 1, 0, 0, 9, 9, 9, 9, 8, 8, 8, 8,
16999 ];
17000 let mut rust = base_6k.clone();
17001 let mut c = base_6k.clone();
17002 initialize_buckets_start_and_end_32s_6k(k, &mut rust);
17003 unsafe { probe_libsais16_initialize_buckets_start_and_end_32s_6k(k, c.as_mut_ptr()) };
17004 assert_eq!(rust, c);
17005
17006 let base_4k = vec![1, 2, 0, 1, 3, 0, 2, 1, 9, 9, 9, 9, 8, 8, 8, 8];
17007 let mut rust = base_4k.clone();
17008 let mut c = base_4k.clone();
17009 initialize_buckets_start_and_end_32s_4k(k, &mut rust);
17010 unsafe { probe_libsais16_initialize_buckets_start_and_end_32s_4k(k, c.as_mut_ptr()) };
17011 assert_eq!(rust, c);
17012
17013 let base_2k = vec![1, 2, 0, 1, 3, 0, 2, 1];
17014 let mut rust = base_2k.clone();
17015 let mut c = base_2k.clone();
17016 initialize_buckets_end_32s_2k(k, &mut rust);
17017 unsafe { probe_libsais16_initialize_buckets_end_32s_2k(k, c.as_mut_ptr()) };
17018 assert_eq!(rust, c);
17019
17020 let mut rust = base_2k.clone();
17021 let mut c = base_2k.clone();
17022 initialize_buckets_start_and_end_32s_2k(k, &mut rust);
17023 unsafe { probe_libsais16_initialize_buckets_start_and_end_32s_2k(k, c.as_mut_ptr()) };
17024 assert_eq!(rust, c);
17025
17026 let base_1k = vec![2, 1, 3, 2];
17027 let mut rust = base_1k.clone();
17028 let mut c = base_1k.clone();
17029 initialize_buckets_start_32s_1k(k, &mut rust);
17030 unsafe { probe_libsais16_initialize_buckets_start_32s_1k(k, c.as_mut_ptr()) };
17031 assert_eq!(rust, c);
17032
17033 let mut rust = base_1k.clone();
17034 let mut c = base_1k.clone();
17035 initialize_buckets_end_32s_1k(k, &mut rust);
17036 unsafe { probe_libsais16_initialize_buckets_end_32s_1k(k, c.as_mut_ptr()) };
17037 assert_eq!(rust, c);
17038
17039 let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17040 let mut rust = vec![1, 2, 0, 1, 3, 0, 2, 1];
17041 let mut c = rust.clone();
17042 initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(&t, k, &mut rust, 4);
17043 unsafe {
17044 probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
17045 t.as_ptr(),
17046 k,
17047 c.as_mut_ptr(),
17048 4,
17049 );
17050 }
17051 assert_eq!(rust, c);
17052
17053 let mut rust = vec![
17054 1, 2, 0, 1, 3, 0, 2, 1, 1, 0, 2, 0, 0, 1, 1, 0, 9, 9, 9, 9, 8, 8, 8, 8,
17055 ];
17056 let mut c = rust.clone();
17057 let rust_sum = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(&t, k, &mut rust, 4);
17058 let c_sum = unsafe {
17059 probe_libsais16_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
17060 t.as_ptr(),
17061 k,
17062 c.as_mut_ptr(),
17063 4,
17064 )
17065 };
17066 assert_eq!(rust_sum, c_sum);
17067 assert_eq!(rust, c);
17068
17069 let mut rust = base_4k.clone();
17070 let mut c = base_4k;
17071 initialize_buckets_for_radix_and_partial_sorting_32s_4k(&t, k, &mut rust, 4);
17072 unsafe {
17073 probe_libsais16_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
17074 t.as_ptr(),
17075 k,
17076 c.as_mut_ptr(),
17077 4,
17078 );
17079 }
17080 assert_eq!(rust, c);
17081 }
17082
17083 #[test]
17084 fn libsais16_place_lms_suffixes_interval_32s_match_c() {
17085 let n = 12;
17086 let k = 4;
17087 let m = 4;
17088
17089 let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
17090 let mut c_sa = rust_sa.clone();
17091 let mut buckets = vec![0; 4 * k as usize];
17092 buckets[buckets_index2(0, 1)] = 2;
17093 buckets[buckets_index2(1, 1)] = 2;
17094 buckets[buckets_index2(2, 1)] = 3;
17095 buckets[buckets_index2(2, 1) + buckets_index2(1, 0)] = 4;
17096 buckets[3 * k as usize + 1] = 7;
17097 buckets[3 * k as usize + 2] = 10;
17098 place_lms_suffixes_interval_32s_4k(&mut rust_sa, n, k, m, &buckets);
17099 unsafe {
17100 probe_libsais16_place_lms_suffixes_interval_32s_4k(
17101 c_sa.as_mut_ptr(),
17102 n,
17103 k,
17104 m,
17105 buckets.as_ptr(),
17106 );
17107 }
17108 assert_eq!(rust_sa, c_sa);
17109
17110 let mut rust_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
17111 let mut c_sa = rust_sa.clone();
17112 let mut buckets = vec![0; 2 * k as usize];
17113 buckets[buckets_index2(1, 0)] = 7;
17114 buckets[buckets_index2(0, 1)] = 1;
17115 buckets[buckets_index2(1, 1)] = 1;
17116 buckets[buckets_index2(2, 0)] = 10;
17117 buckets[buckets_index2(2, 1)] = 2;
17118 buckets[buckets_index2(3, 1)] = 3;
17119 place_lms_suffixes_interval_32s_2k(&mut rust_sa, n, k, m, &buckets);
17120 unsafe {
17121 probe_libsais16_place_lms_suffixes_interval_32s_2k(
17122 c_sa.as_mut_ptr(),
17123 n,
17124 k,
17125 m,
17126 buckets.as_ptr(),
17127 );
17128 }
17129 assert_eq!(rust_sa, c_sa);
17130
17131 let t = vec![0, 1, 2, 1, 2, 3, 1, 3, 0, 0, 0, 0];
17132 let mut rust_sa = vec![1, 3, 4, 7, 9, 9, 9, 9, 9, 9, 9, 9];
17133 let mut c_sa = rust_sa.clone();
17134 let rust_buckets = vec![0, 3, 6, 10];
17135 let mut c_buckets = rust_buckets.clone();
17136 place_lms_suffixes_interval_32s_1k(&t, &mut rust_sa, k, m, &rust_buckets);
17137 unsafe {
17138 probe_libsais16_place_lms_suffixes_interval_32s_1k(
17139 t.as_ptr(),
17140 c_sa.as_mut_ptr(),
17141 k,
17142 m,
17143 c_buckets.as_mut_ptr(),
17144 );
17145 }
17146 assert_eq!(rust_sa, c_sa);
17147 assert_eq!(rust_buckets, c_buckets);
17148 }
17149
17150 #[test]
17151 fn libsais16_renumber_and_mark_distinct_lms_suffixes_32s_1k_matches_c() {
17152 let rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17153 let n = rust_t.len() as SaSint;
17154 let mut probe_sa = vec![0; rust_t.len()];
17155 let m = gather_lms_suffixes_32s(&rust_t, &mut probe_sa, n);
17156 let mut rust_sa = vec![0; rust_t.len()];
17157 let mut c_t = rust_t.clone();
17158 let mut c_sa = rust_sa.clone();
17159
17160 let rust_name =
17161 renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(&rust_t, &mut rust_sa, n, m, 1);
17162 let c_name = unsafe {
17163 probe_libsais16_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
17164 c_t.as_mut_ptr(),
17165 c_sa.as_mut_ptr(),
17166 n,
17167 m,
17168 1,
17169 )
17170 };
17171 assert_eq!(rust_name, c_name);
17172 assert_eq!(rust_t, c_t);
17173 assert_eq!(rust_sa, c_sa);
17174 }
17175
17176 #[test]
17177 fn libsais16_reconstruct_compacted_lms_suffixes_32s_match_c() {
17178 let n = 8;
17179 let k = 4;
17180 let fs = 0;
17181 let f = 0;
17182 let mut m_probe_sa = vec![0; n as usize];
17183 let m = gather_lms_suffixes_32s(&[2, 1, 3, 1, 2, 0, 1, 0], &mut m_probe_sa, n);
17184
17185 let mut rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17186 let mut c_t = rust_t.clone();
17187 let mut rust_sa = vec![0; n as usize];
17188 let mut c_sa = rust_sa.clone();
17189 let mut rust_buckets = vec![0; 2 * k as usize];
17190 let mut c_buckets = rust_buckets.clone();
17191 let mut rust_thread_state = alloc_thread_state(1).unwrap();
17192 reconstruct_compacted_lms_suffixes_32s_2k_omp(
17193 &mut rust_t,
17194 &mut rust_sa,
17195 n,
17196 k,
17197 m,
17198 fs,
17199 f,
17200 &mut rust_buckets,
17201 0,
17202 1,
17203 &mut rust_thread_state,
17204 );
17205 unsafe {
17206 probe_libsais16_reconstruct_compacted_lms_suffixes_32s_2k_omp(
17207 c_t.as_mut_ptr(),
17208 c_sa.as_mut_ptr(),
17209 n,
17210 k,
17211 m,
17212 fs,
17213 f,
17214 c_buckets.as_mut_ptr(),
17215 0,
17216 1,
17217 );
17218 }
17219 assert_eq!(rust_t, c_t);
17220 assert_eq!(rust_sa, c_sa);
17221 assert_eq!(rust_buckets, c_buckets);
17222
17223 let mut rust_t = vec![2, 1, 3, 1, 2, 0, 1, 0];
17224 let mut c_t = rust_t.clone();
17225 let mut rust_sa = vec![0; n as usize];
17226 let mut c_sa = rust_sa.clone();
17227 reconstruct_compacted_lms_suffixes_32s_1k_omp(&mut rust_t, &mut rust_sa, n, m, fs, f, 1);
17228 unsafe {
17229 probe_libsais16_reconstruct_compacted_lms_suffixes_32s_1k_omp(
17230 c_t.as_mut_ptr(),
17231 c_sa.as_mut_ptr(),
17232 n,
17233 m,
17234 fs,
17235 f,
17236 1,
17237 );
17238 }
17239 assert_eq!(rust_t, c_t);
17240 assert_eq!(rust_sa, c_sa);
17241 }
17242
17243 #[test]
17244 fn libsais16_partial_omp_wrappers_match_c() {
17245 let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17246 let mut c_sa = rust_sa.clone();
17247 let mut c_buckets = rust_buckets.clone();
17248
17249 let rust_d = partial_sorting_scan_left_to_right_16u_omp(
17250 &text,
17251 &mut rust_sa,
17252 text.len() as SaSint,
17253 8,
17254 &mut rust_buckets,
17255 5,
17256 3,
17257 1,
17258 );
17259 let c_d = unsafe {
17260 probe_libsais16_partial_sorting_scan_left_to_right_16u_omp(
17261 text.as_ptr(),
17262 c_sa.as_mut_ptr(),
17263 text.len() as SaSint,
17264 8,
17265 c_buckets.as_mut_ptr(),
17266 5,
17267 3,
17268 1,
17269 )
17270 };
17271 assert_eq!(rust_d, c_d);
17272 assert_eq!(rust_sa, c_sa);
17273 assert_eq!(rust_buckets, c_buckets);
17274
17275 let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17276 rust_sa[6..10].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 9 | SAINT_MIN]);
17277 let mut c_sa = rust_sa.clone();
17278 let mut c_buckets = rust_buckets.clone();
17279 partial_sorting_scan_right_to_left_16u_omp(
17280 &text,
17281 &mut rust_sa,
17282 text.len() as SaSint,
17283 8,
17284 &mut rust_buckets,
17285 0,
17286 5,
17287 3,
17288 1,
17289 );
17290 unsafe {
17291 probe_libsais16_partial_sorting_scan_right_to_left_16u_omp(
17292 text.as_ptr(),
17293 c_sa.as_mut_ptr(),
17294 text.len() as SaSint,
17295 8,
17296 c_buckets.as_mut_ptr(),
17297 0,
17298 5,
17299 3,
17300 1,
17301 );
17302 }
17303 assert_eq!(rust_sa, c_sa);
17304 assert_eq!(rust_buckets, c_buckets);
17305
17306 let (text, mut rust_sa, mut rust_buckets) = partial_scan_fixture();
17307 rust_sa[6..10].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 9 | SAINT_MIN]);
17308 let mut c_sa = rust_sa.clone();
17309 let mut c_buckets = rust_buckets.clone();
17310 partial_gsa_scan_right_to_left_16u_omp(
17311 &text,
17312 &mut rust_sa,
17313 text.len() as SaSint,
17314 8,
17315 &mut rust_buckets,
17316 0,
17317 5,
17318 3,
17319 1,
17320 );
17321 unsafe {
17322 probe_libsais16_partial_gsa_scan_right_to_left_16u_omp(
17323 text.as_ptr(),
17324 c_sa.as_mut_ptr(),
17325 text.len() as SaSint,
17326 8,
17327 c_buckets.as_mut_ptr(),
17328 0,
17329 5,
17330 3,
17331 1,
17332 );
17333 }
17334 assert_eq!(rust_sa, c_sa);
17335 assert_eq!(rust_buckets, c_buckets);
17336 }
17337
17338 #[test]
17339 fn libsais16_final_omp_wrappers_match_c() {
17340 let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17341 let mut c_sa = rust_sa.clone();
17342 let mut c_bucket = rust_bucket.clone();
17343 final_bwt_scan_left_to_right_16u_omp(
17344 &text,
17345 &mut rust_sa,
17346 text.len() as SaSint,
17347 8,
17348 &mut rust_bucket,
17349 1,
17350 );
17351 unsafe {
17352 probe_libsais16_final_bwt_scan_left_to_right_16u_omp(
17353 text.as_ptr(),
17354 c_sa.as_mut_ptr(),
17355 text.len() as SaSint,
17356 8,
17357 c_bucket.as_mut_ptr(),
17358 1,
17359 );
17360 }
17361 assert_eq!(rust_sa, c_sa);
17362 assert_eq!(rust_bucket, c_bucket);
17363
17364 let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17365 let mut c_sa = rust_sa.clone();
17366 let mut c_bucket = rust_bucket.clone();
17367 let mut rust_i = vec![-1; 8];
17368 let mut c_i = rust_i.clone();
17369 final_bwt_aux_scan_left_to_right_16u_omp(
17370 &text,
17371 &mut rust_sa,
17372 text.len() as SaSint,
17373 8,
17374 1,
17375 &mut rust_i,
17376 &mut rust_bucket,
17377 1,
17378 );
17379 unsafe {
17380 probe_libsais16_final_bwt_aux_scan_left_to_right_16u_omp(
17381 text.as_ptr(),
17382 c_sa.as_mut_ptr(),
17383 text.len() as SaSint,
17384 8,
17385 1,
17386 c_i.as_mut_ptr(),
17387 c_bucket.as_mut_ptr(),
17388 1,
17389 );
17390 }
17391 assert_eq!(rust_sa, c_sa);
17392 assert_eq!(rust_bucket, c_bucket);
17393 assert_eq!(rust_i, c_i);
17394
17395 let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17396 let mut c_sa = rust_sa.clone();
17397 let mut c_bucket = rust_bucket.clone();
17398 final_sorting_scan_left_to_right_16u_omp(
17399 &text,
17400 &mut rust_sa,
17401 text.len() as SaSint,
17402 8,
17403 &mut rust_bucket,
17404 1,
17405 );
17406 unsafe {
17407 probe_libsais16_final_sorting_scan_left_to_right_16u_omp(
17408 text.as_ptr(),
17409 c_sa.as_mut_ptr(),
17410 text.len() as SaSint,
17411 8,
17412 c_bucket.as_mut_ptr(),
17413 1,
17414 );
17415 }
17416 assert_eq!(rust_sa, c_sa);
17417 assert_eq!(rust_bucket, c_bucket);
17418
17419 let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17420 let mut c_sa = rust_sa.clone();
17421 let mut c_bucket = rust_bucket.clone();
17422 let rust_index = final_bwt_scan_right_to_left_16u_omp(
17423 &text,
17424 &mut rust_sa,
17425 text.len() as SaSint,
17426 8,
17427 &mut rust_bucket,
17428 1,
17429 );
17430 let c_index = unsafe {
17431 probe_libsais16_final_bwt_scan_right_to_left_16u_omp(
17432 text.as_ptr(),
17433 c_sa.as_mut_ptr(),
17434 text.len() as SaSint,
17435 8,
17436 c_bucket.as_mut_ptr(),
17437 1,
17438 )
17439 };
17440 assert_eq!(rust_index, c_index);
17441 assert_eq!(rust_sa, c_sa);
17442 assert_eq!(rust_bucket, c_bucket);
17443
17444 let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17445 let mut c_sa = rust_sa.clone();
17446 let mut c_bucket = rust_bucket.clone();
17447 let mut rust_i = vec![-1; 8];
17448 let mut c_i = rust_i.clone();
17449 final_bwt_aux_scan_right_to_left_16u_omp(
17450 &text,
17451 &mut rust_sa,
17452 text.len() as SaSint,
17453 8,
17454 1,
17455 &mut rust_i,
17456 &mut rust_bucket,
17457 1,
17458 );
17459 unsafe {
17460 probe_libsais16_final_bwt_aux_scan_right_to_left_16u_omp(
17461 text.as_ptr(),
17462 c_sa.as_mut_ptr(),
17463 text.len() as SaSint,
17464 8,
17465 1,
17466 c_i.as_mut_ptr(),
17467 c_bucket.as_mut_ptr(),
17468 1,
17469 );
17470 }
17471 assert_eq!(rust_sa, c_sa);
17472 assert_eq!(rust_bucket, c_bucket);
17473 assert_eq!(rust_i, c_i);
17474
17475 let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17476 let mut c_sa = rust_sa.clone();
17477 let mut c_bucket = rust_bucket.clone();
17478 final_sorting_scan_right_to_left_16u_omp(&text, &mut rust_sa, 0, 6, 8, &mut rust_bucket, 1);
17479 unsafe {
17480 probe_libsais16_final_sorting_scan_right_to_left_16u_omp(
17481 text.as_ptr(),
17482 c_sa.as_mut_ptr(),
17483 0,
17484 6,
17485 8,
17486 c_bucket.as_mut_ptr(),
17487 1,
17488 );
17489 }
17490 assert_eq!(rust_sa, c_sa);
17491 assert_eq!(rust_bucket, c_bucket);
17492
17493 let (text, mut rust_sa, mut rust_bucket) = final_scan_fixture();
17494 let mut c_sa = rust_sa.clone();
17495 let mut c_bucket = rust_bucket.clone();
17496 final_gsa_scan_right_to_left_16u_omp(&text, &mut rust_sa, 0, 6, 8, &mut rust_bucket, 1);
17497 unsafe {
17498 probe_libsais16_final_gsa_scan_right_to_left_16u_omp(
17499 text.as_ptr(),
17500 c_sa.as_mut_ptr(),
17501 0,
17502 6,
17503 8,
17504 c_bucket.as_mut_ptr(),
17505 1,
17506 );
17507 }
17508 assert_eq!(rust_sa, c_sa);
17509 assert_eq!(rust_bucket, c_bucket);
17510 }
17511
17512 #[test]
17513 fn libsais16_matches_bruteforce() {
17514 let t = [3, 1, 4, 1, 5, 9, 0, 2];
17515 let mut sa = vec![0; t.len()];
17516 let mut freq = vec![0; ALPHABET_SIZE];
17517 assert_eq!(libsais16(&t, &mut sa, 0, Some(&mut freq)), 0);
17518 assert_eq!(sa, brute_sa(&t));
17519 assert_eq!(freq[1], 2);
17520 assert_eq!(freq[9], 1);
17521 }
17522
17523 #[test]
17524 fn libsais16_bwt_round_trips() {
17525 let t = [2, 1, 3, 1, 2, 4, 1, 0];
17526 let mut bwt = vec![0; t.len()];
17527 let mut work = vec![0; t.len()];
17528 let primary = libsais16_bwt(&t, &mut bwt, &mut work, 0, None);
17529 assert!(primary > 0);
17530
17531 let mut restored = vec![0; t.len()];
17532 assert_eq!(
17533 libsais16_unbwt(&bwt, &mut restored, &mut work, None, primary),
17534 0
17535 );
17536 assert_eq!(restored, t);
17537 }
17538
17539 #[test]
17540 fn libsais16_plcp_lcp_are_consistent() {
17541 let t = [2, 1, 2, 1, 0];
17542 let sa = brute_sa(&t);
17543 let mut plcp = vec![0; t.len()];
17544 let mut lcp = vec![0; t.len()];
17545 assert_eq!(libsais16_plcp(&t, &sa, &mut plcp), 0);
17546 assert_eq!(libsais16_lcp(&plcp, &sa, &mut lcp), 0);
17547 assert_eq!(lcp[0], 0);
17548
17549 let mut named_plcp = vec![0; t.len()];
17550 assert_eq!(
17551 compute_phi_omp(&sa, &mut named_plcp, t.len() as SaSint, 1),
17552 0
17553 );
17554 assert_eq!(
17555 compute_plcp_omp(&t, &mut named_plcp, t.len() as SaSint, 1),
17556 0
17557 );
17558 assert_eq!(named_plcp, plcp);
17559
17560 let mut named_lcp = vec![0; t.len()];
17561 assert_eq!(
17562 compute_lcp_omp(&named_plcp, &sa, &mut named_lcp, t.len() as SaSint, 1),
17563 0
17564 );
17565 assert_eq!(named_lcp, lcp);
17566
17567 let mut gsa_plcp = vec![0; t.len()];
17568 let mut named_gsa_plcp = vec![0; t.len()];
17569 assert_eq!(libsais16_plcp_gsa(&t, &sa, &mut gsa_plcp), 0);
17570 assert_eq!(
17571 compute_phi_omp(&sa, &mut named_gsa_plcp, t.len() as SaSint, 1),
17572 0
17573 );
17574 assert_eq!(
17575 compute_plcp_gsa_omp(&t, &mut named_gsa_plcp, t.len() as SaSint, 1),
17576 0
17577 );
17578 assert_eq!(named_gsa_plcp, gsa_plcp);
17579 }
17580
17581 #[test]
17582 fn libsais16_bwt_copy_16u_omp_uses_block_partition_for_large_inputs() {
17583 let n = 65_600usize;
17584 let a: Vec<SaSint> = (0..n).map(|i| (i * 17) as SaSint).collect();
17585 let mut threaded = vec![0; n];
17586 let mut sequential = vec![0; n];
17587
17588 bwt_copy_16u_omp(&mut threaded, &a, n as SaSint, 4);
17589 bwt_copy_16u(&mut sequential, &a, n as SaSint);
17590
17591 assert_eq!(threaded, sequential);
17592 }
17593
17594 #[test]
17595 fn libsais16_plcp_lcp_omp_wrappers_match_single_thread_on_large_inputs() {
17596 let n = 65_600usize;
17597 let text: Vec<u16> = (0..n).map(|i| 1 + (i % 251) as u16).collect();
17598 let sa: Vec<SaSint> = (0..n as SaSint).collect();
17599
17600 let mut plcp_single = vec![0; n];
17601 let mut plcp_threaded = vec![0; n];
17602 assert_eq!(compute_phi_omp(&sa, &mut plcp_single, n as SaSint, 1), 0);
17603 assert_eq!(compute_phi_omp(&sa, &mut plcp_threaded, n as SaSint, 4), 0);
17604 assert_eq!(plcp_threaded, plcp_single);
17605
17606 assert_eq!(compute_plcp_omp(&text, &mut plcp_single, n as SaSint, 1), 0);
17607 assert_eq!(
17608 compute_plcp_omp(&text, &mut plcp_threaded, n as SaSint, 4),
17609 0
17610 );
17611 assert_eq!(plcp_threaded, plcp_single);
17612
17613 let mut lcp_single = vec![0; n];
17614 let mut lcp_threaded = vec![0; n];
17615 assert_eq!(
17616 compute_lcp_omp(&plcp_single, &sa, &mut lcp_single, n as SaSint, 1),
17617 0
17618 );
17619 assert_eq!(
17620 compute_lcp_omp(&plcp_threaded, &sa, &mut lcp_threaded, n as SaSint, 4),
17621 0
17622 );
17623 assert_eq!(lcp_threaded, lcp_single);
17624 }
17625
17626 #[test]
17627 fn libsais16_context_allocates_upstream_shaped_buffers() {
17628 let ctx = create_ctx().unwrap();
17629 assert_eq!(ctx.threads, 1);
17630 assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
17631 assert!(ctx.thread_state.is_none());
17632
17633 let ctx = create_ctx_omp(2).unwrap();
17634 assert_eq!(ctx.threads, 2);
17635 assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
17636 let thread_state = ctx.thread_state.as_ref().unwrap();
17637 assert_eq!(thread_state.len(), 2);
17638 assert_eq!(thread_state[0].buckets.len(), 4 * ALPHABET_SIZE);
17639 assert_eq!(thread_state[0].cache_entries, PER_THREAD_CACHE_SIZE);
17640
17641 let ctx = create_ctx_omp(0).unwrap();
17642 assert_eq!(ctx.threads, 1);
17643 assert!(ctx.thread_state.is_none());
17644 }
17645
17646 #[test]
17647 fn libsais16_unbwt_context_allocates_upstream_shaped_buffers() {
17648 let ctx = unbwt_create_ctx().unwrap();
17649 assert_eq!(ctx.threads, 1);
17650 assert_eq!(ctx.bucket2.len(), ALPHABET_SIZE);
17651 assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));
17652 assert!(ctx.buckets.is_none());
17653
17654 let ctx = unbwt_create_ctx_omp(3).unwrap();
17655 assert_eq!(ctx.threads, 3);
17656 assert_eq!(ctx.bucket2.len(), ALPHABET_SIZE);
17657 assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));
17658 assert_eq!(ctx.buckets.as_ref().unwrap().len(), 3 * ALPHABET_SIZE);
17659 }
17660
17661 #[test]
17662 fn libsais16_named_unbwt_helpers_follow_decode_shapes() {
17663 let t = [0, 1, 2];
17664 let mut p = vec![usize::MAX; 4];
17665 let mut bucket2 = vec![0; ALPHABET_SIZE];
17666 bucket2[0] = 1;
17667 bucket2[1] = 2;
17668 bucket2[2] = 3;
17669 unbwt_calculate_P(&t, &mut p, &mut bucket2, 2, 1, 3);
17670 assert_eq!(p[2], 1);
17671 assert_eq!(p[3], 3);
17672
17673 let p = [1usize, 2, 0];
17674 let mut bucket2 = vec![3; ALPHABET_SIZE];
17675 bucket2[0] = 1;
17676 bucket2[1] = 2;
17677 bucket2[2] = 3;
17678 let fastbits = vec![0; 3];
17679
17680 let mut u = vec![99; 3];
17681 let mut i0 = 0;
17682 unbwt_decode_1(&mut u, &p, &bucket2, &fastbits, 0, &mut i0, 3);
17683 assert_eq!(u, vec![0, 1, 2]);
17684 assert_eq!(i0, 0);
17685
17686 let mut u = vec![99; 6];
17687 let (mut i0, mut i1) = (0, 1);
17688 unbwt_decode_2(&mut u, &p, &bucket2, &fastbits, 0, 3, &mut i0, &mut i1, 2);
17689 assert_eq!(&u[..2], &[0, 1]);
17690 assert_eq!(&u[3..5], &[1, 2]);
17691 assert_eq!((i0, i1), (2, 0));
17692
17693 let mut u = vec![99; 8];
17694 let mut cursors = [0; 8];
17695 unbwt_decode_8(&mut u, &p, &bucket2, &fastbits, 0, 1, &mut cursors, 1);
17696 assert_eq!(u, vec![0; 8]);
17697 assert_eq!(cursors, [1; 8]);
17698 }
17699
17700 #[test]
17701 fn libsais16_unbwt_init_parallel_uses_block_partition() {
17702 let n = 70_003usize;
17703 let t: Vec<u16> = (0..n)
17704 .map(|i| ((i.wrapping_mul(37).wrapping_add(i >> 3)) % 251) as u16)
17705 .collect();
17706 let i = [12_345];
17707
17708 let mut single_p = vec![0; n + 1];
17709 let mut threaded_p = vec![0; n + 1];
17710 let mut single_bucket2 = vec![0; ALPHABET_SIZE];
17711 let mut threaded_bucket2 = vec![0; ALPHABET_SIZE];
17712 let mut single_fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
17713 let mut threaded_fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
17714 let mut buckets = vec![0; 4 * ALPHABET_SIZE];
17715
17716 unbwt_init_single(
17717 &t,
17718 &mut single_p,
17719 None,
17720 &i,
17721 &mut single_bucket2,
17722 &mut single_fastbits,
17723 );
17724 unbwt_init_parallel(
17725 &t,
17726 &mut threaded_p,
17727 None,
17728 &i,
17729 &mut threaded_bucket2,
17730 &mut threaded_fastbits,
17731 &mut buckets,
17732 4,
17733 );
17734
17735 assert_eq!(threaded_p, single_p);
17736 assert_eq!(threaded_bucket2, single_bucket2);
17737 assert_eq!(threaded_fastbits, single_fastbits);
17738 }
17739
17740 fn assert_libsais16_matches_c(text: &[u16]) {
17741 let mut rust_sa = vec![0; text.len()];
17742 let mut c_sa = vec![0; text.len()];
17743
17744 let rust_rc = libsais16(text, &mut rust_sa, 0, None);
17745 let c_rc = unsafe {
17746 probe_public_libsais16(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17747 };
17748
17749 assert_eq!(rust_rc, c_rc);
17750 assert_eq!(rust_sa, c_sa);
17751 }
17752
17753 fn assert_libsais16_gsa_matches_c(text: &[u16]) {
17754 let mut rust_sa = vec![0; text.len()];
17755 let mut c_sa = vec![0; text.len()];
17756
17757 let rust_rc = libsais16_gsa(text, &mut rust_sa, 0, None);
17758 let c_rc = unsafe {
17759 probe_public_libsais16_gsa(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17760 };
17761
17762 assert_eq!(rust_rc, c_rc);
17763 assert_eq!(rust_sa, c_sa);
17764 }
17765
17766 fn assert_libsais16_int_matches_c(text: &[SaSint], k: SaSint) {
17767 let mut rust_t = text.to_vec();
17768 let mut c_t = text.to_vec();
17769 let mut rust_sa = vec![0; text.len()];
17770 let mut c_sa = vec![0; text.len()];
17771
17772 let rust_rc = libsais16_int(&mut rust_t, &mut rust_sa, k, 0);
17773 let c_rc = unsafe {
17774 probe_public_libsais16_int(
17775 c_t.as_mut_ptr(),
17776 c_sa.as_mut_ptr(),
17777 c_t.len() as SaSint,
17778 k,
17779 0,
17780 )
17781 };
17782
17783 assert_eq!(rust_rc, c_rc);
17784 assert_eq!(rust_t, c_t);
17785 assert_eq!(rust_sa, c_sa);
17786 }
17787
17788 fn assert_libsais16_bwt_matches_c(text: &[u16]) {
17789 let mut rust_u = vec![0; text.len()];
17790 let mut rust_a = vec![0; text.len()];
17791 let mut c_u = vec![0; text.len()];
17792 let mut c_a = vec![0; text.len()];
17793
17794 let rust_rc = libsais16_bwt(text, &mut rust_u, &mut rust_a, 0, None);
17795 let c_rc = unsafe {
17796 probe_public_libsais16_bwt(
17797 text.as_ptr(),
17798 c_u.as_mut_ptr(),
17799 c_a.as_mut_ptr(),
17800 text.len() as SaSint,
17801 0,
17802 )
17803 };
17804
17805 assert_eq!(rust_rc, c_rc);
17806 assert_eq!(rust_u, c_u);
17807 }
17808
17809 fn assert_libsais16_bwt_aux_matches_c(text: &[u16], r: SaSint) {
17810 let aux_len = if text.is_empty() {
17811 0
17812 } else {
17813 (text.len() - 1) / r as usize + 1
17814 };
17815 let mut rust_u = vec![0; text.len()];
17816 let mut rust_a = vec![0; text.len()];
17817 let mut rust_i = vec![0; aux_len];
17818 let mut c_u = vec![0; text.len()];
17819 let mut c_a = vec![0; text.len()];
17820 let mut c_i = vec![0; aux_len];
17821
17822 let rust_rc = libsais16_bwt_aux(text, &mut rust_u, &mut rust_a, 0, None, r, &mut rust_i);
17823 let c_rc = unsafe {
17824 probe_public_libsais16_bwt_aux(
17825 text.as_ptr(),
17826 c_u.as_mut_ptr(),
17827 c_a.as_mut_ptr(),
17828 text.len() as SaSint,
17829 0,
17830 r,
17831 c_i.as_mut_ptr(),
17832 )
17833 };
17834
17835 assert_eq!(rust_rc, c_rc);
17836 assert_eq!(rust_u, c_u);
17837 assert_eq!(rust_i, c_i);
17838 }
17839
17840 fn assert_libsais16_freq_outputs_match_c(text: &[u16], gsa_text: &[u16]) {
17841 let mut rust_sa = vec![0; text.len()];
17842 let mut c_sa = vec![0; text.len()];
17843 let mut rust_freq = vec![-1; ALPHABET_SIZE];
17844 let mut c_freq = vec![-1; ALPHABET_SIZE];
17845
17846 let rust_rc = libsais16(text, &mut rust_sa, 0, Some(&mut rust_freq));
17847 let c_rc = unsafe {
17848 probe_public_libsais16_freq(
17849 text.as_ptr(),
17850 c_sa.as_mut_ptr(),
17851 text.len() as SaSint,
17852 0,
17853 c_freq.as_mut_ptr(),
17854 )
17855 };
17856 assert_eq!(rust_rc, c_rc);
17857 assert_eq!(rust_sa, c_sa);
17858 assert_eq!(rust_freq, c_freq);
17859
17860 let mut rust_gsa = vec![0; gsa_text.len()];
17861 let mut c_gsa = vec![0; gsa_text.len()];
17862 rust_freq.fill(-1);
17863 c_freq.fill(-1);
17864 let rust_rc = libsais16_gsa(gsa_text, &mut rust_gsa, 0, Some(&mut rust_freq));
17865 let c_rc = unsafe {
17866 probe_public_libsais16_gsa_freq(
17867 gsa_text.as_ptr(),
17868 c_gsa.as_mut_ptr(),
17869 gsa_text.len() as SaSint,
17870 0,
17871 c_freq.as_mut_ptr(),
17872 )
17873 };
17874 assert_eq!(rust_rc, c_rc);
17875 assert_eq!(rust_gsa, c_gsa);
17876 assert_eq!(rust_freq, c_freq);
17877
17878 let mut rust_u = vec![0; text.len()];
17879 let mut rust_a = vec![0; text.len()];
17880 let mut c_u = vec![0; text.len()];
17881 let mut c_a = vec![0; text.len()];
17882 rust_freq.fill(-1);
17883 c_freq.fill(-1);
17884 let rust_rc = libsais16_bwt(text, &mut rust_u, &mut rust_a, 0, Some(&mut rust_freq));
17885 let c_rc = unsafe {
17886 probe_public_libsais16_bwt_freq(
17887 text.as_ptr(),
17888 c_u.as_mut_ptr(),
17889 c_a.as_mut_ptr(),
17890 text.len() as SaSint,
17891 0,
17892 c_freq.as_mut_ptr(),
17893 )
17894 };
17895 assert_eq!(rust_rc, c_rc);
17896 assert_eq!(rust_u, c_u);
17897 assert_eq!(rust_freq, c_freq);
17898
17899 let r = 4;
17900 let aux_len = (text.len() - 1) / r as usize + 1;
17901 let mut rust_i = vec![0; aux_len];
17902 let mut c_i = vec![0; aux_len];
17903 rust_freq.fill(-1);
17904 c_freq.fill(-1);
17905 let rust_rc = libsais16_bwt_aux(
17906 text,
17907 &mut rust_u,
17908 &mut rust_a,
17909 0,
17910 Some(&mut rust_freq),
17911 r,
17912 &mut rust_i,
17913 );
17914 let c_rc = unsafe {
17915 probe_public_libsais16_bwt_aux_freq(
17916 text.as_ptr(),
17917 c_u.as_mut_ptr(),
17918 c_a.as_mut_ptr(),
17919 text.len() as SaSint,
17920 0,
17921 c_freq.as_mut_ptr(),
17922 r,
17923 c_i.as_mut_ptr(),
17924 )
17925 };
17926 assert_eq!(rust_rc, c_rc);
17927 assert_eq!(rust_u, c_u);
17928 assert_eq!(rust_i, c_i);
17929 assert_eq!(rust_freq, c_freq);
17930 }
17931
17932 fn assert_libsais16_unbwt_matches_c(text: &[u16]) {
17933 let mut bwt = vec![0; text.len()];
17934 let mut work = vec![0; text.len()];
17935 let primary = libsais16_bwt(text, &mut bwt, &mut work, 0, None);
17936 assert!(primary >= 0);
17937
17938 let mut rust_u = vec![0; text.len()];
17939 let mut rust_a = vec![0; text.len() + 1];
17940 let mut c_u = vec![0; text.len()];
17941 let mut c_a = vec![0; text.len() + 1];
17942
17943 let rust_rc = libsais16_unbwt(&bwt, &mut rust_u, &mut rust_a, None, primary);
17944 let c_rc = unsafe {
17945 probe_public_libsais16_unbwt(
17946 bwt.as_ptr(),
17947 c_u.as_mut_ptr(),
17948 c_a.as_mut_ptr(),
17949 bwt.len() as SaSint,
17950 primary,
17951 )
17952 };
17953
17954 assert_eq!(rust_rc, c_rc);
17955 assert_eq!(rust_u, c_u);
17956 assert_eq!(rust_u, text);
17957 }
17958
17959 fn assert_libsais16_unbwt_aux_matches_c(text: &[u16], r: SaSint) {
17960 let mut bwt = vec![0; text.len()];
17961 let mut work = vec![0; text.len()];
17962 let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
17963 let bwt_rc = libsais16_bwt_aux(text, &mut bwt, &mut work, 0, None, r, &mut aux);
17964 assert_eq!(bwt_rc, 0);
17965
17966 let mut rust_u = vec![0; text.len()];
17967 let mut rust_a = vec![0; text.len() + 1];
17968 let mut c_u = vec![0; text.len()];
17969 let mut c_a = vec![0; text.len() + 1];
17970
17971 let rust_rc = libsais16_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, None, r, &aux);
17972 let c_rc = unsafe {
17973 probe_public_libsais16_unbwt_aux(
17974 bwt.as_ptr(),
17975 c_u.as_mut_ptr(),
17976 c_a.as_mut_ptr(),
17977 bwt.len() as SaSint,
17978 r,
17979 aux.as_ptr(),
17980 )
17981 };
17982
17983 assert_eq!(rust_rc, c_rc);
17984 assert_eq!(rust_u, c_u);
17985 assert_eq!(rust_u, text);
17986 }
17987
17988 fn assert_libsais16_unbwt_freq_matches_c(text: &[u16]) {
17989 let mut freq = vec![0; ALPHABET_SIZE];
17990 let mut bwt = vec![0; text.len()];
17991 let mut work = vec![0; text.len()];
17992 let primary = libsais16_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
17993 assert!(primary >= 0);
17994
17995 let mut rust_u = vec![0; text.len()];
17996 let mut rust_a = vec![0; text.len() + 1];
17997 let mut c_u = vec![0; text.len()];
17998 let mut c_a = vec![0; text.len() + 1];
17999
18000 let rust_rc = libsais16_unbwt(&bwt, &mut rust_u, &mut rust_a, Some(&freq), primary);
18001 let c_rc = unsafe {
18002 probe_public_libsais16_unbwt_freq(
18003 bwt.as_ptr(),
18004 c_u.as_mut_ptr(),
18005 c_a.as_mut_ptr(),
18006 bwt.len() as SaSint,
18007 freq.as_ptr(),
18008 primary,
18009 )
18010 };
18011 assert_eq!(rust_rc, c_rc);
18012 assert_eq!(rust_u, c_u);
18013 assert_eq!(rust_u, text);
18014
18015 let r = 4;
18016 let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
18017 let bwt_rc = libsais16_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), r, &mut aux);
18018 assert_eq!(bwt_rc, 0);
18019
18020 rust_u.fill(0);
18021 rust_a.fill(0);
18022 c_u.fill(0);
18023 c_a.fill(0);
18024 let rust_rc = libsais16_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, Some(&freq), r, &aux);
18025 let c_rc = unsafe {
18026 probe_public_libsais16_unbwt_aux_freq(
18027 bwt.as_ptr(),
18028 c_u.as_mut_ptr(),
18029 c_a.as_mut_ptr(),
18030 bwt.len() as SaSint,
18031 freq.as_ptr(),
18032 r,
18033 aux.as_ptr(),
18034 )
18035 };
18036 assert_eq!(rust_rc, c_rc);
18037 assert_eq!(rust_u, c_u);
18038 assert_eq!(rust_u, text);
18039 }
18040
18041 fn assert_libsais16_plcp_lcp_matches_c(text: &[u16]) {
18042 let mut sa = vec![0; text.len()];
18043 assert_eq!(libsais16(text, &mut sa, 0, None), 0);
18044
18045 let mut rust_plcp = vec![0; text.len()];
18046 let mut c_plcp = vec![0; text.len()];
18047 let rust_rc = libsais16_plcp(text, &sa, &mut rust_plcp);
18048 let c_rc = unsafe {
18049 probe_public_libsais16_plcp(
18050 text.as_ptr(),
18051 sa.as_ptr(),
18052 c_plcp.as_mut_ptr(),
18053 text.len() as SaSint,
18054 )
18055 };
18056 assert_eq!(rust_rc, c_rc);
18057 assert_eq!(rust_plcp, c_plcp);
18058
18059 let mut rust_lcp = vec![0; text.len()];
18060 let mut c_lcp = vec![0; text.len()];
18061 let rust_rc = libsais16_lcp(&rust_plcp, &sa, &mut rust_lcp);
18062 let c_rc = unsafe {
18063 probe_public_libsais16_lcp(
18064 c_plcp.as_ptr(),
18065 sa.as_ptr(),
18066 c_lcp.as_mut_ptr(),
18067 text.len() as SaSint,
18068 )
18069 };
18070 assert_eq!(rust_rc, c_rc);
18071 assert_eq!(rust_lcp, c_lcp);
18072 }
18073
18074 fn assert_libsais16_plcp_gsa_matches_c(text: &[u16]) {
18075 let mut sa = vec![0; text.len()];
18076 assert_eq!(libsais16_gsa(text, &mut sa, 0, None), 0);
18077
18078 let mut rust_plcp = vec![0; text.len()];
18079 let mut c_plcp = vec![0; text.len()];
18080 let rust_rc = libsais16_plcp_gsa(text, &sa, &mut rust_plcp);
18081 let c_rc = unsafe {
18082 probe_public_libsais16_plcp_gsa(
18083 text.as_ptr(),
18084 sa.as_ptr(),
18085 c_plcp.as_mut_ptr(),
18086 text.len() as SaSint,
18087 )
18088 };
18089 assert_eq!(rust_rc, c_rc);
18090 assert_eq!(rust_plcp, c_plcp);
18091 }
18092
18093 #[test]
18094 fn public_libsais16_matches_upstream_c() {
18095 for text in [
18096 [].as_slice(),
18097 &[1][..],
18098 &[2, 1, 3, 1, 2, 0],
18099 &[2, 1, 3, 1, 2, 4, 1, 0],
18100 &[65_535, 1, 65_534, 1, 0],
18101 &[7, 7, 7, 7, 7, 0],
18102 ] {
18103 assert_libsais16_matches_c(text);
18104 }
18105 }
18106
18107 #[test]
18108 fn public_libsais16_bwt_matches_upstream_c() {
18109 for text in [
18110 [].as_slice(),
18111 &[1][..],
18112 &[2, 1, 3, 1, 2, 0],
18113 &[2, 1, 3, 1, 2, 4, 1, 0],
18114 &[65_535, 1, 65_534, 1, 0],
18115 &[7, 7, 7, 7, 7, 0],
18116 ] {
18117 assert_libsais16_bwt_matches_c(text);
18118 }
18119 }
18120
18121 #[test]
18122 fn public_libsais16_gsa_matches_upstream_c() {
18123 for text in [&[0][..], &[2, 1, 0], &[2, 1, 0, 3, 1, 0], &[7, 7, 0, 7, 0]] {
18124 assert_libsais16_gsa_matches_c(text);
18125 }
18126 }
18127
18128 #[test]
18129 fn public_libsais16_int_matches_upstream_c() {
18130 for (text, k) in [
18131 (&[][..], 0),
18132 (&[0][..], 1),
18133 (&[1, 2, 1, 0][..], 3),
18134 (&[2, 1, 2, 1, 0][..], 3),
18135 (&[3, 3, 3, 2, 1, 0][..], 4),
18136 ] {
18137 assert_libsais16_int_matches_c(text, k);
18138 }
18139 }
18140
18141 #[test]
18142 fn public_libsais16_plcp_lcp_matches_upstream_c() {
18143 for text in [
18144 &[2, 1, 3, 1, 2, 0][..],
18145 &[2, 1, 3, 1, 2, 4, 1, 0],
18146 &[65_535, 1, 65_534, 1, 0],
18147 &[7, 7, 7, 7, 7, 0],
18148 ] {
18149 assert_libsais16_plcp_lcp_matches_c(text);
18150 }
18151 }
18152
18153 #[test]
18154 fn public_libsais16_plcp_gsa_matches_upstream_c() {
18155 for text in [&[0][..], &[2, 1, 0], &[2, 1, 0, 3, 1, 0], &[7, 7, 0, 7, 0]] {
18156 assert_libsais16_plcp_gsa_matches_c(text);
18157 }
18158 }
18159
18160 #[test]
18161 fn public_libsais16_bwt_aux_matches_upstream_c() {
18162 for text in [
18163 &[2, 1, 3, 1, 2, 0][..],
18164 &[2, 1, 3, 1, 2, 4, 1, 0],
18165 &[65_535, 1, 65_534, 1, 0],
18166 &[7, 7, 7, 7, 7, 0],
18167 ] {
18168 assert_libsais16_bwt_aux_matches_c(text, 4);
18169 }
18170 }
18171
18172 #[test]
18173 fn public_libsais16_frequency_outputs_match_upstream_c() {
18174 assert_libsais16_freq_outputs_match_c(&[65_535, 1, 2, 1, 0], &[65_535, 1, 0, 2, 1, 0]);
18175 }
18176
18177 #[test]
18178 fn public_libsais16_unbwt_with_frequency_matches_upstream_c() {
18179 assert_libsais16_unbwt_freq_matches_c(&[65_535, 1, 2, 1, 0]);
18180 }
18181
18182 #[test]
18183 fn public_libsais16_unbwt_matches_upstream_c() {
18184 for text in [
18185 &[1][..],
18186 &[2, 1, 3, 1, 2, 0],
18187 &[2, 1, 3, 1, 2, 4, 1, 0],
18188 &[65_535, 1, 65_534, 1, 0],
18189 &[7, 7, 7, 7, 7, 0],
18190 ] {
18191 assert_libsais16_unbwt_matches_c(text);
18192 }
18193 }
18194
18195 #[test]
18196 fn public_libsais16_unbwt_aux_matches_upstream_c() {
18197 for text in [
18198 &[2, 1, 3, 1, 2, 0][..],
18199 &[2, 1, 3, 1, 2, 4, 1, 0],
18200 &[65_535, 1, 65_534, 1, 0],
18201 &[7, 7, 7, 7, 7, 0],
18202 ] {
18203 assert_libsais16_unbwt_aux_matches_c(text, 4);
18204 }
18205 }
18206
18207 #[test]
18208 fn public_libsais16_unbwt_aux_exercises_decode_dispatch_cases() {
18209 for len in [2usize, 5, 9, 13, 17, 21, 25, 29, 33, 37] {
18210 let text = (0..len)
18211 .map(|i| ((i * 37 + 11) % 65_535 + 1) as u16)
18212 .collect::<Vec<_>>();
18213 assert_libsais16_unbwt_aux_matches_c(&text, 4);
18214 }
18215 }
18216
18217 #[test]
18218 fn libsais16_lcp_helpers_reject_invalid_suffix_entries() {
18219 let text = [2, 1, 2, 1, 0];
18220 let mut plcp = vec![0; text.len()];
18221 let mut lcp = vec![0; text.len()];
18222
18223 assert_eq!(libsais16_plcp(&text, &[0, 1, -1, 3, 4], &mut plcp), -1);
18224 assert_eq!(libsais16_plcp(&text, &[0, 1, 2, 3, 5], &mut plcp), -1);
18225 assert_eq!(libsais16_lcp(&plcp, &[0, 1, -1, 3, 4], &mut lcp), -1);
18226 assert_eq!(libsais16_lcp(&plcp, &[0, 1, 2, 3, 5], &mut lcp), -1);
18227 }
18228
18229 #[test]
18230 fn libsais16_rejects_invalid_public_arguments() {
18231 let text = [2, 1, 3, 1, 2, 0];
18232 let int_text = [1, 2, 1, 0];
18233 let mut int_text_for_short_sa = int_text.to_vec();
18234 let mut int_text_for_negative_fs = int_text.to_vec();
18235 let mut sa = vec![0; text.len() - 1];
18236 let mut int_sa = vec![0; int_text.len() - 1];
18237 let mut full_int_sa = vec![0; int_text.len()];
18238 let mut freq = vec![0; ALPHABET_SIZE - 1];
18239 let mut u = vec![0; text.len() - 1];
18240 let mut a = vec![0; text.len() - 1];
18241 let mut full_u = vec![0; text.len()];
18242 let mut full_a = vec![0; text.len()];
18243 let mut aux = vec![0; 1];
18244
18245 assert_eq!(libsais16(&text, &mut sa, 0, None), -1);
18246 assert_eq!(libsais16(&text, &mut full_a, 0, Some(&mut freq)), -1);
18247 assert_eq!(libsais16_gsa(&[1, 2, 3], &mut full_a[..3], 0, None), -1);
18248 assert_eq!(
18249 libsais16_int(&mut int_text_for_short_sa, &mut int_sa, 3, 0),
18250 -1
18251 );
18252 assert_eq!(
18253 libsais16_int(&mut int_text_for_negative_fs, &mut full_int_sa, 3, -1),
18254 -1
18255 );
18256 assert_eq!(libsais16_bwt(&text, &mut u, &mut full_a, 0, None), -1);
18257 assert_eq!(libsais16_bwt(&text, &mut full_u, &mut a, 0, None), -1);
18258 assert_eq!(
18259 libsais16_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 0, &mut aux),
18260 -1
18261 );
18262 assert_eq!(
18263 libsais16_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 3, &mut aux),
18264 -1
18265 );
18266 assert_eq!(
18267 libsais16_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 4, &mut aux),
18268 -1
18269 );
18270 assert_eq!(create_ctx_omp(-1), None);
18271 assert_eq!(unbwt_create_ctx_omp(-1), None);
18272 }
18273
18274 #[test]
18275 fn libsais16_unbwt_rejects_invalid_public_arguments() {
18276 let text = [2, 1, 3, 1, 2, 0];
18277 let mut bwt = vec![0; text.len()];
18278 let mut work = vec![0; text.len()];
18279 let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18280
18281 let mut short_u = vec![0; text.len() - 1];
18282 let mut short_a = vec![0; text.len() - 1];
18283 let mut full_u = vec![0; text.len()];
18284 let mut full_a = vec![0; text.len()];
18285 let short_freq = vec![0; ALPHABET_SIZE - 1];
18286 let short_aux = vec![primary];
18287 let bad_aux = vec![0, 0];
18288 let good_aux = vec![primary, 4];
18289
18290 assert_eq!(
18291 libsais16_unbwt(&bwt, &mut short_u, &mut full_a, None, primary),
18292 -1
18293 );
18294 assert_eq!(
18295 libsais16_unbwt(&bwt, &mut full_u, &mut short_a, None, primary),
18296 -1
18297 );
18298 assert_eq!(
18299 libsais16_unbwt(&bwt, &mut full_u, &mut full_a, Some(&short_freq), primary),
18300 -1
18301 );
18302 assert_eq!(libsais16_unbwt(&bwt, &mut full_u, &mut full_a, None, 0), -1);
18303 assert_eq!(
18304 libsais16_unbwt(
18305 &bwt,
18306 &mut full_u,
18307 &mut full_a,
18308 None,
18309 text.len() as SaSint + 1
18310 ),
18311 -1
18312 );
18313 assert_eq!(
18314 libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 0, &good_aux),
18315 -1
18316 );
18317 assert_eq!(
18318 libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
18319 -1
18320 );
18321 assert_eq!(
18322 libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &short_aux),
18323 -1
18324 );
18325 assert_eq!(
18326 libsais16_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
18327 -1
18328 );
18329 }
18330
18331 #[test]
18332 fn libsais16_ctx_rejects_invalid_public_arguments() {
18333 let text = [2, 1, 3, 1, 2, 0];
18334 let mut ctx = create_ctx().unwrap();
18335 let mut sa = vec![0; text.len() - 1];
18336 let mut freq = vec![0; ALPHABET_SIZE - 1];
18337 let mut u = vec![0; text.len() - 1];
18338 let mut a = vec![0; text.len() - 1];
18339 let mut full_u = vec![0; text.len()];
18340 let mut full_a = vec![0; text.len()];
18341 let mut aux = vec![0; 1];
18342
18343 assert_eq!(libsais16_ctx(&mut ctx, &text, &mut sa, 0, None), -1);
18344 assert_eq!(
18345 libsais16_ctx(&mut ctx, &text, &mut full_a, 0, Some(&mut freq)),
18346 -1
18347 );
18348 assert_eq!(
18349 libsais16_gsa_ctx(&mut ctx, &[1, 2, 3], &mut full_a[..3], 0, None),
18350 -1
18351 );
18352 assert_eq!(
18353 libsais16_bwt_ctx(&mut ctx, &text, &mut u, &mut full_a, 0, None),
18354 -1
18355 );
18356 assert_eq!(
18357 libsais16_bwt_ctx(&mut ctx, &text, &mut full_u, &mut a, 0, None),
18358 -1
18359 );
18360 assert_eq!(
18361 libsais16_bwt_aux_ctx(
18362 &mut ctx,
18363 &text,
18364 &mut full_u,
18365 &mut full_a,
18366 0,
18367 None,
18368 0,
18369 &mut aux
18370 ),
18371 -1
18372 );
18373 assert_eq!(
18374 libsais16_bwt_aux_ctx(
18375 &mut ctx,
18376 &text,
18377 &mut full_u,
18378 &mut full_a,
18379 0,
18380 None,
18381 3,
18382 &mut aux
18383 ),
18384 -1
18385 );
18386 assert_eq!(
18387 libsais16_bwt_aux_ctx(
18388 &mut ctx,
18389 &text,
18390 &mut full_u,
18391 &mut full_a,
18392 0,
18393 None,
18394 4,
18395 &mut aux
18396 ),
18397 -1
18398 );
18399
18400 let mut default_ctx = Context::default();
18401 assert_eq!(
18402 libsais16_ctx(&mut default_ctx, &text, &mut full_a, 0, None),
18403 -2
18404 );
18405
18406 let mut bad_bucket_ctx = create_ctx().unwrap();
18407 bad_bucket_ctx.buckets.clear();
18408 assert_eq!(
18409 libsais16_ctx(&mut bad_bucket_ctx, &text, &mut full_a, 0, None),
18410 -2
18411 );
18412
18413 let mut short_thread_state_ctx = create_ctx_omp(2).unwrap();
18414 short_thread_state_ctx
18415 .thread_state
18416 .as_mut()
18417 .unwrap()
18418 .truncate(1);
18419 assert_eq!(
18420 libsais16_ctx(&mut short_thread_state_ctx, &text, &mut full_a, 0, None),
18421 -2
18422 );
18423 }
18424
18425 #[test]
18426 fn libsais16_unbwt_ctx_rejects_invalid_public_arguments() {
18427 let text = [2, 1, 3, 1, 2, 0];
18428 let mut bwt = vec![0; text.len()];
18429 let mut work = vec![0; text.len()];
18430 let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18431 let mut ctx = unbwt_create_ctx().unwrap();
18432
18433 let mut short_u = vec![0; text.len() - 1];
18434 let mut short_a = vec![0; text.len() - 1];
18435 let mut full_u = vec![0; text.len()];
18436 let mut full_a = vec![0; text.len()];
18437 let short_freq = vec![0; ALPHABET_SIZE - 1];
18438 let short_aux = vec![primary];
18439 let bad_aux = vec![0, 0];
18440 let good_aux = vec![primary, 4];
18441
18442 assert_eq!(
18443 libsais16_unbwt_ctx(&mut ctx, &bwt, &mut short_u, &mut full_a, None, primary),
18444 -1
18445 );
18446 assert_eq!(
18447 libsais16_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut short_a, None, primary),
18448 -1
18449 );
18450 assert_eq!(
18451 libsais16_unbwt_ctx(
18452 &mut ctx,
18453 &bwt,
18454 &mut full_u,
18455 &mut full_a,
18456 Some(&short_freq),
18457 primary
18458 ),
18459 -1
18460 );
18461 assert_eq!(
18462 libsais16_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0),
18463 -1
18464 );
18465 assert_eq!(
18466 libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0, &good_aux),
18467 -1
18468 );
18469 assert_eq!(
18470 libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
18471 -1
18472 );
18473 assert_eq!(
18474 libsais16_unbwt_aux_ctx(
18475 &mut ctx,
18476 &bwt,
18477 &mut full_u,
18478 &mut full_a,
18479 None,
18480 4,
18481 &short_aux
18482 ),
18483 -1
18484 );
18485 assert_eq!(
18486 libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
18487 -1
18488 );
18489 }
18490
18491 #[test]
18492 fn libsais16_context_wrappers_match_direct_calls() {
18493 let text = [2, 1, 3, 1, 2, 0];
18494 let mut ctx = create_ctx().unwrap();
18495
18496 let mut direct_sa = vec![0; text.len()];
18497 let mut ctx_sa = vec![0; text.len()];
18498 assert_eq!(libsais16(&text, &mut direct_sa, 0, None), 0);
18499 assert_eq!(libsais16_ctx(&mut ctx, &text, &mut ctx_sa, 0, None), 0);
18500 assert_eq!(ctx_sa, direct_sa);
18501
18502 let mut direct_bwt = vec![0; text.len()];
18503 let mut direct_work = vec![0; text.len()];
18504 let mut ctx_bwt = vec![0; text.len()];
18505 let mut ctx_work = vec![0; text.len()];
18506 assert_eq!(
18507 libsais16_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None),
18508 libsais16_bwt_ctx(&mut ctx, &text, &mut ctx_bwt, &mut ctx_work, 0, None)
18509 );
18510 assert_eq!(ctx_bwt, direct_bwt);
18511
18512 let mut direct_aux = vec![0; 2];
18513 let mut ctx_aux = vec![0; 2];
18514 assert_eq!(
18515 libsais16_bwt_aux(
18516 &text,
18517 &mut direct_bwt,
18518 &mut direct_work,
18519 0,
18520 None,
18521 4,
18522 &mut direct_aux
18523 ),
18524 libsais16_bwt_aux_ctx(
18525 &mut ctx,
18526 &text,
18527 &mut ctx_bwt,
18528 &mut ctx_work,
18529 0,
18530 None,
18531 4,
18532 &mut ctx_aux
18533 )
18534 );
18535 assert_eq!(ctx_bwt, direct_bwt);
18536 assert_eq!(ctx_aux, direct_aux);
18537 }
18538
18539 #[test]
18540 fn libsais16_unbwt_context_wrappers_match_direct_calls() {
18541 let text = [2, 1, 3, 1, 2, 0];
18542 let mut bwt = vec![0; text.len()];
18543 let mut work = vec![0; text.len()];
18544 let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18545
18546 let mut ctx = unbwt_create_ctx().unwrap();
18547 let mut direct = vec![0; text.len()];
18548 let mut direct_work = vec![0; text.len()];
18549 let mut via_ctx = vec![0; text.len()];
18550 let mut ctx_work = vec![0; text.len()];
18551
18552 assert_eq!(
18553 libsais16_unbwt(&bwt, &mut direct, &mut direct_work, None, primary),
18554 0
18555 );
18556 assert_eq!(
18557 libsais16_unbwt_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, primary),
18558 0
18559 );
18560 assert_eq!(via_ctx, direct);
18561
18562 let mut aux = vec![0; 2];
18563 assert_eq!(
18564 libsais16_bwt_aux(&text, &mut bwt, &mut work, 0, None, 4, &mut aux),
18565 0
18566 );
18567 assert_eq!(
18568 libsais16_unbwt_aux(&bwt, &mut direct, &mut direct_work, None, 4, &aux),
18569 0
18570 );
18571 assert_eq!(
18572 libsais16_unbwt_aux_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, 4, &aux),
18573 0
18574 );
18575 assert_eq!(via_ctx, direct);
18576 }
18577
18578 #[test]
18579 fn libsais16_ctx_frequency_wrappers_match_direct_calls() {
18580 let text = [2, 1, 3, 1, 2, 0];
18581 let gsa_text = [2, 1, 0, 3, 1, 0];
18582 let mut ctx = create_ctx().unwrap();
18583
18584 let mut direct_sa = vec![0; text.len()];
18585 let mut ctx_sa = vec![0; text.len()];
18586 let mut direct_freq = vec![-1; ALPHABET_SIZE];
18587 let mut ctx_freq = vec![-1; ALPHABET_SIZE];
18588 assert_eq!(
18589 libsais16(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
18590 0
18591 );
18592 assert_eq!(
18593 libsais16_ctx(&mut ctx, &text, &mut ctx_sa, 0, Some(&mut ctx_freq)),
18594 0
18595 );
18596 assert_eq!(ctx_sa, direct_sa);
18597 assert_eq!(ctx_freq, direct_freq);
18598
18599 let mut direct_gsa = vec![0; gsa_text.len()];
18600 let mut ctx_gsa = vec![0; gsa_text.len()];
18601 direct_freq.fill(-1);
18602 ctx_freq.fill(-1);
18603 assert_eq!(
18604 libsais16_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
18605 0
18606 );
18607 assert_eq!(
18608 libsais16_gsa_ctx(&mut ctx, &gsa_text, &mut ctx_gsa, 0, Some(&mut ctx_freq)),
18609 0
18610 );
18611 assert_eq!(ctx_gsa, direct_gsa);
18612 assert_eq!(ctx_freq, direct_freq);
18613
18614 let mut direct_bwt = vec![0; text.len()];
18615 let mut direct_work = vec![0; text.len()];
18616 let mut ctx_bwt = vec![0; text.len()];
18617 let mut ctx_work = vec![0; text.len()];
18618 direct_freq.fill(-1);
18619 ctx_freq.fill(-1);
18620 assert_eq!(
18621 libsais16_bwt(
18622 &text,
18623 &mut direct_bwt,
18624 &mut direct_work,
18625 0,
18626 Some(&mut direct_freq)
18627 ),
18628 libsais16_bwt_ctx(
18629 &mut ctx,
18630 &text,
18631 &mut ctx_bwt,
18632 &mut ctx_work,
18633 0,
18634 Some(&mut ctx_freq)
18635 )
18636 );
18637 assert_eq!(ctx_bwt, direct_bwt);
18638 assert_eq!(ctx_freq, direct_freq);
18639
18640 let mut direct_aux = vec![0; 2];
18641 let mut ctx_aux = vec![0; 2];
18642 direct_freq.fill(-1);
18643 ctx_freq.fill(-1);
18644 assert_eq!(
18645 libsais16_bwt_aux(
18646 &text,
18647 &mut direct_bwt,
18648 &mut direct_work,
18649 0,
18650 Some(&mut direct_freq),
18651 4,
18652 &mut direct_aux
18653 ),
18654 libsais16_bwt_aux_ctx(
18655 &mut ctx,
18656 &text,
18657 &mut ctx_bwt,
18658 &mut ctx_work,
18659 0,
18660 Some(&mut ctx_freq),
18661 4,
18662 &mut ctx_aux
18663 )
18664 );
18665 assert_eq!(ctx_bwt, direct_bwt);
18666 assert_eq!(ctx_aux, direct_aux);
18667 assert_eq!(ctx_freq, direct_freq);
18668 }
18669
18670 #[test]
18671 fn libsais16_unbwt_ctx_frequency_wrappers_match_direct_calls() {
18672 let text = [2, 1, 3, 1, 2, 0];
18673 let mut freq = vec![0; ALPHABET_SIZE];
18674 let mut bwt = vec![0; text.len()];
18675 let mut work = vec![0; text.len()];
18676 let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, Some(&mut freq));
18677 assert!(primary >= 0);
18678
18679 let mut ctx = unbwt_create_ctx().unwrap();
18680 let mut direct = vec![0; text.len()];
18681 let mut direct_work = vec![0; text.len() + 1];
18682 let mut via_ctx = vec![0; text.len()];
18683 let mut ctx_work = vec![0; text.len() + 1];
18684 assert_eq!(
18685 libsais16_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
18686 libsais16_unbwt_ctx(
18687 &mut ctx,
18688 &bwt,
18689 &mut via_ctx,
18690 &mut ctx_work,
18691 Some(&freq),
18692 primary
18693 )
18694 );
18695 assert_eq!(via_ctx, direct);
18696 assert_eq!(via_ctx, text);
18697
18698 let mut aux = vec![0; (text.len() - 1) / 4 + 1];
18699 assert_eq!(
18700 libsais16_bwt_aux(&text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
18701 0
18702 );
18703 direct.fill(0);
18704 direct_work.fill(0);
18705 via_ctx.fill(0);
18706 ctx_work.fill(0);
18707 assert_eq!(
18708 libsais16_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
18709 libsais16_unbwt_aux_ctx(
18710 &mut ctx,
18711 &bwt,
18712 &mut via_ctx,
18713 &mut ctx_work,
18714 Some(&freq),
18715 4,
18716 &aux
18717 )
18718 );
18719 assert_eq!(via_ctx, direct);
18720 assert_eq!(via_ctx, text);
18721 }
18722
18723 #[test]
18724 fn libsais16_omp_wrappers_match_direct_calls_and_reject_negative_threads() {
18725 let text = [2, 1, 3, 1, 2, 0];
18726 let gsa_text = [2, 1, 0, 3, 1, 0];
18727 let mut direct_sa = vec![0; text.len()];
18728 let mut omp_sa = vec![0; text.len()];
18729 assert_eq!(libsais16(&text, &mut direct_sa, 0, None), 0);
18730 assert_eq!(libsais16_omp(&text, &mut omp_sa, 0, None, 2), 0);
18731 assert_eq!(omp_sa, direct_sa);
18732 assert_eq!(libsais16_omp(&text, &mut omp_sa, 0, None, -1), -1);
18733
18734 let mut direct_gsa = vec![0; gsa_text.len()];
18735 let mut omp_gsa = vec![0; gsa_text.len()];
18736 assert_eq!(libsais16_gsa(&gsa_text, &mut direct_gsa, 0, None), 0);
18737 assert_eq!(libsais16_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, 2), 0);
18738 assert_eq!(omp_gsa, direct_gsa);
18739 assert_eq!(libsais16_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, -1), -1);
18740
18741 let int_text = [1, 2, 1, 0];
18742 let mut direct_int_text = int_text.to_vec();
18743 let mut omp_int_text = int_text.to_vec();
18744 let mut direct_int_sa = vec![0; int_text.len()];
18745 let mut omp_int_sa = vec![0; int_text.len()];
18746 assert_eq!(
18747 libsais16_int(&mut direct_int_text, &mut direct_int_sa, 3, 0),
18748 0
18749 );
18750 assert_eq!(
18751 libsais16_int_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, 2),
18752 0
18753 );
18754 assert_eq!(omp_int_text, direct_int_text);
18755 assert_eq!(omp_int_sa, direct_int_sa);
18756 assert_eq!(
18757 libsais16_int_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, -1),
18758 -1
18759 );
18760
18761 let mut direct_bwt = vec![0; text.len()];
18762 let mut direct_work = vec![0; text.len()];
18763 let mut omp_bwt = vec![0; text.len()];
18764 let mut omp_work = vec![0; text.len()];
18765 assert_eq!(
18766 libsais16_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None),
18767 libsais16_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, 2)
18768 );
18769 assert_eq!(omp_bwt, direct_bwt);
18770 assert_eq!(
18771 libsais16_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, -1),
18772 -1
18773 );
18774
18775 let mut direct_aux = vec![0; 2];
18776 let mut omp_aux = vec![0; 2];
18777 assert_eq!(
18778 libsais16_bwt_aux(
18779 &text,
18780 &mut direct_bwt,
18781 &mut direct_work,
18782 0,
18783 None,
18784 4,
18785 &mut direct_aux
18786 ),
18787 libsais16_bwt_aux_omp(
18788 &text,
18789 &mut omp_bwt,
18790 &mut omp_work,
18791 0,
18792 None,
18793 4,
18794 &mut omp_aux,
18795 2
18796 )
18797 );
18798 assert_eq!(omp_bwt, direct_bwt);
18799 assert_eq!(omp_aux, direct_aux);
18800 assert_eq!(
18801 libsais16_bwt_aux_omp(
18802 &text,
18803 &mut omp_bwt,
18804 &mut omp_work,
18805 0,
18806 None,
18807 4,
18808 &mut omp_aux,
18809 -1
18810 ),
18811 -1
18812 );
18813 }
18814
18815 #[test]
18816 fn libsais16_omp_frequency_wrappers_match_direct_calls() {
18817 let text = [2, 1, 3, 1, 2, 0];
18818 let gsa_text = [2, 1, 0, 3, 1, 0];
18819 let mut direct_sa = vec![0; text.len()];
18820 let mut omp_sa = vec![0; text.len()];
18821 let mut direct_freq = vec![-1; ALPHABET_SIZE];
18822 let mut omp_freq = vec![-1; ALPHABET_SIZE];
18823 assert_eq!(
18824 libsais16(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
18825 0
18826 );
18827 assert_eq!(
18828 libsais16_omp(&text, &mut omp_sa, 0, Some(&mut omp_freq), 2),
18829 0
18830 );
18831 assert_eq!(omp_sa, direct_sa);
18832 assert_eq!(omp_freq, direct_freq);
18833
18834 let mut direct_gsa = vec![0; gsa_text.len()];
18835 let mut omp_gsa = vec![0; gsa_text.len()];
18836 direct_freq.fill(-1);
18837 omp_freq.fill(-1);
18838 assert_eq!(
18839 libsais16_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
18840 0
18841 );
18842 assert_eq!(
18843 libsais16_gsa_omp(&gsa_text, &mut omp_gsa, 0, Some(&mut omp_freq), 2),
18844 0
18845 );
18846 assert_eq!(omp_gsa, direct_gsa);
18847 assert_eq!(omp_freq, direct_freq);
18848
18849 let mut direct_bwt = vec![0; text.len()];
18850 let mut direct_work = vec![0; text.len()];
18851 let mut omp_bwt = vec![0; text.len()];
18852 let mut omp_work = vec![0; text.len()];
18853 direct_freq.fill(-1);
18854 omp_freq.fill(-1);
18855 assert_eq!(
18856 libsais16_bwt(
18857 &text,
18858 &mut direct_bwt,
18859 &mut direct_work,
18860 0,
18861 Some(&mut direct_freq)
18862 ),
18863 libsais16_bwt_omp(
18864 &text,
18865 &mut omp_bwt,
18866 &mut omp_work,
18867 0,
18868 Some(&mut omp_freq),
18869 2
18870 )
18871 );
18872 assert_eq!(omp_bwt, direct_bwt);
18873 assert_eq!(omp_freq, direct_freq);
18874
18875 let mut direct_aux = vec![0; 2];
18876 let mut omp_aux = vec![0; 2];
18877 direct_freq.fill(-1);
18878 omp_freq.fill(-1);
18879 assert_eq!(
18880 libsais16_bwt_aux(
18881 &text,
18882 &mut direct_bwt,
18883 &mut direct_work,
18884 0,
18885 Some(&mut direct_freq),
18886 4,
18887 &mut direct_aux
18888 ),
18889 libsais16_bwt_aux_omp(
18890 &text,
18891 &mut omp_bwt,
18892 &mut omp_work,
18893 0,
18894 Some(&mut omp_freq),
18895 4,
18896 &mut omp_aux,
18897 2
18898 )
18899 );
18900 assert_eq!(omp_bwt, direct_bwt);
18901 assert_eq!(omp_aux, direct_aux);
18902 assert_eq!(omp_freq, direct_freq);
18903 }
18904
18905 #[test]
18906 fn libsais16_unbwt_omp_frequency_wrappers_match_direct_calls() {
18907 let text = [2, 1, 3, 1, 2, 0];
18908 let mut freq = vec![0; ALPHABET_SIZE];
18909 let mut bwt = vec![0; text.len()];
18910 let mut work = vec![0; text.len()];
18911 let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, Some(&mut freq));
18912 assert!(primary >= 0);
18913
18914 let mut direct = vec![0; text.len()];
18915 let mut direct_work = vec![0; text.len() + 1];
18916 let mut omp = vec![0; text.len()];
18917 let mut omp_work = vec![0; text.len() + 1];
18918 assert_eq!(
18919 libsais16_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
18920 libsais16_unbwt_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), primary, 2)
18921 );
18922 assert_eq!(omp, direct);
18923 assert_eq!(omp, text);
18924
18925 let mut aux = vec![0; (text.len() - 1) / 4 + 1];
18926 assert_eq!(
18927 libsais16_bwt_aux(&text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
18928 0
18929 );
18930 direct.fill(0);
18931 direct_work.fill(0);
18932 omp.fill(0);
18933 omp_work.fill(0);
18934 assert_eq!(
18935 libsais16_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
18936 libsais16_unbwt_aux_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), 4, &aux, 2)
18937 );
18938 assert_eq!(omp, direct);
18939 assert_eq!(omp, text);
18940 }
18941
18942 #[test]
18943 fn libsais16_lcp_and_unbwt_omp_wrappers_match_direct_calls() {
18944 let text = [2, 1, 3, 1, 2, 0];
18945 let mut sa = vec![0; text.len()];
18946 assert_eq!(libsais16(&text, &mut sa, 0, None), 0);
18947
18948 let mut direct_plcp = vec![0; text.len()];
18949 let mut omp_plcp = vec![0; text.len()];
18950 assert_eq!(libsais16_plcp(&text, &sa, &mut direct_plcp), 0);
18951 assert_eq!(libsais16_plcp_omp(&text, &sa, &mut omp_plcp, 2), 0);
18952 assert_eq!(omp_plcp, direct_plcp);
18953 assert_eq!(libsais16_plcp_omp(&text, &sa, &mut omp_plcp, -1), -1);
18954
18955 let gsa_text = [2, 1, 0, 1, 2, 0];
18956 let mut gsa = vec![0; gsa_text.len()];
18957 assert_eq!(libsais16_gsa(&gsa_text, &mut gsa, 0, None), 0);
18958 let mut direct_gsa_plcp = vec![0; gsa_text.len()];
18959 let mut omp_gsa_plcp = vec![0; gsa_text.len()];
18960 assert_eq!(libsais16_plcp_gsa(&gsa_text, &gsa, &mut direct_gsa_plcp), 0);
18961 assert_eq!(
18962 libsais16_plcp_gsa_omp(&gsa_text, &gsa, &mut omp_gsa_plcp, 2),
18963 0
18964 );
18965 assert_eq!(omp_gsa_plcp, direct_gsa_plcp);
18966 assert_eq!(
18967 libsais16_plcp_gsa_omp(&gsa_text, &gsa, &mut omp_gsa_plcp, -1),
18968 -1
18969 );
18970
18971 let mut direct_lcp = vec![0; text.len()];
18972 let mut omp_lcp = vec![0; text.len()];
18973 assert_eq!(libsais16_lcp(&direct_plcp, &sa, &mut direct_lcp), 0);
18974 assert_eq!(libsais16_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, 2), 0);
18975 assert_eq!(omp_lcp, direct_lcp);
18976 assert_eq!(libsais16_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, -1), -1);
18977
18978 let mut bwt = vec![0; text.len()];
18979 let mut work = vec![0; text.len()];
18980 let primary = libsais16_bwt(&text, &mut bwt, &mut work, 0, None);
18981 let mut direct = vec![0; text.len()];
18982 let mut omp = vec![0; text.len()];
18983 let mut direct_work = vec![0; text.len()];
18984 let mut omp_work = vec![0; text.len()];
18985 assert_eq!(
18986 libsais16_unbwt(&bwt, &mut direct, &mut direct_work, None, primary),
18987 0
18988 );
18989 assert_eq!(
18990 libsais16_unbwt_omp(&bwt, &mut omp, &mut omp_work, None, primary, 2),
18991 0
18992 );
18993 assert_eq!(omp, direct);
18994 assert_eq!(
18995 libsais16_unbwt_omp(&bwt, &mut omp, &mut omp_work, None, primary, -1),
18996 -1
18997 );
18998 }
18999}