1use std::mem;
8
/// Signed 64-bit integer used in this file for suffix-array entries, lengths and counts.
pub type SaSint = i64;
/// Unsigned 64-bit counterpart of [`SaSint`].
pub type SaUint = u64;

/// Number of distinct symbol values: 2^16, i.e. one slot per possible `u16` symbol.
pub const ALPHABET_SIZE: usize = 1usize << 16;
/// Largest `SaSint`; used as a mask that clears the sign bit of a symbol.
const SAINT_MAX: SaSint = SaSint::MAX;
/// Smallest `SaSint` (only the sign bit set); XOR-ed into entries to toggle that bit.
const SAINT_MIN: SaSint = SaSint::MIN;
/// Width of `SaSint` in bits.
const SAINT_BIT: u32 = 64;
/// Evaluates to 63.
const SUFFIX_GROUP_BIT: u32 = SAINT_BIT - 1;
/// Evaluates to `1 << 62`.
const SUFFIX_GROUP_MARKER: SaSint = 1_i64 << (SUFFIX_GROUP_BIT - 1);
// NOTE(review): the two flag constants are not referenced in the visible part of
// the file; presumably they select BWT / generalized-suffix-array modes — confirm.
const LIBSAIS_FLAGS_BWT: SaSint = 1;
const LIBSAIS_FLAGS_GSA: SaSint = 2;
/// Free space (in entries) assumed by the `*_omp` helpers when `local_buckets != 0`.
const LIBSAIS_LOCAL_BUFFER_SIZE: usize = 2000;
/// The `fastbits` table of [`UnbwtContext`] is allocated with `1 + 2^UNBWT_FASTBITS` entries.
const UNBWT_FASTBITS: usize = 17;
/// Number of [`ThreadCache`] entries allocated for every [`ThreadState`].
const PER_THREAD_CACHE_SIZE: usize = 2_097_184;
23
/// One deferred suffix-array write.
///
/// `place_cached_suffixes` stores `index` at `sa[symbol]`, and
/// `compact_and_place_cached_suffixes` drops entries whose `symbol` is negative.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct ThreadCache {
    // Destination slot in the suffix array (despite the field name); negative = skip.
    symbol: SaSint,
    // Value written into that slot.
    index: SaSint,
}
30
/// Per-thread scratch state, allocated by `alloc_thread_state`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ThreadState {
    // Exclusive end of the text block the thread processed (set by the `*_fs_omp` helpers).
    position: SaSint,
    // NOTE(review): not read or written in the visible part of the file — confirm purpose.
    m: SaSint,
    // NOTE(review): not read or written in the visible part of the file — confirm purpose.
    last_lms_suffix: SaSint,
    // Number of suffixes the thread gathered for its block (set by the `*_fs_omp` helpers).
    count: SaSint,
    // Allocated with `4 * ALPHABET_SIZE` entries.
    buckets: Vec<SaSint>,
    // Allocated with `PER_THREAD_CACHE_SIZE` default entries.
    cache: Vec<ThreadCache>,
    // Initialised to `PER_THREAD_CACHE_SIZE`, i.e. the length of `cache`.
    cache_entries: usize,
}
41
/// Reusable working memory for suffix-array construction, built by `create_ctx_main`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Context {
    // Allocated with `8 * ALPHABET_SIZE` zeroed entries.
    buckets: Vec<SaSint>,
    // `Some` (one state per thread) only when the context was created with more than one thread.
    thread_state: Option<Vec<ThreadState>>,
    // Thread count the context was created with.
    threads: SaSint,
}
48
/// Reusable working memory for the inverse transform, built by `unbwt_create_ctx_main`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct UnbwtContext {
    // Allocated with `ALPHABET_SIZE` zeroed entries.
    bucket2: Vec<usize>,
    // Allocated with `1 + 2^UNBWT_FASTBITS` zeroed entries.
    fastbits: Vec<u16>,
    // `Some` (`threads * ALPHABET_SIZE` entries) only when created with more than one thread.
    buckets: Option<Vec<usize>>,
    // Thread count the context was created with.
    threads: SaSint,
}
56
57pub fn create_ctx() -> Option<Context> {
63 create_ctx_main(1)
64}
65
66pub fn create_ctx_omp(threads: SaSint) -> Option<Context> {
74 if threads < 0 {
75 None
76 } else {
77 create_ctx_main(normalize_threads(threads))
78 }
79}
80
81pub fn free_ctx(_ctx: Context) {}
83
84pub fn unbwt_create_ctx() -> Option<UnbwtContext> {
90 unbwt_create_ctx_main(1)
91}
92
93pub fn unbwt_create_ctx_omp(threads: SaSint) -> Option<UnbwtContext> {
101 if threads < 0 {
102 None
103 } else {
104 unbwt_create_ctx_main(normalize_threads(threads))
105 }
106}
107
108pub fn unbwt_free_ctx(_ctx: UnbwtContext) {}
110
111fn normalize_threads(threads: SaSint) -> SaSint {
112 if threads > 0 {
113 threads
114 } else {
115 1
116 }
117}
118
/// Rounds `value` up to a multiple of `alignment` using mask arithmetic.
///
/// The result is only a true multiple when `alignment` is a power of two.
/// `alignment == 0` underflows when the mask is computed.
fn align_up(value: usize, alignment: usize) -> usize {
    let mask = alignment - 1;
    let bumped = value + mask;
    bumped & !mask
}
122
123fn alloc_thread_state(threads: SaSint) -> Option<Vec<ThreadState>> {
124 let threads = usize::try_from(threads).ok()?;
125 let mut thread_state = Vec::with_capacity(threads);
126 for _ in 0..threads {
127 thread_state.push(ThreadState {
128 position: 0,
129 m: 0,
130 last_lms_suffix: 0,
131 count: 0,
132 buckets: vec![0; 4 * ALPHABET_SIZE],
133 cache: vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE],
134 cache_entries: PER_THREAD_CACHE_SIZE,
135 });
136 }
137 Some(thread_state)
138}
139
140fn create_ctx_main(threads: SaSint) -> Option<Context> {
141 let buckets = vec![0; 8 * ALPHABET_SIZE];
142 let thread_state = if threads > 1 {
143 Some(alloc_thread_state(threads)?)
144 } else {
145 None
146 };
147
148 Some(Context {
149 buckets,
150 thread_state,
151 threads,
152 })
153}
154
155fn unbwt_create_ctx_main(threads: SaSint) -> Option<UnbwtContext> {
156 let bucket2 = vec![0; ALPHABET_SIZE];
157 let fastbits = vec![0; 1 + (1 << UNBWT_FASTBITS)];
158 let buckets = if threads > 1 {
159 Some(vec![0; usize::try_from(threads).ok()? * ALPHABET_SIZE])
160 } else {
161 None
162 };
163
164 Some(UnbwtContext {
165 bucket2,
166 fastbits,
167 buckets,
168 threads,
169 })
170}
171
172fn fill_freq(t: &[u16], freq: Option<&mut [SaSint]>) {
173 if let Some(freq) = freq {
174 freq[..ALPHABET_SIZE].fill(0);
175 for &symbol in t {
176 freq[symbol as usize] += 1;
177 }
178 }
179}
180
/// Index of sub-slot `s` for symbol `c` in a table with four slots per symbol.
#[allow(dead_code)]
fn buckets_index4(c: usize, s: usize) -> usize {
    let symbol_base = c << 2;
    symbol_base + s
}
185
/// Index of sub-slot `s` for symbol `c` in a table with two slots per symbol.
#[allow(dead_code)]
fn buckets_index2(c: usize, s: usize) -> usize {
    let symbol_base = c << 1;
    symbol_base + s
}
190
191#[allow(dead_code)]
192fn place_cached_suffixes(
193 sa: &mut [SaSint],
194 cache: &[ThreadCache],
195 block_start: SaSint,
196 block_size: SaSint,
197) {
198 let start = usize::try_from(block_start).expect("block_start must be non-negative");
199 let len = usize::try_from(block_size).expect("block_size must be non-negative");
200 let entries = if cache.len() >= start + len {
201 &cache[start..start + len]
202 } else {
203 &cache[..len]
204 };
205
206 for entry in entries {
207 sa[entry.symbol as usize] = entry.index;
208 }
209}
210
211#[allow(dead_code)]
212fn compact_and_place_cached_suffixes(
213 sa: &mut [SaSint],
214 cache: &mut [ThreadCache],
215 block_start: SaSint,
216 block_size: SaSint,
217) {
218 let start = usize::try_from(block_start).expect("block_start must be non-negative");
219 let len = usize::try_from(block_size).expect("block_size must be non-negative");
220 let read_start = if cache.len() >= start + len { start } else { 0 };
221 let read_end = read_start + len;
222
223 let mut write = read_start;
224 for read in read_start..read_end {
225 let entry = cache[read];
226 if entry.symbol >= 0 {
227 cache[write] = entry;
228 write += 1;
229 }
230 }
231 place_cached_suffixes(sa, cache, block_start, (write - read_start) as SaSint);
232}
233
234#[allow(dead_code)]
235fn count_negative_marked_suffixes(
236 sa: &[SaSint],
237 block_start: SaSint,
238 block_size: SaSint,
239) -> SaSint {
240 let start = block_start as usize;
241 let end = start + block_size as usize;
242 sa[start..end].iter().filter(|&&value| value < 0).count() as SaSint
243}
244
245#[allow(dead_code)]
246fn count_zero_marked_suffixes(sa: &[SaSint], block_start: SaSint, block_size: SaSint) -> SaSint {
247 let start = block_start as usize;
248 let end = start + block_size as usize;
249 sa[start..end].iter().filter(|&&value| value == 0).count() as SaSint
250}
251
252#[allow(dead_code)]
253fn accumulate_counts_s32_n(
254 buckets: &mut [SaSint],
255 bucket00: usize,
256 bucket_size: usize,
257 bucket_stride: usize,
258 num_buckets: usize,
259) {
260 for s in 0..bucket_size {
261 let mut sum = buckets[bucket00 + s];
262 for bucket in 1..num_buckets {
263 sum += buckets[bucket00 - bucket * bucket_stride + s];
264 }
265 buckets[bucket00 + s] = sum;
266 }
267}
268
269#[allow(dead_code)]
270fn accumulate_counts_s32_2(
271 buckets: &mut [SaSint],
272 bucket00: usize,
273 bucket_size: usize,
274 bucket_stride: usize,
275) {
276 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 2);
277}
278
279#[allow(dead_code)]
280fn accumulate_counts_s32_3(
281 buckets: &mut [SaSint],
282 bucket00: usize,
283 bucket_size: usize,
284 bucket_stride: usize,
285) {
286 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 3);
287}
288
289#[allow(dead_code)]
290fn accumulate_counts_s32_4(
291 buckets: &mut [SaSint],
292 bucket00: usize,
293 bucket_size: usize,
294 bucket_stride: usize,
295) {
296 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 4);
297}
298
299#[allow(dead_code)]
300fn accumulate_counts_s32_5(
301 buckets: &mut [SaSint],
302 bucket00: usize,
303 bucket_size: usize,
304 bucket_stride: usize,
305) {
306 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 5);
307}
308
309#[allow(dead_code)]
310fn accumulate_counts_s32_6(
311 buckets: &mut [SaSint],
312 bucket00: usize,
313 bucket_size: usize,
314 bucket_stride: usize,
315) {
316 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 6);
317}
318
319#[allow(dead_code)]
320fn accumulate_counts_s32_7(
321 buckets: &mut [SaSint],
322 bucket00: usize,
323 bucket_size: usize,
324 bucket_stride: usize,
325) {
326 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 7);
327}
328
329#[allow(dead_code)]
330fn accumulate_counts_s32_8(
331 buckets: &mut [SaSint],
332 bucket00: usize,
333 bucket_size: usize,
334 bucket_stride: usize,
335) {
336 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 8);
337}
338
339#[allow(dead_code)]
340fn accumulate_counts_s32_9(
341 buckets: &mut [SaSint],
342 bucket00: usize,
343 bucket_size: usize,
344 bucket_stride: usize,
345) {
346 accumulate_counts_s32_n(buckets, bucket00, bucket_size, bucket_stride, 9);
347}
348
349#[allow(dead_code)]
350fn accumulate_counts_s32(
351 buckets: &mut [SaSint],
352 bucket00: usize,
353 bucket_size: usize,
354 bucket_stride: usize,
355 mut num_buckets: usize,
356) {
357 while num_buckets >= 9 {
358 accumulate_counts_s32_9(
359 buckets,
360 bucket00 - (num_buckets - 9) * bucket_stride,
361 bucket_size,
362 bucket_stride,
363 );
364 num_buckets -= 8;
365 }
366
367 match num_buckets {
368 2 => accumulate_counts_s32_2(buckets, bucket00, bucket_size, bucket_stride),
369 3 => accumulate_counts_s32_3(buckets, bucket00, bucket_size, bucket_stride),
370 4 => accumulate_counts_s32_4(buckets, bucket00, bucket_size, bucket_stride),
371 5 => accumulate_counts_s32_5(buckets, bucket00, bucket_size, bucket_stride),
372 6 => accumulate_counts_s32_6(buckets, bucket00, bucket_size, bucket_stride),
373 7 => accumulate_counts_s32_7(buckets, bucket00, bucket_size, bucket_stride),
374 8 => accumulate_counts_s32_8(buckets, bucket00, bucket_size, bucket_stride),
375 _ => {}
376 }
377}
378
379#[allow(dead_code)]
380fn flip_suffix_markers_omp(sa: &mut [SaSint], l: SaSint, threads: SaSint) {
381 let len = usize::try_from(l).expect("l must be non-negative");
382 let omp_num_threads = if threads > 1 && l >= 65_536 {
383 usize::try_from(threads).expect("threads must be non-negative")
384 } else {
385 1
386 };
387 let omp_block_stride = (len / omp_num_threads) & !15usize;
388 for omp_thread_num in 0..omp_num_threads {
389 let omp_block_start = omp_thread_num * omp_block_stride;
390 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
391 omp_block_stride
392 } else {
393 len - omp_block_start
394 };
395 for value in &mut sa[omp_block_start..omp_block_start + omp_block_size] {
396 *value ^= SAINT_MIN;
397 }
398 }
399}
400
401#[allow(dead_code)]
402fn gather_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
403 let mut i = n - 2;
404 let mut m = n - 1;
405 let mut f0 = 1usize;
406 let mut f1: usize;
407 let mut c0 = t[(n - 1) as usize] as isize;
408 let mut c1: isize;
409
410 while i >= 3 {
411 c1 = t[i as usize] as isize;
412 f1 = usize::from(c1 > c0 - f0 as isize);
413 sa[m as usize] = i + 1;
414 m -= (f1 & !f0) as SaSint;
415
416 c0 = t[(i - 1) as usize] as isize;
417 f0 = usize::from(c0 > c1 - f1 as isize);
418 sa[m as usize] = i;
419 m -= (f0 & !f1) as SaSint;
420
421 c1 = t[(i - 2) as usize] as isize;
422 f1 = usize::from(c1 > c0 - f0 as isize);
423 sa[m as usize] = i - 1;
424 m -= (f1 & !f0) as SaSint;
425
426 c0 = t[(i - 3) as usize] as isize;
427 f0 = usize::from(c0 > c1 - f1 as isize);
428 sa[m as usize] = i - 2;
429 m -= (f0 & !f1) as SaSint;
430
431 i -= 4;
432 }
433
434 while i >= 0 {
435 c1 = c0;
436 c0 = t[i as usize] as isize;
437 f1 = f0;
438 f0 = usize::from(c0 > c1 - f1 as isize);
439 sa[m as usize] = i + 1;
440 m -= (f0 & !f1) as SaSint;
441 i -= 1;
442 }
443
444 n - 1 - m
445}
446
447#[allow(dead_code)]
448fn gather_compacted_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
449 let mut i = n - 2;
450 let mut m = n - 1;
451 let mut f0 = 1usize;
452 let mut f1: usize;
453 let mut c0 = t[(n - 1) as usize] as isize;
454 let mut c1: isize;
455
456 while i >= 3 {
457 c1 = t[i as usize] as isize;
458 f1 = usize::from(c1 > c0 - f0 as isize);
459 sa[m as usize] = i + 1;
460 m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
461
462 c0 = t[(i - 1) as usize] as isize;
463 f0 = usize::from(c0 > c1 - f1 as isize);
464 sa[m as usize] = i;
465 m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
466
467 c1 = t[(i - 2) as usize] as isize;
468 f1 = usize::from(c1 > c0 - f0 as isize);
469 sa[m as usize] = i - 1;
470 m -= (f1 & !f0 & usize::from(c0 >= 0)) as SaSint;
471
472 c0 = t[(i - 3) as usize] as isize;
473 f0 = usize::from(c0 > c1 - f1 as isize);
474 sa[m as usize] = i - 2;
475 m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
476
477 i -= 4;
478 }
479
480 while i >= 0 {
481 c1 = c0;
482 c0 = t[i as usize] as isize;
483 f1 = f0;
484 f0 = usize::from(c0 > c1 - f1 as isize);
485 sa[m as usize] = i + 1;
486 m -= (f0 & !f1 & usize::from(c1 >= 0)) as SaSint;
487 i -= 1;
488 }
489
490 n - 1 - m
491}
492
493#[allow(dead_code)]
494fn count_lms_suffixes_32s_4k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
495 buckets[..4 * k as usize].fill(0);
496 let mut i = n - 2;
497 let mut f0 = 1usize;
498 let mut f1: usize;
499 let mut c0 = t[(n - 1) as usize] as isize;
500 let mut c1: isize;
501
502 while i >= 3 {
503 c1 = t[i as usize] as isize;
504 f1 = usize::from(c1 > c0 - f0 as isize);
505 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
506
507 c0 = t[(i - 1) as usize] as isize;
508 f0 = usize::from(c0 > c1 - f1 as isize);
509 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
510
511 c1 = t[(i - 2) as usize] as isize;
512 f1 = usize::from(c1 > c0 - f0 as isize);
513 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
514
515 c0 = t[(i - 3) as usize] as isize;
516 f0 = usize::from(c0 > c1 - f1 as isize);
517 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
518
519 i -= 4;
520 }
521
522 while i >= 0 {
523 c1 = c0;
524 c0 = t[i as usize] as isize;
525 f1 = f0;
526 f0 = usize::from(c0 > c1 - f1 as isize);
527 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
528 i -= 1;
529 }
530
531 buckets[buckets_index4(c0 as usize, f0 + f0)] += 1;
532}
533
534#[allow(dead_code)]
535fn count_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
536 buckets[..2 * k as usize].fill(0);
537 let mut i = n - 2;
538 let mut f0 = 1usize;
539 let mut f1: usize;
540 let mut c0 = t[(n - 1) as usize] as isize;
541 let mut c1: isize;
542
543 while i >= 3 {
544 c1 = t[i as usize] as isize;
545 f1 = usize::from(c1 > c0 - f0 as isize);
546 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
547
548 c0 = t[(i - 1) as usize] as isize;
549 f0 = usize::from(c0 > c1 - f1 as isize);
550 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
551
552 c1 = t[(i - 2) as usize] as isize;
553 f1 = usize::from(c1 > c0 - f0 as isize);
554 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
555
556 c0 = t[(i - 3) as usize] as isize;
557 f0 = usize::from(c0 > c1 - f1 as isize);
558 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
559
560 i -= 4;
561 }
562
563 while i >= 0 {
564 c1 = c0;
565 c0 = t[i as usize] as isize;
566 f1 = f0;
567 f0 = usize::from(c0 > c1 - f1 as isize);
568 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
569 i -= 1;
570 }
571
572 buckets[buckets_index2(c0 as usize, 0)] += 1;
573}
574
575#[allow(dead_code)]
576fn count_compacted_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
577 buckets[..2 * k as usize].fill(0);
578 let mut i = n - 2;
579 let mut f0 = 1usize;
580 let mut f1: usize;
581 let mut c0 = t[(n - 1) as usize] as isize;
582 let mut c1: isize;
583
584 while i >= 3 {
585 c1 = t[i as usize] as isize;
586 f1 = usize::from(c1 > c0 - f0 as isize);
587 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
588
589 c0 = t[(i - 1) as usize] as isize;
590 f0 = usize::from(c0 > c1 - f1 as isize);
591 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
592
593 c1 = t[(i - 2) as usize] as isize;
594 f1 = usize::from(c1 > c0 - f0 as isize);
595 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
596
597 c0 = t[(i - 3) as usize] as isize;
598 f0 = usize::from(c0 > c1 - f1 as isize);
599 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
600
601 i -= 4;
602 }
603
604 while i >= 0 {
605 c1 = c0;
606 c0 = t[i as usize] as isize;
607 f1 = f0;
608 f0 = usize::from(c0 > c1 - f1 as isize);
609 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
610 i -= 1;
611 }
612
613 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, 0)] += 1;
614}
615
616#[allow(dead_code)]
617fn get_bucket_stride(free_space: SaSint, bucket_size: SaSint, num_buckets: SaSint) -> SaSint {
618 let bucket_size_1024 = (bucket_size + 1023) & !1023;
619 if free_space / (num_buckets - 1) >= bucket_size_1024 {
620 return bucket_size_1024;
621 }
622 let bucket_size_16 = (bucket_size + 15) & !15;
623 if free_space / (num_buckets - 1) >= bucket_size_16 {
624 return bucket_size_16;
625 }
626 bucket_size
627}
628
629#[allow(dead_code)]
630fn count_and_gather_lms_suffixes_32s_4k(
631 t: &[SaSint],
632 sa: &mut [SaSint],
633 n: SaSint,
634 k: SaSint,
635 buckets: &mut [SaSint],
636 omp_block_start: isize,
637 omp_block_size: isize,
638) -> SaSint {
639 buckets[..4 * k as usize].fill(0);
640 let mut m = omp_block_start + omp_block_size - 1;
641
642 if omp_block_size > 0 {
643 let mut j = m + 1;
644 let mut c0 = t[m as usize] as isize;
645 let mut c1 = -1isize;
646 while j < n as isize {
647 c1 = t[j as usize] as isize;
648 if c1 != c0 {
649 break;
650 }
651 j += 1;
652 }
653
654 let mut f0 = usize::from(c0 >= c1);
655 let mut f1: usize;
656 let mut i = m - 1;
657 j = omp_block_start + 64 + 3;
658 while i >= j {
659 c1 = t[i as usize] as isize;
660 f1 = usize::from(c1 > c0 - f0 as isize);
661 sa[m as usize] = (i + 1) as SaSint;
662 m -= (f1 & !f0) as isize;
663 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
664
665 c0 = t[(i - 1) as usize] as isize;
666 f0 = usize::from(c0 > c1 - f1 as isize);
667 sa[m as usize] = i as SaSint;
668 m -= (f0 & !f1) as isize;
669 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
670
671 c1 = t[(i - 2) as usize] as isize;
672 f1 = usize::from(c1 > c0 - f0 as isize);
673 sa[m as usize] = (i - 1) as SaSint;
674 m -= (f1 & !f0) as isize;
675 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
676
677 c0 = t[(i - 3) as usize] as isize;
678 f0 = usize::from(c0 > c1 - f1 as isize);
679 sa[m as usize] = (i - 2) as SaSint;
680 m -= (f0 & !f1) as isize;
681 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
682
683 i -= 4;
684 }
685
686 j -= 64 + 3;
687 while i >= j {
688 c1 = c0;
689 c0 = t[i as usize] as isize;
690 f1 = f0;
691 f0 = usize::from(c0 > c1 - f1 as isize);
692 sa[m as usize] = (i + 1) as SaSint;
693 m -= (f0 & !f1) as isize;
694 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
695 i -= 1;
696 }
697
698 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
699 f1 = usize::from(c1 > c0 - f0 as isize);
700 sa[m as usize] = (i + 1) as SaSint;
701 m -= (f1 & !f0) as isize;
702 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
703 }
704
705 (omp_block_start + omp_block_size - 1 - m) as SaSint
706}
707
708#[allow(dead_code)]
709fn count_and_gather_lms_suffixes_32s_2k(
710 t: &[SaSint],
711 sa: &mut [SaSint],
712 n: SaSint,
713 k: SaSint,
714 buckets: &mut [SaSint],
715 omp_block_start: isize,
716 omp_block_size: isize,
717) -> SaSint {
718 buckets[..2 * k as usize].fill(0);
719 let mut m = omp_block_start + omp_block_size - 1;
720
721 if omp_block_size > 0 {
722 let mut j = m + 1;
723 let mut c0 = t[m as usize] as isize;
724 let mut c1 = -1isize;
725 while j < n as isize {
726 c1 = t[j as usize] as isize;
727 if c1 != c0 {
728 break;
729 }
730 j += 1;
731 }
732
733 let mut f0 = usize::from(c0 >= c1);
734 let mut f1: usize;
735 let mut i = m - 1;
736 j = omp_block_start + 64 + 3;
737 while i >= j {
738 c1 = t[i as usize] as isize;
739 f1 = usize::from(c1 > c0 - f0 as isize);
740 sa[m as usize] = (i + 1) as SaSint;
741 m -= (f1 & !f0) as isize;
742 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
743
744 c0 = t[(i - 1) as usize] as isize;
745 f0 = usize::from(c0 > c1 - f1 as isize);
746 sa[m as usize] = i as SaSint;
747 m -= (f0 & !f1) as isize;
748 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
749
750 c1 = t[(i - 2) as usize] as isize;
751 f1 = usize::from(c1 > c0 - f0 as isize);
752 sa[m as usize] = (i - 1) as SaSint;
753 m -= (f1 & !f0) as isize;
754 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
755
756 c0 = t[(i - 3) as usize] as isize;
757 f0 = usize::from(c0 > c1 - f1 as isize);
758 sa[m as usize] = (i - 2) as SaSint;
759 m -= (f0 & !f1) as isize;
760 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
761
762 i -= 4;
763 }
764
765 j -= 64 + 3;
766 while i >= j {
767 c1 = c0;
768 c0 = t[i as usize] as isize;
769 f1 = f0;
770 f0 = usize::from(c0 > c1 - f1 as isize);
771 sa[m as usize] = (i + 1) as SaSint;
772 m -= (f0 & !f1) as isize;
773 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
774 i -= 1;
775 }
776
777 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
778 f1 = usize::from(c1 > c0 - f0 as isize);
779 sa[m as usize] = (i + 1) as SaSint;
780 m -= (f1 & !f0) as isize;
781 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
782 }
783
784 (omp_block_start + omp_block_size - 1 - m) as SaSint
785}
786
787#[allow(dead_code)]
788fn count_and_gather_compacted_lms_suffixes_32s_2k(
789 t: &[SaSint],
790 sa: &mut [SaSint],
791 n: SaSint,
792 k: SaSint,
793 buckets: &mut [SaSint],
794 omp_block_start: isize,
795 omp_block_size: isize,
796) -> SaSint {
797 buckets[..2 * k as usize].fill(0);
798 let mut m = omp_block_start + omp_block_size - 1;
799
800 if omp_block_size > 0 {
801 let mut j = m + 1;
802 let mut c0 = t[m as usize] as isize;
803 let mut c1 = -1isize;
804 while j < n as isize {
805 c1 = t[j as usize] as isize;
806 if c1 != c0 {
807 break;
808 }
809 j += 1;
810 }
811
812 let mut f0 = usize::from(c0 >= c1);
813 let mut f1: usize;
814 let mut i = m - 1;
815 j = omp_block_start + 64 + 3;
816 while i >= j {
817 c1 = t[i as usize] as isize;
818 f1 = usize::from(c1 > c0 - f0 as isize);
819 sa[m as usize] = (i + 1) as SaSint;
820 m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
821 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
822
823 c0 = t[(i - 1) as usize] as isize;
824 f0 = usize::from(c0 > c1 - f1 as isize);
825 sa[m as usize] = i as SaSint;
826 m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
827 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
828
829 c1 = t[(i - 2) as usize] as isize;
830 f1 = usize::from(c1 > c0 - f0 as isize);
831 sa[m as usize] = (i - 1) as SaSint;
832 m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
833 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
834
835 c0 = t[(i - 3) as usize] as isize;
836 f0 = usize::from(c0 > c1 - f1 as isize);
837 sa[m as usize] = (i - 2) as SaSint;
838 m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
839 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
840
841 i -= 4;
842 }
843
844 j -= 64 + 3;
845 while i >= j {
846 c1 = c0;
847 c0 = t[i as usize] as isize;
848 f1 = f0;
849 f0 = usize::from(c0 > c1 - f1 as isize);
850 sa[m as usize] = (i + 1) as SaSint;
851 m -= (f0 & !f1 & usize::from(c1 >= 0)) as isize;
852 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
853 i -= 1;
854 }
855
856 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
857 f1 = usize::from(c1 > c0 - f0 as isize);
858 sa[m as usize] = (i + 1) as SaSint;
859 m -= (f1 & !f0 & usize::from(c0 >= 0)) as isize;
860 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
861 }
862
863 (omp_block_start + omp_block_size - 1 - m) as SaSint
864}
865
866#[allow(dead_code)]
867fn count_and_gather_lms_suffixes_32s_4k_fs_omp(
868 t: &[SaSint],
869 sa: &mut [SaSint],
870 n: SaSint,
871 k: SaSint,
872 buckets: &mut [SaSint],
873 local_buckets: SaSint,
874 threads: SaSint,
875 thread_state: &mut [ThreadState],
876) -> SaSint {
877 if threads == 1 || n < 65_536 {
878 return count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize);
879 }
880
881 let thread_count = threads as usize;
882 let n_usize = n as usize;
883 let bucket_size = 4 * k as usize;
884 let block_stride = (n / threads) & !15;
885 let free_space = if local_buckets != 0 {
886 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
887 } else {
888 buckets.len() as SaSint
889 };
890 let bucket_stride = get_bucket_stride(free_space, 4 * k, threads) as usize;
891 let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
892 let mut workspace = vec![0; workspace_len];
893
894 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
895 let block_start = thread as SaSint * block_stride;
896 let block_size = if thread + 1 < thread_count {
897 block_stride
898 } else {
899 n - block_start
900 };
901 let workspace_end = workspace_len - thread * bucket_stride;
902 let workspace_start = workspace_end - bucket_size;
903 state.count = count_and_gather_lms_suffixes_32s_4k(
904 t,
905 sa,
906 n,
907 k,
908 &mut workspace[workspace_start..workspace_end],
909 block_start as isize,
910 block_size as isize,
911 );
912 state.position = block_start + block_size;
913 }
914
915 let mut m = 0usize;
916 for thread in (0..thread_count).rev() {
917 let count =
918 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
919 m += count;
920 if thread + 1 != thread_count && count > 0 {
921 let src_end = usize::try_from(thread_state[thread].position)
922 .expect("position must be non-negative");
923 let src_start = src_end - count;
924 let dst_start = n_usize - m;
925 sa.copy_within(src_start..src_end, dst_start);
926 }
927 }
928
929 let accumulation_threads = thread_count - 1;
930 let block_stride = (bucket_size / accumulation_threads) & !15usize;
931 for thread in 0..accumulation_threads {
932 let block_start = thread * block_stride;
933 let block_size = if thread + 1 < accumulation_threads {
934 block_stride
935 } else {
936 bucket_size - block_start
937 };
938 accumulate_counts_s32(
939 &mut workspace,
940 block_start,
941 block_size,
942 bucket_stride,
943 accumulation_threads + 1,
944 );
945 }
946
947 buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
948 m as SaSint
949}
950
951#[allow(dead_code)]
952fn count_and_gather_lms_suffixes_32s_2k_fs_omp(
953 t: &[SaSint],
954 sa: &mut [SaSint],
955 n: SaSint,
956 k: SaSint,
957 buckets: &mut [SaSint],
958 local_buckets: SaSint,
959 threads: SaSint,
960 thread_state: &mut [ThreadState],
961) -> SaSint {
962 if threads == 1 || n < 65_536 {
963 return count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
964 }
965
966 let thread_count = threads as usize;
967 let n_usize = n as usize;
968 let bucket_size = 2 * k as usize;
969 let block_stride = (n / threads) & !15;
970 let free_space = if local_buckets != 0 {
971 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
972 } else {
973 buckets.len() as SaSint
974 };
975 let bucket_stride = get_bucket_stride(free_space, 2 * k, threads) as usize;
976 let workspace_len = bucket_size + bucket_stride * thread_count.saturating_sub(1);
977 let mut workspace = vec![0; workspace_len];
978
979 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
980 let block_start = thread as SaSint * block_stride;
981 let block_size = if thread + 1 < thread_count {
982 block_stride
983 } else {
984 n - block_start
985 };
986 let workspace_end = workspace_len - thread * bucket_stride;
987 let workspace_start = workspace_end - bucket_size;
988 state.count = count_and_gather_lms_suffixes_32s_2k(
989 t,
990 sa,
991 n,
992 k,
993 &mut workspace[workspace_start..workspace_end],
994 block_start as isize,
995 block_size as isize,
996 );
997 state.position = block_start + block_size;
998 }
999
1000 let mut m = 0usize;
1001 for thread in (0..thread_count).rev() {
1002 let count =
1003 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1004 m += count;
1005 if thread + 1 != thread_count && count > 0 {
1006 let src_end = usize::try_from(thread_state[thread].position)
1007 .expect("position must be non-negative");
1008 let src_start = src_end - count;
1009 let dst_start = n_usize - m;
1010 sa.copy_within(src_start..src_end, dst_start);
1011 }
1012 }
1013
1014 let accumulation_threads = thread_count - 1;
1015 let block_stride = (bucket_size / accumulation_threads) & !15usize;
1016 for thread in 0..accumulation_threads {
1017 let block_start = thread * block_stride;
1018 let block_size = if thread + 1 < accumulation_threads {
1019 block_stride
1020 } else {
1021 bucket_size - block_start
1022 };
1023 accumulate_counts_s32(
1024 &mut workspace,
1025 block_start,
1026 block_size,
1027 bucket_stride,
1028 accumulation_threads + 1,
1029 );
1030 }
1031
1032 buckets[..bucket_size].copy_from_slice(&workspace[..bucket_size]);
1033 m as SaSint
1034}
1035
1036#[allow(dead_code)]
1037fn count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1038 t: &[SaSint],
1039 sa: &mut [SaSint],
1040 n: SaSint,
1041 k: SaSint,
1042 buckets: &mut [SaSint],
1043 _local_buckets: SaSint,
1044 threads: SaSint,
1045 thread_state: &mut [ThreadState],
1046) {
1047 if threads == 1 || n < 65_536 {
1048 count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
1049 return;
1050 }
1051
1052 let thread_count = threads as usize;
1053 let n_usize = n as usize;
1054 let bucket_size = 2 * k as usize;
1055 let block_stride = (n / threads) & !15;
1056 let mut workspaces = vec![vec![0; bucket_size]; thread_count];
1057 let mut gathered_runs = vec![Vec::<SaSint>::new(); thread_count];
1058
1059 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
1060 let block_start = thread as SaSint * block_stride;
1061 let block_size = if thread + 1 < thread_count {
1062 block_stride
1063 } else {
1064 n - block_start
1065 };
1066 let mut temp_sa = vec![0; n_usize + block_size as usize];
1067 state.count = count_and_gather_compacted_lms_suffixes_32s_2k(
1068 t,
1069 &mut temp_sa,
1070 n,
1071 k,
1072 &mut workspaces[thread],
1073 block_start as isize,
1074 block_size as isize,
1075 );
1076 state.position = block_start + block_size;
1077 let count = usize::try_from(state.count).expect("count must be non-negative");
1078 let src_end =
1079 n_usize + usize::try_from(state.position).expect("position must be non-negative");
1080 let src_start = src_end - count;
1081 gathered_runs[thread].extend_from_slice(&temp_sa[src_start..src_end]);
1082 }
1083
1084 let mut suffixes_before = 0usize;
1085 for thread in (0..thread_count).rev() {
1086 let count =
1087 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
1088 suffixes_before += count;
1089 if count > 0 {
1090 let dst_start = n_usize - suffixes_before;
1091 let dst_end = dst_start + count;
1092 sa[dst_start..dst_end].copy_from_slice(&gathered_runs[thread]);
1093 }
1094 }
1095
1096 buckets.fill(0);
1097 for workspace in &workspaces {
1098 for (dst, src) in buckets.iter_mut().zip(workspace.iter()) {
1099 *dst += *src;
1100 }
1101 }
1102}
1103
1104#[allow(dead_code)]
1105fn count_and_gather_lms_suffixes_32s_4k_nofs_omp(
1106 t: &[SaSint],
1107 sa: &mut [SaSint],
1108 n: SaSint,
1109 k: SaSint,
1110 buckets: &mut [SaSint],
1111 threads: SaSint,
1112) -> SaSint {
1113 if threads > 1 && n >= 65_536 {
1114 count_lms_suffixes_32s_4k(t, n, k, buckets);
1115 gather_lms_suffixes_32s(t, sa, n)
1116 } else {
1117 count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as isize)
1118 }
1119}
1120
1121#[allow(dead_code)]
1122fn count_and_gather_lms_suffixes_32s_2k_nofs_omp(
1123 t: &[SaSint],
1124 sa: &mut [SaSint],
1125 n: SaSint,
1126 k: SaSint,
1127 buckets: &mut [SaSint],
1128 threads: SaSint,
1129) -> SaSint {
1130 if threads > 1 && n >= 65_536 {
1131 count_lms_suffixes_32s_2k(t, n, k, buckets);
1132 gather_lms_suffixes_32s(t, sa, n)
1133 } else {
1134 count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1135 }
1136}
1137
1138#[allow(dead_code)]
1139fn count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
1140 t: &[SaSint],
1141 sa: &mut [SaSint],
1142 n: SaSint,
1143 k: SaSint,
1144 buckets: &mut [SaSint],
1145 threads: SaSint,
1146) -> SaSint {
1147 if threads > 1 && n >= 65_536 {
1148 count_compacted_lms_suffixes_32s_2k(t, n, k, buckets);
1149 gather_compacted_lms_suffixes_32s(t, sa, n)
1150 } else {
1151 count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize)
1152 }
1153}
1154
1155#[allow(dead_code)]
1156fn count_and_gather_lms_suffixes_32s_4k_omp(
1157 t: &[SaSint],
1158 sa: &mut [SaSint],
1159 n: SaSint,
1160 k: SaSint,
1161 buckets: &mut [SaSint],
1162 local_buckets: SaSint,
1163 threads: SaSint,
1164 thread_state: &mut [ThreadState],
1165) -> SaSint {
1166 let free_space = if local_buckets != 0 {
1167 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1168 } else {
1169 buckets.len() as SaSint
1170 };
1171 let mut max_threads = (free_space / (((4 * k) + 15) & !15)).min(threads);
1172
1173 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1174 let thread_cap = n / (16 * k);
1175 if max_threads > thread_cap {
1176 max_threads = thread_cap;
1177 }
1178 count_and_gather_lms_suffixes_32s_4k_fs_omp(
1179 t,
1180 sa,
1181 n,
1182 k,
1183 buckets,
1184 local_buckets,
1185 max_threads.max(2),
1186 thread_state,
1187 )
1188 } else if threads > 1 && n >= 65_536 {
1189 count_lms_suffixes_32s_4k(t, n, k, buckets);
1190 gather_lms_suffixes_32s(t, sa, n)
1191 } else {
1192 count_and_gather_lms_suffixes_32s_4k_nofs_omp(t, sa, n, k, buckets, threads)
1193 }
1194}
1195
1196#[allow(dead_code)]
1197fn count_and_gather_lms_suffixes_32s_2k_omp(
1198 t: &[SaSint],
1199 sa: &mut [SaSint],
1200 n: SaSint,
1201 k: SaSint,
1202 buckets: &mut [SaSint],
1203 local_buckets: SaSint,
1204 threads: SaSint,
1205 thread_state: &mut [ThreadState],
1206) -> SaSint {
1207 let free_space = if local_buckets != 0 {
1208 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1209 } else {
1210 buckets.len() as SaSint
1211 };
1212 let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1213
1214 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1215 let thread_cap = n / (8 * k);
1216 if max_threads > thread_cap {
1217 max_threads = thread_cap;
1218 }
1219 count_and_gather_lms_suffixes_32s_2k_fs_omp(
1220 t,
1221 sa,
1222 n,
1223 k,
1224 buckets,
1225 local_buckets,
1226 max_threads.max(2),
1227 thread_state,
1228 )
1229 } else if threads > 1 && n >= 65_536 {
1230 count_lms_suffixes_32s_2k(t, n, k, buckets);
1231 gather_lms_suffixes_32s(t, sa, n)
1232 } else {
1233 count_and_gather_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads)
1234 }
1235}
1236
1237#[allow(dead_code)]
1238fn count_suffixes_32s(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
1239 buckets[..k as usize].fill(0);
1240
1241 let mut i = 0usize;
1242 let mut j = (n as usize).saturating_sub(7);
1243 while i < j {
1244 buckets[t[i] as usize] += 1;
1245 buckets[t[i + 1] as usize] += 1;
1246 buckets[t[i + 2] as usize] += 1;
1247 buckets[t[i + 3] as usize] += 1;
1248 buckets[t[i + 4] as usize] += 1;
1249 buckets[t[i + 5] as usize] += 1;
1250 buckets[t[i + 6] as usize] += 1;
1251 buckets[t[i + 7] as usize] += 1;
1252 i += 8;
1253 }
1254
1255 j += 7;
1256 while i < j {
1257 buckets[t[i] as usize] += 1;
1258 i += 1;
1259 }
1260}
1261
1262#[allow(dead_code)]
1263fn initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
1264 let k = k as usize;
1265 let mut sum = 0;
1266 for j in 0..k {
1267 let i = buckets_index4(j, 0);
1268 buckets[4 * k + j] = sum;
1269 sum += buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1270 buckets[5 * k + j] = sum;
1271 }
1272}
1273
1274#[allow(dead_code)]
1275fn initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: &mut [SaSint]) {
1276 let k = k as usize;
1277 let mut sum = 0;
1278 for j in 0..k {
1279 let i = buckets_index2(j, 0);
1280 buckets[2 * k + j] = sum;
1281 sum += buckets[i] + buckets[i + 1];
1282 buckets[3 * k + j] = sum;
1283 }
1284}
1285
1286#[allow(dead_code)]
1287fn initialize_buckets_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1288 let mut sum0 = 0;
1289 for j in 0..k as usize {
1290 let i = buckets_index2(j, 0);
1291 sum0 += buckets[i] + buckets[i + 1];
1292 buckets[i] = sum0;
1293 }
1294}
1295
1296#[allow(dead_code)]
1297fn initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1298 let k = k as usize;
1299 for j in 0..k {
1300 let i = buckets_index2(j, 0);
1301 buckets[j] = buckets[i];
1302 }
1303 buckets[k] = 0;
1304 buckets.copy_within(0..k - 1, k + 1);
1305}
1306
1307#[allow(dead_code)]
1308fn initialize_buckets_start_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1309 let mut sum = 0;
1310 for bucket in buckets.iter_mut().take(k as usize) {
1311 let tmp = *bucket;
1312 *bucket = sum;
1313 sum += tmp;
1314 }
1315}
1316
1317#[allow(dead_code)]
1318fn initialize_buckets_end_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1319 let mut sum = 0;
1320 for bucket in buckets.iter_mut().take(k as usize) {
1321 sum += *bucket;
1322 *bucket = sum;
1323 }
1324}
1325
1326#[allow(dead_code)]
1327fn initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
1328 t: &[SaSint],
1329 k: SaSint,
1330 buckets: &mut [SaSint],
1331 first_lms_suffix: SaSint,
1332) {
1333 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1334 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1335
1336 let mut sum0 = 0;
1337 let mut sum1 = 0;
1338 for j in 0..k as usize {
1339 let i = buckets_index2(j, 0);
1340 sum0 += buckets[i] + buckets[i + 1];
1341 sum1 += buckets[i + 1];
1342 buckets[i] = sum0;
1343 buckets[i + 1] = sum1;
1344 }
1345}
1346
1347#[allow(dead_code)]
1348fn initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
1349 t: &[SaSint],
1350 k: SaSint,
1351 buckets: &mut [SaSint],
1352 mut first_lms_suffix: SaSint,
1353) -> SaSint {
1354 let mut f0 = 0usize;
1355 let mut c0 = t[first_lms_suffix as usize] as isize;
1356
1357 loop {
1358 first_lms_suffix -= 1;
1359 if first_lms_suffix < 0 {
1360 break;
1361 }
1362 let c1 = c0;
1363 c0 = t[first_lms_suffix as usize] as isize;
1364 let f1 = f0;
1365 f0 = usize::from(c0 > c1 - f1 as isize);
1366 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1367 }
1368 buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1369
1370 let mut sum = 0;
1371 for j in 0..k as usize {
1372 let i = buckets_index4(j, 0);
1373 sum += buckets[i + 1] + buckets[i + 3];
1374 buckets[4 * k as usize + j] = sum;
1375 }
1376 sum
1377}
1378
1379#[allow(dead_code)]
1380fn initialize_buckets_for_partial_sorting_32s_6k(
1381 t: &[SaSint],
1382 k: SaSint,
1383 buckets: &mut [SaSint],
1384 first_lms_suffix: SaSint,
1385 left_suffixes_count: SaSint,
1386) {
1387 let k = k as usize;
1388 let temp_offset = 4 * k;
1389 let first_symbol = t[first_lms_suffix as usize] as usize;
1390 let mut sum0 = left_suffixes_count + 1;
1391 let mut sum1 = 0;
1392 let mut sum2 = 0;
1393
1394 for j in 0..first_symbol {
1395 let i = buckets_index4(j, 0);
1396 let tj = buckets_index2(j, 0);
1397 let ss = buckets[i];
1398 let ls = buckets[i + 1];
1399 let sl = buckets[i + 2];
1400 let ll = buckets[i + 3];
1401
1402 buckets[i] = sum0;
1403 buckets[i + 1] = sum2;
1404 buckets[i + 2] = 0;
1405 buckets[i + 3] = 0;
1406
1407 sum0 += ss + sl;
1408 sum1 += ls;
1409 sum2 += ls + ll;
1410
1411 buckets[temp_offset + tj] = sum0;
1412 buckets[temp_offset + tj + 1] = sum1;
1413 }
1414
1415 sum1 += 1;
1416 for j in first_symbol..k {
1417 let i = buckets_index4(j, 0);
1418 let tj = buckets_index2(j, 0);
1419 let ss = buckets[i];
1420 let ls = buckets[i + 1];
1421 let sl = buckets[i + 2];
1422 let ll = buckets[i + 3];
1423
1424 buckets[i] = sum0;
1425 buckets[i + 1] = sum2;
1426 buckets[i + 2] = 0;
1427 buckets[i + 3] = 0;
1428
1429 sum0 += ss + sl;
1430 sum1 += ls;
1431 sum2 += ls + ll;
1432
1433 buckets[temp_offset + tj] = sum0;
1434 buckets[temp_offset + tj + 1] = sum1;
1435 }
1436}
1437
1438#[allow(dead_code)]
1439fn initialize_buckets_for_radix_and_partial_sorting_32s_4k(
1440 t: &[SaSint],
1441 k: SaSint,
1442 buckets: &mut [SaSint],
1443 first_lms_suffix: SaSint,
1444) {
1445 let k = k as usize;
1446 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1447 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1448
1449 let mut sum0 = 0;
1450 let mut sum1 = 0;
1451 for j in 0..k {
1452 let i = buckets_index2(j, 0);
1453 buckets[2 * k + j] = sum1;
1454 sum0 += buckets[i + 1];
1455 sum1 += buckets[i] + buckets[i + 1];
1456 buckets[i + 1] = sum0;
1457 buckets[3 * k + j] = sum1;
1458 }
1459}
1460
1461#[allow(dead_code)]
1462fn count_and_gather_compacted_lms_suffixes_32s_2k_omp(
1463 t: &[SaSint],
1464 sa: &mut [SaSint],
1465 n: SaSint,
1466 k: SaSint,
1467 buckets: &mut [SaSint],
1468 local_buckets: SaSint,
1469 threads: SaSint,
1470 thread_state: &mut [ThreadState],
1471) {
1472 let free_space = if local_buckets != 0 {
1473 LIBSAIS_LOCAL_BUFFER_SIZE as SaSint
1474 } else {
1475 buckets.len() as SaSint
1476 };
1477 let mut max_threads = (free_space / (((2 * k) + 15) & !15)).min(threads);
1478
1479 if local_buckets == 0 && max_threads > 1 && n >= 65_536 && n / k >= 2 {
1480 let thread_cap = n / (8 * k);
1481 if max_threads > thread_cap {
1482 max_threads = thread_cap;
1483 }
1484 count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1485 t,
1486 sa,
1487 n,
1488 k,
1489 buckets,
1490 local_buckets,
1491 max_threads.max(2),
1492 thread_state,
1493 );
1494 } else {
1495 count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1496 }
1497}
1498
1499#[allow(dead_code)]
1500fn gather_lms_suffixes_16u(
1501 t: &[u16],
1502 sa: &mut [SaSint],
1503 n: SaSint,
1504 mut m: SaSint,
1505 omp_block_start: SaSint,
1506 omp_block_size: SaSint,
1507) {
1508 if omp_block_size > 0 {
1509 let n = n as isize;
1510 let mut i: isize;
1511 let mut j = (omp_block_start + omp_block_size) as isize;
1512 let mut c0 = t[(omp_block_start + omp_block_size - 1) as usize] as isize;
1513 let mut c1 = -1isize;
1514
1515 while j < n {
1516 c1 = t[j as usize] as isize;
1517 if c1 != c0 {
1518 break;
1519 }
1520 j += 1;
1521 }
1522
1523 let mut f0 = usize::from(c0 >= c1);
1524 let mut f1: usize;
1525
1526 i = (omp_block_start + omp_block_size - 2) as isize;
1527 j = (omp_block_start + 3) as isize;
1528 while i >= j {
1529 c1 = t[i as usize] as isize;
1530 f1 = usize::from(c1 > c0 - f0 as isize);
1531 sa[m as usize] = (i + 1) as SaSint;
1532 m -= (f1 & (1 - f0)) as SaSint;
1533
1534 c0 = t[(i - 1) as usize] as isize;
1535 f0 = usize::from(c0 > c1 - f1 as isize);
1536 sa[m as usize] = i as SaSint;
1537 m -= (f0 & (1 - f1)) as SaSint;
1538
1539 c1 = t[(i - 2) as usize] as isize;
1540 f1 = usize::from(c1 > c0 - f0 as isize);
1541 sa[m as usize] = (i - 1) as SaSint;
1542 m -= (f1 & (1 - f0)) as SaSint;
1543
1544 c0 = t[(i - 3) as usize] as isize;
1545 f0 = usize::from(c0 > c1 - f1 as isize);
1546 sa[m as usize] = (i - 2) as SaSint;
1547 m -= (f0 & (1 - f1)) as SaSint;
1548
1549 i -= 4;
1550 }
1551
1552 j -= 3;
1553 while i >= j {
1554 c1 = c0;
1555 c0 = t[i as usize] as isize;
1556 f1 = f0;
1557 f0 = usize::from(c0 > c1 - f1 as isize);
1558 sa[m as usize] = (i + 1) as SaSint;
1559 m -= (f0 & (1 - f1)) as SaSint;
1560 i -= 1;
1561 }
1562
1563 sa[m as usize] = (i + 1) as SaSint;
1564 }
1565}
1566
1567#[allow(dead_code)]
1568fn count_and_gather_lms_suffixes_16u(
1569 t: &[u16],
1570 sa: &mut [SaSint],
1571 n: SaSint,
1572 buckets: &mut [SaSint],
1573 omp_block_start: SaSint,
1574 omp_block_size: SaSint,
1575) -> SaSint {
1576 buckets[..4 * ALPHABET_SIZE].fill(0);
1577
1578 let mut m = (omp_block_start + omp_block_size - 1) as isize;
1579
1580 if omp_block_size > 0 {
1581 let n = n as isize;
1582 let mut i: isize;
1583 let mut j = m + 1;
1584 let mut c0 = t[m as usize] as isize;
1585 let mut c1 = -1isize;
1586
1587 while j < n {
1588 c1 = t[j as usize] as isize;
1589 if c1 != c0 {
1590 break;
1591 }
1592 j += 1;
1593 }
1594
1595 let mut f0 = usize::from(c0 >= c1);
1596 let mut f1: usize;
1597
1598 i = m - 1;
1599 j = (omp_block_start + 3) as isize;
1600 while i >= j {
1601 c1 = t[i as usize] as isize;
1602 f1 = usize::from(c1 > c0 - f0 as isize);
1603 sa[m as usize] = (i + 1) as SaSint;
1604 m -= (f1 & (1 - f0)) as isize;
1605 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1606
1607 c0 = t[(i - 1) as usize] as isize;
1608 f0 = usize::from(c0 > c1 - f1 as isize);
1609 sa[m as usize] = i as SaSint;
1610 m -= (f0 & (1 - f1)) as isize;
1611 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1612
1613 c1 = t[(i - 2) as usize] as isize;
1614 f1 = usize::from(c1 > c0 - f0 as isize);
1615 sa[m as usize] = (i - 1) as SaSint;
1616 m -= (f1 & (1 - f0)) as isize;
1617 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1618
1619 c0 = t[(i - 3) as usize] as isize;
1620 f0 = usize::from(c0 > c1 - f1 as isize);
1621 sa[m as usize] = (i - 2) as SaSint;
1622 m -= (f0 & (1 - f1)) as isize;
1623 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1624
1625 i -= 4;
1626 }
1627
1628 j -= 3;
1629 while i >= j {
1630 c1 = c0;
1631 c0 = t[i as usize] as isize;
1632 f1 = f0;
1633 f0 = usize::from(c0 > c1 - f1 as isize);
1634 sa[m as usize] = (i + 1) as SaSint;
1635 m -= (f0 & (1 - f1)) as isize;
1636 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
1637 i -= 1;
1638 }
1639
1640 c1 = if i >= 0 { t[i as usize] as isize } else { -1 };
1641 f1 = usize::from(c1 > c0 - f0 as isize);
1642 sa[m as usize] = (i + 1) as SaSint;
1643 m -= (f1 & (1 - f0)) as isize;
1644 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
1645 }
1646
1647 omp_block_start + omp_block_size - 1 - m as SaSint
1648}
1649
1650#[allow(dead_code)]
1651fn gather_lms_suffixes_16u_omp(
1652 t: &[u16],
1653 sa: &mut [SaSint],
1654 n: SaSint,
1655 threads: SaSint,
1656 thread_state: &mut [ThreadState],
1657) {
1658 if threads == 1 || n < 65_536 || thread_state.is_empty() {
1659 gather_lms_suffixes_16u(t, sa, n, n - 1, 0, n);
1660 return;
1661 }
1662
1663 let thread_count = threads as usize;
1664 let block_stride = (n / threads) & !15;
1665 let mut suffix_counts_after = vec![0; thread_count];
1666 let mut m = 0;
1667 for thread in (0..thread_count).rev() {
1668 suffix_counts_after[thread] = m;
1669 m += thread_state[thread].m;
1670 }
1671
1672 for thread in 0..thread_count {
1673 let block_start = thread as SaSint * block_stride;
1674 let block_size = if thread + 1 < thread_count {
1675 block_stride
1676 } else {
1677 n - block_start
1678 };
1679 gather_lms_suffixes_16u(
1680 t,
1681 sa,
1682 n,
1683 n - 1 - suffix_counts_after[thread],
1684 block_start,
1685 block_size,
1686 );
1687 }
1688
1689 for thread in 0..thread_count {
1690 if thread_state[thread].m > 0 {
1691 sa[(n - 1 - suffix_counts_after[thread]) as usize] =
1692 thread_state[thread].last_lms_suffix;
1693 }
1694 }
1695}
1696
1697#[allow(dead_code)]
1698fn count_and_gather_lms_suffixes_16u_omp(
1699 t: &[u16],
1700 sa: &mut [SaSint],
1701 n: SaSint,
1702 buckets: &mut [SaSint],
1703 threads: SaSint,
1704 thread_state: &mut [ThreadState],
1705) -> SaSint {
1706 if threads == 1 || n < 65_536 || thread_state.is_empty() {
1707 return count_and_gather_lms_suffixes_16u(t, sa, n, buckets, 0, n);
1708 }
1709
1710 let thread_count = threads as usize;
1711 let block_stride = (n / threads) & !15;
1712
1713 for thread in 0..thread_count {
1714 let block_start = thread as SaSint * block_stride;
1715 let block_size = if thread + 1 < thread_count {
1716 block_stride
1717 } else {
1718 n - block_start
1719 };
1720 let count = count_and_gather_lms_suffixes_16u(
1721 t,
1722 sa,
1723 n,
1724 &mut thread_state[thread].buckets,
1725 block_start,
1726 block_size,
1727 );
1728 thread_state[thread].m = count;
1729 thread_state[thread].position = block_start + block_size;
1730 if count > 0 {
1731 thread_state[thread].last_lms_suffix = sa[(block_start + block_size - 1) as usize];
1732 }
1733 }
1734
1735 buckets[..4 * ALPHABET_SIZE].fill(0);
1736 let mut m = 0;
1737 for thread in (0..thread_count).rev() {
1738 let position = thread_state[thread].position;
1739 let count = thread_state[thread].m;
1740 m += count;
1741 if thread + 1 != thread_count && count > 0 {
1742 let src_end = position as usize;
1743 let src_start = src_end - count as usize;
1744 let dst_start = (n - m) as usize;
1745 sa.copy_within(src_start..src_end, dst_start);
1746 }
1747 for s in 0..4 * ALPHABET_SIZE {
1748 let a = buckets[s];
1749 let b = thread_state[thread].buckets[s];
1750 buckets[s] = a + b;
1751 thread_state[thread].buckets[s] = a;
1752 }
1753 }
1754
1755 m
1756}
1757
1758#[allow(dead_code)]
1759fn initialize_buckets_start_and_end_16u(
1760 buckets: &mut [SaSint],
1761 freq: Option<&mut [SaSint]>,
1762) -> SaSint {
1763 let (count_buckets, start_end) = buckets.split_at_mut(6 * ALPHABET_SIZE);
1764 let (bucket_start, bucket_end) = start_end.split_at_mut(ALPHABET_SIZE);
1765
1766 let mut k = -1;
1767 let mut sum = 0;
1768
1769 if let Some(freq) = freq {
1770 for j in 0..ALPHABET_SIZE {
1771 let i = buckets_index4(j, 0);
1772 let total = count_buckets[i]
1773 + count_buckets[i + buckets_index4(0, 1)]
1774 + count_buckets[i + buckets_index4(0, 2)]
1775 + count_buckets[i + buckets_index4(0, 3)];
1776
1777 bucket_start[j] = sum;
1778 sum += total;
1779 bucket_end[j] = sum;
1780 if total > 0 {
1781 k = j as SaSint;
1782 }
1783 freq[j] = total;
1784 }
1785 } else {
1786 for j in 0..ALPHABET_SIZE {
1787 let i = buckets_index4(j, 0);
1788 let total = count_buckets[i]
1789 + count_buckets[i + buckets_index4(0, 1)]
1790 + count_buckets[i + buckets_index4(0, 2)]
1791 + count_buckets[i + buckets_index4(0, 3)];
1792
1793 bucket_start[j] = sum;
1794 sum += total;
1795 bucket_end[j] = sum;
1796 if total > 0 {
1797 k = j as SaSint;
1798 }
1799 }
1800 }
1801
1802 k + 1
1803}
1804
1805#[allow(dead_code)]
1806fn initialize_buckets_for_lms_suffixes_radix_sort_16u(
1807 t: &[u16],
1808 buckets: &mut [SaSint],
1809 mut first_lms_suffix: SaSint,
1810) -> SaSint {
1811 let mut f0 = 0usize;
1812 let mut c0 = t[first_lms_suffix as usize] as isize;
1813
1814 loop {
1815 first_lms_suffix -= 1;
1816 if first_lms_suffix < 0 {
1817 break;
1818 }
1819
1820 let c1 = c0;
1821 c0 = t[first_lms_suffix as usize] as isize;
1822 let f1 = f0;
1823 f0 = usize::from(c0 > c1 - f1 as isize);
1824 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] -= 1;
1825 }
1826
1827 buckets[buckets_index4(c0 as usize, f0 + f0)] -= 1;
1828
1829 let (count_buckets, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
1830 let mut sum = 0;
1831 for c in 0..ALPHABET_SIZE {
1832 let i = buckets_index4(c, 0);
1833 let j = buckets_index2(c, 0);
1834 temp_bucket[j + buckets_index2(0, 1)] = sum;
1835 sum += count_buckets[i + buckets_index4(0, 1)] + count_buckets[i + buckets_index4(0, 3)];
1836 temp_bucket[j] = sum;
1837 }
1838
1839 sum
1840}
1841
1842#[allow(dead_code)]
1843fn radix_sort_lms_suffixes_16u(
1844 t: &[u16],
1845 sa: &mut [SaSint],
1846 induction_bucket: &mut [SaSint],
1847 omp_block_start: SaSint,
1848 omp_block_size: SaSint,
1849) {
1850 let mut i = omp_block_start + omp_block_size - 1;
1851 let mut j = omp_block_start + 64 + 3;
1852 while i >= j {
1853 let p0 = sa[i as usize];
1854 induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1855 sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1856
1857 let p1 = sa[(i - 1) as usize];
1858 induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1859 sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1860
1861 let p2 = sa[(i - 2) as usize];
1862 induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1863 sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1864
1865 let p3 = sa[(i - 3) as usize];
1866 induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1867 sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1868
1869 i -= 4;
1870 }
1871
1872 j -= 64 + 3;
1873 while i >= j {
1874 let p = sa[i as usize];
1875 induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1876 sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1877 i -= 1;
1878 }
1879}
1880
1881#[allow(dead_code)]
1882fn radix_sort_lms_suffixes_16u_omp(
1883 t: &[u16],
1884 sa: &mut [SaSint],
1885 n: SaSint,
1886 m: SaSint,
1887 flags: SaSint,
1888 buckets: &mut [SaSint],
1889 threads: SaSint,
1890 thread_state: &mut [ThreadState],
1891) {
1892 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
1893 buckets[4 * ALPHABET_SIZE] -= 1;
1894 }
1895 if threads == 1 || n < 65_536 || m < 65_536 || thread_state.is_empty() {
1896 radix_sort_lms_suffixes_16u(t, sa, &mut buckets[4 * ALPHABET_SIZE..], n - m + 1, m - 1);
1897 return;
1898 }
1899
1900 let thread_count = threads as usize;
1901 for thread in 0..thread_count {
1902 let (src_buckets, state_buckets) = (
1903 &buckets[4 * ALPHABET_SIZE..],
1904 &mut thread_state[thread].buckets,
1905 );
1906 for c in 0..ALPHABET_SIZE {
1907 let i = buckets_index2(c, 0);
1908 let j = buckets_index4(c, 1);
1909 state_buckets[i] = src_buckets[i] - state_buckets[j];
1910 }
1911
1912 let mut block_start = 0;
1913 let mut block_size = thread_state[thread].m;
1914 for idx in (thread..thread_count).rev() {
1915 block_start += thread_state[idx].m;
1916 }
1917
1918 if block_start == m && block_size > 0 {
1919 block_start -= 1;
1920 block_size -= 1;
1921 }
1922
1923 radix_sort_lms_suffixes_16u(
1924 t,
1925 sa,
1926 &mut thread_state[thread].buckets,
1927 n - block_start,
1928 block_size,
1929 );
1930 }
1931}
1932
1933#[allow(dead_code)]
1934fn radix_sort_lms_suffixes_32s_6k(
1935 t: &[SaSint],
1936 sa: &mut [SaSint],
1937 induction_bucket: &mut [SaSint],
1938 omp_block_start: SaSint,
1939 omp_block_size: SaSint,
1940) {
1941 let mut i = omp_block_start + omp_block_size - 1;
1942 let mut j = omp_block_start + 64 + 3;
1943 while i >= j {
1944 let p0 = sa[i as usize];
1945 induction_bucket[t[p0 as usize] as usize] -= 1;
1946 sa[induction_bucket[t[p0 as usize] as usize] as usize] = p0;
1947 let p1 = sa[(i - 1) as usize];
1948 induction_bucket[t[p1 as usize] as usize] -= 1;
1949 sa[induction_bucket[t[p1 as usize] as usize] as usize] = p1;
1950 let p2 = sa[(i - 2) as usize];
1951 induction_bucket[t[p2 as usize] as usize] -= 1;
1952 sa[induction_bucket[t[p2 as usize] as usize] as usize] = p2;
1953 let p3 = sa[(i - 3) as usize];
1954 induction_bucket[t[p3 as usize] as usize] -= 1;
1955 sa[induction_bucket[t[p3 as usize] as usize] as usize] = p3;
1956 i -= 4;
1957 }
1958
1959 j -= 64 + 3;
1960 while i >= j {
1961 let p = sa[i as usize];
1962 induction_bucket[t[p as usize] as usize] -= 1;
1963 sa[induction_bucket[t[p as usize] as usize] as usize] = p;
1964 i -= 1;
1965 }
1966}
1967
1968#[allow(dead_code)]
1969fn radix_sort_lms_suffixes_32s_2k(
1970 t: &[SaSint],
1971 sa: &mut [SaSint],
1972 induction_bucket: &mut [SaSint],
1973 omp_block_start: SaSint,
1974 omp_block_size: SaSint,
1975) {
1976 let mut i = omp_block_start + omp_block_size - 1;
1977 let mut j = omp_block_start + 64 + 3;
1978 while i >= j {
1979 let p0 = sa[i as usize];
1980 induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] -= 1;
1981 sa[induction_bucket[buckets_index2(t[p0 as usize] as usize, 0)] as usize] = p0;
1982 let p1 = sa[(i - 1) as usize];
1983 induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] -= 1;
1984 sa[induction_bucket[buckets_index2(t[p1 as usize] as usize, 0)] as usize] = p1;
1985 let p2 = sa[(i - 2) as usize];
1986 induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] -= 1;
1987 sa[induction_bucket[buckets_index2(t[p2 as usize] as usize, 0)] as usize] = p2;
1988 let p3 = sa[(i - 3) as usize];
1989 induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] -= 1;
1990 sa[induction_bucket[buckets_index2(t[p3 as usize] as usize, 0)] as usize] = p3;
1991 i -= 4;
1992 }
1993
1994 j -= 64 + 3;
1995 while i >= j {
1996 let p = sa[i as usize];
1997 induction_bucket[buckets_index2(t[p as usize] as usize, 0)] -= 1;
1998 sa[induction_bucket[buckets_index2(t[p as usize] as usize, 0)] as usize] = p;
1999 i -= 1;
2000 }
2001}
2002
2003#[allow(dead_code)]
2004fn radix_sort_lms_suffixes_32s_block_gather(
2005 t: &[SaSint],
2006 sa: &[SaSint],
2007 cache: &mut [ThreadCache],
2008 omp_block_start: SaSint,
2009 omp_block_size: SaSint,
2010) {
2011 if omp_block_size <= 0 {
2012 return;
2013 }
2014
2015 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2016 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2017 let cache_base = if cache.len() >= start + size {
2018 0
2019 } else {
2020 start
2021 };
2022 let mut i = start;
2023 let mut j = if size > 67 { start + size - 67 } else { start };
2024
2025 while i < j {
2026 for current in [i, i + 1, i + 2, i + 3] {
2027 let ci = current - cache_base;
2028 let index = sa[current];
2029 cache[ci].index = index;
2030 cache[ci].symbol = t[index as usize];
2031 }
2032 i += 4;
2033 }
2034
2035 j = if size > 67 { j + 67 } else { start + size };
2036 while i < j {
2037 let ci = i - cache_base;
2038 let index = sa[i];
2039 cache[ci].index = index;
2040 cache[ci].symbol = t[index as usize];
2041 i += 1;
2042 }
2043}
2044
2045#[allow(dead_code)]
2046fn radix_sort_lms_suffixes_32s_6k_block_sort(
2047 induction_bucket: &mut [SaSint],
2048 cache: &mut [ThreadCache],
2049 omp_block_start: SaSint,
2050 omp_block_size: SaSint,
2051) {
2052 if omp_block_size <= 0 {
2053 return;
2054 }
2055
2056 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2057 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2058 let cache_base = if cache.len() >= start + size {
2059 0
2060 } else {
2061 start
2062 };
2063 let mut i = start + size - 1;
2064 let mut j = start + 64 + 3;
2065
2066 while i >= j {
2067 for current in [i, i - 1, i - 2, i - 3] {
2068 let ci = current - cache_base;
2069 let v = cache[ci].symbol as usize;
2070 induction_bucket[v] -= 1;
2071 cache[ci].symbol = induction_bucket[v];
2072 }
2073 i -= 4;
2074 }
2075
2076 j -= 64 + 3;
2077 while i >= j {
2078 let ci = i - cache_base;
2079 let v = cache[ci].symbol as usize;
2080 induction_bucket[v] -= 1;
2081 cache[ci].symbol = induction_bucket[v];
2082 if i == 0 {
2083 break;
2084 }
2085 i -= 1;
2086 }
2087}
2088
2089#[allow(dead_code)]
2090fn radix_sort_lms_suffixes_32s_2k_block_sort(
2091 induction_bucket: &mut [SaSint],
2092 cache: &mut [ThreadCache],
2093 omp_block_start: SaSint,
2094 omp_block_size: SaSint,
2095) {
2096 if omp_block_size <= 0 {
2097 return;
2098 }
2099
2100 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2101 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2102 let cache_base = if cache.len() >= start + size {
2103 0
2104 } else {
2105 start
2106 };
2107 let mut i = start + size - 1;
2108 let mut j = start + 64 + 3;
2109
2110 while i >= j {
2111 for current in [i, i - 1, i - 2, i - 3] {
2112 let ci = current - cache_base;
2113 let v = buckets_index2(cache[ci].symbol as usize, 0);
2114 induction_bucket[v] -= 1;
2115 cache[ci].symbol = induction_bucket[v];
2116 }
2117 i -= 4;
2118 }
2119
2120 j -= 64 + 3;
2121 while i >= j {
2122 let ci = i - cache_base;
2123 let v = buckets_index2(cache[ci].symbol as usize, 0);
2124 induction_bucket[v] -= 1;
2125 cache[ci].symbol = induction_bucket[v];
2126 if i == 0 {
2127 break;
2128 }
2129 i -= 1;
2130 }
2131}
2132
2133#[allow(dead_code)]
2134fn radix_sort_lms_suffixes_32s_6k_block_omp(
2135 t: &[SaSint],
2136 sa: &mut [SaSint],
2137 induction_bucket: &mut [SaSint],
2138 cache: &mut [ThreadCache],
2139 block_start: SaSint,
2140 block_size: SaSint,
2141 threads: SaSint,
2142) {
2143 if threads <= 1 || block_size < 16_384 {
2144 radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, block_start, block_size);
2145 return;
2146 }
2147
2148 radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2149 radix_sort_lms_suffixes_32s_6k_block_sort(induction_bucket, cache, block_start, block_size);
2150 place_cached_suffixes(sa, cache, block_start, block_size);
2151}
2152
2153#[allow(dead_code)]
2154fn radix_sort_lms_suffixes_32s_2k_block_omp(
2155 t: &[SaSint],
2156 sa: &mut [SaSint],
2157 induction_bucket: &mut [SaSint],
2158 cache: &mut [ThreadCache],
2159 block_start: SaSint,
2160 block_size: SaSint,
2161 threads: SaSint,
2162) {
2163 if threads <= 1 || block_size < 16_384 {
2164 radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, block_start, block_size);
2165 return;
2166 }
2167
2168 radix_sort_lms_suffixes_32s_block_gather(t, sa, cache, block_start, block_size);
2169 radix_sort_lms_suffixes_32s_2k_block_sort(induction_bucket, cache, block_start, block_size);
2170 place_cached_suffixes(sa, cache, block_start, block_size);
2171}
2172
2173#[allow(dead_code)]
2174fn radix_sort_lms_suffixes_32s_6k_omp(
2175 t: &[SaSint],
2176 sa: &mut [SaSint],
2177 n: SaSint,
2178 m: SaSint,
2179 induction_bucket: &mut [SaSint],
2180 threads: SaSint,
2181) {
2182 if threads <= 1 || m < 65_536 {
2183 radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, n - m + 1, m - 1);
2184 return;
2185 }
2186
2187 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2188 let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2189 let mut block_start = 0usize;
2190 let m_usize = usize::try_from(m).expect("m must be non-negative");
2191 let n_usize = usize::try_from(n).expect("n must be non-negative");
2192 let last = m_usize - 1;
2193
2194 while block_start < last {
2195 let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2196 radix_sort_lms_suffixes_32s_6k_block_omp(
2197 t,
2198 sa,
2199 induction_bucket,
2200 &mut cache,
2201 (n_usize - block_end) as SaSint,
2202 (block_end - block_start) as SaSint,
2203 threads,
2204 );
2205 block_start = block_end;
2206 }
2207}
2208
2209#[allow(dead_code)]
2210fn radix_sort_lms_suffixes_32s_2k_omp(
2211 t: &[SaSint],
2212 sa: &mut [SaSint],
2213 n: SaSint,
2214 m: SaSint,
2215 induction_bucket: &mut [SaSint],
2216 threads: SaSint,
2217) {
2218 if threads <= 1 || m < 65_536 {
2219 radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, n - m + 1, m - 1);
2220 return;
2221 }
2222
2223 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2224 let mut cache = vec![ThreadCache::default(); threads_usize * PER_THREAD_CACHE_SIZE];
2225 let mut block_start = 0usize;
2226 let m_usize = usize::try_from(m).expect("m must be non-negative");
2227 let n_usize = usize::try_from(n).expect("n must be non-negative");
2228 let last = m_usize - 1;
2229
2230 while block_start < last {
2231 let block_end = (block_start + threads_usize * PER_THREAD_CACHE_SIZE).min(last);
2232 radix_sort_lms_suffixes_32s_2k_block_omp(
2233 t,
2234 sa,
2235 induction_bucket,
2236 &mut cache,
2237 (n_usize - block_end) as SaSint,
2238 (block_end - block_start) as SaSint,
2239 threads,
2240 );
2241 block_start = block_end;
2242 }
2243}
2244
2245#[allow(dead_code)]
2246fn radix_sort_lms_suffixes_32s_1k(
2247 t: &[SaSint],
2248 sa: &mut [SaSint],
2249 n: SaSint,
2250 buckets: &mut [SaSint],
2251) -> SaSint {
2252 let mut i = n - 2;
2253 let mut m = 0;
2254 let mut f0 = 1usize;
2255 let mut f1: usize;
2256 let mut c0 = t[(n - 1) as usize] as isize;
2257 let mut c1: isize;
2258 let mut c2 = 0isize;
2259
2260 while i >= 64 + 3 {
2261 c1 = t[i as usize] as isize;
2262 f1 = usize::from(c1 > c0 - f0 as isize);
2263 if (f1 & !f0) != 0 {
2264 c2 = c0;
2265 buckets[c2 as usize] -= 1;
2266 sa[buckets[c2 as usize] as usize] = i + 1;
2267 m += 1;
2268 }
2269 c0 = t[(i - 1) as usize] as isize;
2270 f0 = usize::from(c0 > c1 - f1 as isize);
2271 if (f0 & !f1) != 0 {
2272 c2 = c1;
2273 buckets[c2 as usize] -= 1;
2274 sa[buckets[c2 as usize] as usize] = i;
2275 m += 1;
2276 }
2277 c1 = t[(i - 2) as usize] as isize;
2278 f1 = usize::from(c1 > c0 - f0 as isize);
2279 if (f1 & !f0) != 0 {
2280 c2 = c0;
2281 buckets[c2 as usize] -= 1;
2282 sa[buckets[c2 as usize] as usize] = i - 1;
2283 m += 1;
2284 }
2285 c0 = t[(i - 3) as usize] as isize;
2286 f0 = usize::from(c0 > c1 - f1 as isize);
2287 if (f0 & !f1) != 0 {
2288 c2 = c1;
2289 buckets[c2 as usize] -= 1;
2290 sa[buckets[c2 as usize] as usize] = i - 2;
2291 m += 1;
2292 }
2293 i -= 4;
2294 }
2295
2296 while i >= 0 {
2297 c1 = c0;
2298 c0 = t[i as usize] as isize;
2299 f1 = f0;
2300 f0 = usize::from(c0 > c1 - f1 as isize);
2301 if (f0 & !f1) != 0 {
2302 c2 = c1;
2303 buckets[c2 as usize] -= 1;
2304 sa[buckets[c2 as usize] as usize] = i + 1;
2305 m += 1;
2306 }
2307 i -= 1;
2308 }
2309
2310 if m > 1 {
2311 sa[buckets[c2 as usize] as usize] = 0;
2312 }
2313
2314 m
2315}
2316
2317#[allow(dead_code)]
2318fn radix_sort_set_markers_32s_6k(
2319 sa: &mut [SaSint],
2320 induction_bucket: &[SaSint],
2321 omp_block_start: SaSint,
2322 omp_block_size: SaSint,
2323) {
2324 let mut i = omp_block_start;
2325 let mut j = omp_block_start + omp_block_size - 64 - 3;
2326
2327 while i < j {
2328 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2329 sa[induction_bucket[(i + 1) as usize] as usize] |= SAINT_MIN;
2330 sa[induction_bucket[(i + 2) as usize] as usize] |= SAINT_MIN;
2331 sa[induction_bucket[(i + 3) as usize] as usize] |= SAINT_MIN;
2332 i += 4;
2333 }
2334
2335 j += 64 + 3;
2336 while i < j {
2337 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2338 i += 1;
2339 }
2340}
2341
2342#[allow(dead_code)]
2343fn radix_sort_set_markers_32s_4k(
2344 sa: &mut [SaSint],
2345 induction_bucket: &[SaSint],
2346 omp_block_start: SaSint,
2347 omp_block_size: SaSint,
2348) {
2349 let mut i = omp_block_start;
2350 let mut j = omp_block_start + omp_block_size - 64 - 3;
2351
2352 while i < j {
2353 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2354 sa[induction_bucket[buckets_index2((i + 1) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2355 sa[induction_bucket[buckets_index2((i + 2) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2356 sa[induction_bucket[buckets_index2((i + 3) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2357 i += 4;
2358 }
2359
2360 j += 64 + 3;
2361 while i < j {
2362 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2363 i += 1;
2364 }
2365}
2366
2367#[allow(dead_code)]
2368fn radix_sort_set_markers_32s_6k_omp(
2369 sa: &mut [SaSint],
2370 k: SaSint,
2371 induction_bucket: &[SaSint],
2372 threads: SaSint,
2373) {
2374 if k <= 1 {
2375 return;
2376 }
2377
2378 if threads <= 1 || k < 65_536 {
2379 radix_sort_set_markers_32s_6k(sa, induction_bucket, 0, k - 1);
2380 return;
2381 }
2382
2383 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2384 let last = usize::try_from(k - 1).expect("k must be positive");
2385 let stride = (last / threads_usize) & !15usize;
2386 let mut start = 0usize;
2387
2388 for thread in 0..threads_usize {
2389 let end = if thread + 1 == threads_usize {
2390 last
2391 } else {
2392 start + stride
2393 };
2394 if end > start {
2395 radix_sort_set_markers_32s_6k(
2396 sa,
2397 induction_bucket,
2398 start as SaSint,
2399 (end - start) as SaSint,
2400 );
2401 }
2402 start = end;
2403 }
2404}
2405
2406#[allow(dead_code)]
2407fn radix_sort_set_markers_32s_4k_omp(
2408 sa: &mut [SaSint],
2409 k: SaSint,
2410 induction_bucket: &[SaSint],
2411 threads: SaSint,
2412) {
2413 if k <= 1 {
2414 return;
2415 }
2416
2417 if threads <= 1 || k < 65_536 {
2418 radix_sort_set_markers_32s_4k(sa, induction_bucket, 0, k - 1);
2419 return;
2420 }
2421
2422 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2423 let last = usize::try_from(k - 1).expect("k must be positive");
2424 let stride = (last / threads_usize) & !15usize;
2425 let mut start = 0usize;
2426
2427 for thread in 0..threads_usize {
2428 let end = if thread + 1 == threads_usize {
2429 last
2430 } else {
2431 start + stride
2432 };
2433 if end > start {
2434 radix_sort_set_markers_32s_4k(
2435 sa,
2436 induction_bucket,
2437 start as SaSint,
2438 (end - start) as SaSint,
2439 );
2440 }
2441 start = end;
2442 }
2443}
2444
2445#[allow(dead_code)]
2446fn initialize_buckets_for_partial_sorting_16u(
2447 t: &[u16],
2448 buckets: &mut [SaSint],
2449 first_lms_suffix: SaSint,
2450 left_suffixes_count: SaSint,
2451) {
2452 buckets[buckets_index4(t[first_lms_suffix as usize] as usize, 1)] += 1;
2453
2454 let (front, temp_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
2455 let mut sum0 = left_suffixes_count + 1;
2456 let mut sum1 = 0;
2457
2458 for c in 0..ALPHABET_SIZE {
2459 let i = buckets_index4(c, 0);
2460 let j = buckets_index2(c, 0);
2461
2462 temp_bucket[j + buckets_index2(0, 0)] = sum0;
2463
2464 sum0 += front[i + buckets_index4(0, 0)] + front[i + buckets_index4(0, 2)];
2465 sum1 += front[i + buckets_index4(0, 1)];
2466
2467 front[j + buckets_index2(0, 0)] = sum0;
2468 front[j + buckets_index2(0, 1)] = sum1;
2469 }
2470}
2471
2472#[allow(dead_code)]
2473fn partial_sorting_shift_markers_32s_6k_omp(
2474 sa: &mut [SaSint],
2475 k: SaSint,
2476 buckets: &[SaSint],
2477 threads: SaSint,
2478) {
2479 let k_usize = usize::try_from(k).expect("k must be non-negative");
2480 let temp_bucket = &buckets[4 * k_usize..];
2481 let thread_count = if threads > 1 && k >= 65536 {
2482 usize::try_from(threads).expect("threads must be positive")
2483 } else {
2484 1
2485 };
2486 for t in 0..thread_count {
2487 let mut c = k_usize as isize - 1 - t as isize;
2488 while c >= 1 {
2489 let c_usize = c as usize;
2490 let mut i = buckets[buckets_index4(c_usize, 0)] - 1;
2491 let mut j = temp_bucket[buckets_index2(c_usize - 1, 0)] + 3;
2492 let mut s = SAINT_MIN;
2493
2494 while i >= j {
2495 let p0 = sa[i as usize];
2496 let q0 = (p0 & SAINT_MIN) ^ s;
2497 s ^= q0;
2498 sa[i as usize] = p0 ^ q0;
2499
2500 let p1 = sa[(i - 1) as usize];
2501 let q1 = (p1 & SAINT_MIN) ^ s;
2502 s ^= q1;
2503 sa[(i - 1) as usize] = p1 ^ q1;
2504
2505 let p2 = sa[(i - 2) as usize];
2506 let q2 = (p2 & SAINT_MIN) ^ s;
2507 s ^= q2;
2508 sa[(i - 2) as usize] = p2 ^ q2;
2509
2510 let p3 = sa[(i - 3) as usize];
2511 let q3 = (p3 & SAINT_MIN) ^ s;
2512 s ^= q3;
2513 sa[(i - 3) as usize] = p3 ^ q3;
2514
2515 i -= 4;
2516 }
2517
2518 j -= 3;
2519 while i >= j {
2520 let p = sa[i as usize];
2521 let q = (p & SAINT_MIN) ^ s;
2522 s ^= q;
2523 sa[i as usize] = p ^ q;
2524 i -= 1;
2525 }
2526
2527 c -= thread_count as isize;
2528 }
2529 }
2530}
2531
2532#[allow(dead_code)]
2533fn partial_sorting_shift_markers_32s_4k(sa: &mut [SaSint], n: SaSint) {
2534 let mut i = n - 1;
2535 let mut s = SUFFIX_GROUP_MARKER;
2536
2537 while i >= 3 {
2538 let p0 = sa[i as usize];
2539 let q0 =
2540 ((p0 & SUFFIX_GROUP_MARKER) ^ s) & (((p0 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2541 s ^= q0;
2542 sa[i as usize] = p0 ^ q0;
2543
2544 let p1 = sa[(i - 1) as usize];
2545 let q1 =
2546 ((p1 & SUFFIX_GROUP_MARKER) ^ s) & (((p1 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2547 s ^= q1;
2548 sa[(i - 1) as usize] = p1 ^ q1;
2549
2550 let p2 = sa[(i - 2) as usize];
2551 let q2 =
2552 ((p2 & SUFFIX_GROUP_MARKER) ^ s) & (((p2 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2553 s ^= q2;
2554 sa[(i - 2) as usize] = p2 ^ q2;
2555
2556 let p3 = sa[(i - 3) as usize];
2557 let q3 =
2558 ((p3 & SUFFIX_GROUP_MARKER) ^ s) & (((p3 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2559 s ^= q3;
2560 sa[(i - 3) as usize] = p3 ^ q3;
2561
2562 i -= 4;
2563 }
2564
2565 while i >= 0 {
2566 let p = sa[i as usize];
2567 let q = ((p & SUFFIX_GROUP_MARKER) ^ s) & (((p > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2568 s ^= q;
2569 sa[i as usize] = p ^ q;
2570 i -= 1;
2571 }
2572}
2573
2574#[allow(dead_code)]
2575fn partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
2576 let temp_offset = 4 * k as usize;
2577 let mut i = buckets_index2(0, 0);
2578
2579 while i <= buckets_index2(k as usize - 1, 0) {
2580 buckets[2 * i + buckets_index4(0, 0)] = buckets[temp_offset + i + buckets_index2(0, 0)];
2581 buckets[2 * i + buckets_index4(0, 1)] = buckets[temp_offset + i + buckets_index2(0, 1)];
2582 i += buckets_index2(1, 0);
2583 }
2584}
2585
2586#[allow(dead_code)]
2587fn partial_sorting_scan_left_to_right_16u(
2588 t: &[u16],
2589 sa: &mut [SaSint],
2590 buckets: &mut [SaSint],
2591 mut d: SaSint,
2592 omp_block_start: SaSint,
2593 omp_block_size: SaSint,
2594) -> SaSint {
2595 let mut i = omp_block_start as isize;
2596 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
2597 while i < j {
2598 let mut p0 = sa[i as usize];
2599 d += SaSint::from(p0 < 0);
2600 p0 &= SAINT_MAX;
2601 let v0 = buckets_index2(
2602 t[(p0 - 1) as usize] as usize,
2603 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
2604 );
2605 let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2606 SAINT_MIN
2607 } else {
2608 0
2609 };
2610 let dst0 = buckets[4 * ALPHABET_SIZE + v0] as usize;
2611 sa[dst0] = (p0 - 1) | mark0;
2612 buckets[4 * ALPHABET_SIZE + v0] += 1;
2613 buckets[2 * ALPHABET_SIZE + v0] = d;
2614
2615 let mut p1 = sa[(i + 1) as usize];
2616 d += SaSint::from(p1 < 0);
2617 p1 &= SAINT_MAX;
2618 let v1 = buckets_index2(
2619 t[(p1 - 1) as usize] as usize,
2620 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
2621 );
2622 let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2623 SAINT_MIN
2624 } else {
2625 0
2626 };
2627 let dst1 = buckets[4 * ALPHABET_SIZE + v1] as usize;
2628 sa[dst1] = (p1 - 1) | mark1;
2629 buckets[4 * ALPHABET_SIZE + v1] += 1;
2630 buckets[2 * ALPHABET_SIZE + v1] = d;
2631
2632 i += 2;
2633 }
2634
2635 j += 64 + 1;
2636 while i < j {
2637 let mut p = sa[i as usize];
2638 d += SaSint::from(p < 0);
2639 p &= SAINT_MAX;
2640 let v = buckets_index2(
2641 t[(p - 1) as usize] as usize,
2642 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2643 );
2644 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2645 SAINT_MIN
2646 } else {
2647 0
2648 };
2649 let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2650 sa[dst] = (p - 1) | mark;
2651 buckets[4 * ALPHABET_SIZE + v] += 1;
2652 buckets[2 * ALPHABET_SIZE + v] = d;
2653 i += 1;
2654 }
2655
2656 d
2657}
2658
2659#[allow(dead_code)]
2660fn partial_sorting_scan_left_to_right_16u_block_prepare(
2661 t: &[u16],
2662 sa: &mut [SaSint],
2663 k: SaSint,
2664 buckets: &mut [SaSint],
2665 cache: &mut [ThreadCache],
2666 omp_block_start: SaSint,
2667 omp_block_size: SaSint,
2668 state: &mut ThreadState,
2669) -> SaSint {
2670 let width = 2 * k as usize;
2671 buckets[..width].fill(0);
2672 buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2673
2674 let mut count = 0usize;
2675 let mut d = 1;
2676 for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
2677 let mut p = sa[i];
2678 cache[count].index = p;
2679 d += SaSint::from(p < 0);
2680 p &= SAINT_MAX;
2681 let v = buckets_index2(
2682 t[(p - 1) as usize] as usize,
2683 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2684 );
2685 cache[count].symbol = v as SaSint;
2686 buckets[v] += 1;
2687 buckets[2 * ALPHABET_SIZE + v] = d;
2688 count += 1;
2689 }
2690 state.cache_entries = count;
2691 d - 1
2692}
2693
2694#[allow(dead_code)]
2695fn partial_sorting_scan_left_to_right_16u_block_place(
2696 sa: &mut [SaSint],
2697 buckets: &mut [SaSint],
2698 cache: &[ThreadCache],
2699 count: SaSint,
2700 mut d: SaSint,
2701) {
2702 for entry in cache.iter().take(count as usize) {
2703 let mut p = entry.index;
2704 d += SaSint::from(p < 0);
2705 p &= SAINT_MAX;
2706 let v = entry.symbol as usize;
2707 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2708 SAINT_MIN
2709 } else {
2710 0
2711 };
2712 let dst = buckets[v] as usize;
2713 sa[dst] = (p - 1) | mark;
2714 buckets[v] += 1;
2715 buckets[2 * ALPHABET_SIZE + v] = d;
2716 }
2717}
2718
2719#[allow(dead_code)]
2720fn partial_sorting_scan_left_to_right_16u_block_omp(
2721 t: &[u16],
2722 sa: &mut [SaSint],
2723 k: SaSint,
2724 buckets: &mut [SaSint],
2725 d: SaSint,
2726 block_start: SaSint,
2727 block_size: SaSint,
2728 threads: SaSint,
2729 thread_state: &mut [ThreadState],
2730) -> SaSint {
2731 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
2732 usize::try_from(threads)
2733 .expect("threads must be non-negative")
2734 .min(thread_state.len())
2735 } else {
2736 1
2737 };
2738 if thread_count <= 1 {
2739 return partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, block_start, block_size);
2740 }
2741
2742 let bucket_width = 2 * k as usize;
2743 let block_stride = (block_size / thread_count as SaSint) & !15;
2744
2745 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
2746 let local_start = thread as SaSint * block_stride;
2747 let local_size = if thread + 1 < thread_count {
2748 block_stride
2749 } else {
2750 block_size - local_start
2751 };
2752 let mut local_state = ThreadState::default();
2753 state.position = partial_sorting_scan_left_to_right_16u_block_prepare(
2754 t,
2755 sa,
2756 k,
2757 &mut state.buckets,
2758 &mut state.cache,
2759 block_start + local_start,
2760 local_size,
2761 &mut local_state,
2762 );
2763 state.count = local_state.cache_entries as SaSint;
2764 }
2765
2766 let mut next_d = d;
2767 for state in thread_state.iter_mut().take(thread_count) {
2768 for c in 0..bucket_width {
2769 let a = buckets[4 * ALPHABET_SIZE + c];
2770 let b = state.buckets[c];
2771 buckets[4 * ALPHABET_SIZE + c] = a + b;
2772 state.buckets[c] = a;
2773 }
2774
2775 next_d -= 1;
2776 for c in 0..bucket_width {
2777 let a = buckets[2 * ALPHABET_SIZE + c];
2778 let b = state.buckets[2 * ALPHABET_SIZE + c];
2779 let shifted = b + next_d;
2780 buckets[2 * ALPHABET_SIZE + c] = if b > 0 { shifted } else { a };
2781 state.buckets[2 * ALPHABET_SIZE + c] = a;
2782 }
2783 next_d += 1 + state.position;
2784 state.position = next_d - state.position;
2785 }
2786
2787 for state in thread_state.iter_mut().take(thread_count) {
2788 partial_sorting_scan_left_to_right_16u_block_place(
2789 sa,
2790 &mut state.buckets,
2791 &state.cache,
2792 state.count,
2793 state.position,
2794 );
2795 }
2796
2797 next_d
2798}
2799
2800#[allow(dead_code)]
2801fn partial_sorting_scan_left_to_right_16u_omp(
2802 t: &[u16],
2803 sa: &mut [SaSint],
2804 n: SaSint,
2805 k: SaSint,
2806 buckets: &mut [SaSint],
2807 left_suffixes_count: SaSint,
2808 mut d: SaSint,
2809 threads: SaSint,
2810) -> SaSint {
2811 let v = buckets_index2(
2812 t[(n - 1) as usize] as usize,
2813 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
2814 );
2815 let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2816 buckets[4 * ALPHABET_SIZE + v] += 1;
2817 sa[dst] = (n - 1) | SAINT_MIN;
2818 d += 1;
2819 buckets[2 * ALPHABET_SIZE + v] = d;
2820
2821 if threads == 1 || left_suffixes_count < 65536 {
2822 d = partial_sorting_scan_left_to_right_16u(t, sa, buckets, d, 0, left_suffixes_count);
2823 } else {
2824 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
2825 let mut block_start = 0;
2826 while block_start < left_suffixes_count {
2827 if sa[block_start as usize] == 0 {
2828 block_start += 1;
2829 } else {
2830 let mut block_end =
2831 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
2832 if block_end > left_suffixes_count {
2833 block_end = left_suffixes_count;
2834 }
2835 let mut block_scan_end = block_start + 1;
2836 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
2837 block_scan_end += 1;
2838 }
2839 let block_size = block_scan_end - block_start;
2840
2841 if block_size < 32 {
2842 while block_start < block_scan_end {
2843 let mut p = sa[block_start as usize];
2844 d += SaSint::from(p < 0);
2845 p &= SAINT_MAX;
2846 let v = buckets_index2(
2847 t[(p - 1) as usize] as usize,
2848 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2849 );
2850 let dst = buckets[4 * ALPHABET_SIZE + v] as usize;
2851 buckets[4 * ALPHABET_SIZE + v] += 1;
2852 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2853 SAINT_MIN
2854 } else {
2855 0
2856 };
2857 sa[dst] = (p - 1) | mark;
2858 buckets[2 * ALPHABET_SIZE + v] = d;
2859 block_start += 1;
2860 }
2861 } else {
2862 d = partial_sorting_scan_left_to_right_16u_block_omp(
2863 t,
2864 sa,
2865 k,
2866 buckets,
2867 d,
2868 block_start,
2869 block_size,
2870 threads,
2871 &mut thread_state,
2872 );
2873 block_start = block_scan_end;
2874 }
2875 }
2876 }
2877 }
2878 d
2879}
2880
2881#[allow(dead_code)]
2882fn partial_sorting_scan_right_to_left_16u(
2883 t: &[u16],
2884 sa: &mut [SaSint],
2885 buckets: &mut [SaSint],
2886 mut d: SaSint,
2887 omp_block_start: SaSint,
2888 omp_block_size: SaSint,
2889) -> SaSint {
2890 let mut i = (omp_block_start + omp_block_size - 1) as isize;
2891 let mut j = (omp_block_start + 64 + 1) as isize;
2892 while i >= j {
2893 let mut p0 = sa[i as usize];
2894 d += SaSint::from(p0 < 0);
2895 p0 &= SAINT_MAX;
2896 let v0 = buckets_index2(
2897 t[(p0 - 1) as usize] as usize,
2898 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
2899 );
2900 let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
2901 SAINT_MIN
2902 } else {
2903 0
2904 };
2905 buckets[v0] -= 1;
2906 sa[buckets[v0] as usize] = (p0 - 1) | mark0;
2907 buckets[2 * ALPHABET_SIZE + v0] = d;
2908
2909 let mut p1 = sa[(i - 1) as usize];
2910 d += SaSint::from(p1 < 0);
2911 p1 &= SAINT_MAX;
2912 let v1 = buckets_index2(
2913 t[(p1 - 1) as usize] as usize,
2914 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
2915 );
2916 let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
2917 SAINT_MIN
2918 } else {
2919 0
2920 };
2921 buckets[v1] -= 1;
2922 sa[buckets[v1] as usize] = (p1 - 1) | mark1;
2923 buckets[2 * ALPHABET_SIZE + v1] = d;
2924
2925 i -= 2;
2926 }
2927
2928 j -= 64 + 1;
2929 while i >= j {
2930 let mut p = sa[i as usize];
2931 d += SaSint::from(p < 0);
2932 p &= SAINT_MAX;
2933 let v = buckets_index2(
2934 t[(p - 1) as usize] as usize,
2935 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2936 );
2937 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
2938 SAINT_MIN
2939 } else {
2940 0
2941 };
2942 buckets[v] -= 1;
2943 sa[buckets[v] as usize] = (p - 1) | mark;
2944 buckets[2 * ALPHABET_SIZE + v] = d;
2945 i -= 1;
2946 }
2947
2948 d
2949}
2950
2951#[allow(dead_code)]
2952fn partial_sorting_scan_right_to_left_16u_block_prepare(
2953 t: &[u16],
2954 sa: &mut [SaSint],
2955 k: SaSint,
2956 buckets: &mut [SaSint],
2957 cache: &mut [ThreadCache],
2958 omp_block_start: SaSint,
2959 omp_block_size: SaSint,
2960 state: &mut ThreadState,
2961) -> SaSint {
2962 let width = 2 * k as usize;
2963 buckets[..width].fill(0);
2964 buckets[2 * ALPHABET_SIZE..2 * ALPHABET_SIZE + width].fill(0);
2965
2966 let mut count = 0usize;
2967 let mut d = 1;
2968 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
2969 let mut p = sa[i];
2970 cache[count].index = p;
2971 d += SaSint::from(p < 0);
2972 p &= SAINT_MAX;
2973 let v = buckets_index2(
2974 t[(p - 1) as usize] as usize,
2975 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
2976 );
2977 cache[count].symbol = v as SaSint;
2978 buckets[v] += 1;
2979 buckets[2 * ALPHABET_SIZE + v] = d;
2980 count += 1;
2981 }
2982 state.cache_entries = count;
2983 d - 1
2984}
2985
2986#[allow(dead_code)]
2987fn partial_sorting_scan_right_to_left_16u_block_place(
2988 sa: &mut [SaSint],
2989 buckets: &mut [SaSint],
2990 cache: &[ThreadCache],
2991 count: SaSint,
2992 mut d: SaSint,
2993) {
2994 for entry in cache.iter().take(count as usize) {
2995 let mut p = entry.index;
2996 d += SaSint::from(p < 0);
2997 p &= SAINT_MAX;
2998 let v = entry.symbol as usize;
2999 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3000 SAINT_MIN
3001 } else {
3002 0
3003 };
3004 buckets[v] -= 1;
3005 sa[buckets[v] as usize] = (p - 1) | mark;
3006 buckets[2 * ALPHABET_SIZE + v] = d;
3007 }
3008}
3009
3010#[allow(dead_code)]
3011fn partial_gsa_scan_right_to_left_16u_block_place(
3012 sa: &mut [SaSint],
3013 buckets: &mut [SaSint],
3014 cache: &[ThreadCache],
3015 count: SaSint,
3016 mut d: SaSint,
3017) {
3018 for entry in cache.iter().take(count as usize) {
3019 let mut p = entry.index;
3020 d += SaSint::from(p < 0);
3021 p &= SAINT_MAX;
3022 let v = entry.symbol as usize;
3023 if v != 1 {
3024 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3025 SAINT_MIN
3026 } else {
3027 0
3028 };
3029 buckets[v] -= 1;
3030 sa[buckets[v] as usize] = (p - 1) | mark;
3031 buckets[2 * ALPHABET_SIZE + v] = d;
3032 }
3033 }
3034}
3035
3036#[allow(dead_code)]
3037fn partial_sorting_scan_right_to_left_16u_block_omp(
3038 t: &[u16],
3039 sa: &mut [SaSint],
3040 k: SaSint,
3041 buckets: &mut [SaSint],
3042 d: SaSint,
3043 block_start: SaSint,
3044 block_size: SaSint,
3045 threads: SaSint,
3046 thread_state: &mut [ThreadState],
3047) -> SaSint {
3048 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
3049 usize::try_from(threads)
3050 .expect("threads must be non-negative")
3051 .min(thread_state.len())
3052 } else {
3053 1
3054 };
3055 if thread_count <= 1 {
3056 return partial_sorting_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
3057 }
3058
3059 let width = 2 * k as usize;
3060 let distinct_offset = 2 * ALPHABET_SIZE;
3061 let block_stride = (block_size / thread_count as SaSint) & !15;
3062
3063 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
3064 let local_start = thread as SaSint * block_stride;
3065 let local_size = if thread + 1 < thread_count {
3066 block_stride
3067 } else {
3068 block_size - local_start
3069 };
3070 let mut local_state = ThreadState::default();
3071 state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
3072 t,
3073 sa,
3074 k,
3075 &mut state.buckets,
3076 &mut state.cache,
3077 block_start + local_start,
3078 local_size,
3079 &mut local_state,
3080 );
3081 state.count = local_state.cache_entries as SaSint;
3082 }
3083
3084 let mut next_d = d;
3085 for state in thread_state.iter_mut().take(thread_count).rev() {
3086 for c in 0..width {
3087 let a = buckets[c];
3088 let b = state.buckets[c];
3089 buckets[c] = a - b;
3090 state.buckets[c] = a;
3091 }
3092
3093 next_d -= 1;
3094 for c in 0..width {
3095 let offset = distinct_offset + c;
3096 let a = buckets[offset];
3097 let b = state.buckets[offset];
3098 let shifted = b + next_d;
3099 buckets[offset] = if b > 0 { shifted } else { a };
3100 state.buckets[offset] = a;
3101 }
3102 next_d += 1 + state.position;
3103 state.position = next_d - state.position;
3104 }
3105
3106 for state in thread_state.iter_mut().take(thread_count) {
3107 partial_sorting_scan_right_to_left_16u_block_place(
3108 sa,
3109 &mut state.buckets,
3110 &state.cache,
3111 state.count,
3112 state.position,
3113 );
3114 }
3115
3116 next_d
3117}
3118
3119#[allow(dead_code)]
3120fn partial_sorting_scan_right_to_left_16u_omp(
3121 t: &[u16],
3122 sa: &mut [SaSint],
3123 n: SaSint,
3124 k: SaSint,
3125 buckets: &mut [SaSint],
3126 first_lms_suffix: SaSint,
3127 left_suffixes_count: SaSint,
3128 d: SaSint,
3129 threads: SaSint,
3130) {
3131 let scan_start = left_suffixes_count + 1;
3132 let scan_end = n - first_lms_suffix;
3133
3134 if threads == 1 || scan_end - scan_start < 65536 {
3135 partial_sorting_scan_right_to_left_16u(
3136 t,
3137 sa,
3138 buckets,
3139 d,
3140 scan_start,
3141 scan_end - scan_start,
3142 );
3143 } else {
3144 let mut d = d;
3145 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
3146 let mut block_start = scan_end - 1;
3147 while block_start >= scan_start {
3148 if sa[block_start as usize] == 0 {
3149 block_start -= 1;
3150 } else {
3151 let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
3152 let mut block_max_end = block_start - block_limit;
3153 if block_max_end < scan_start {
3154 block_max_end = scan_start - 1;
3155 }
3156 let mut block_end = block_start - 1;
3157 while block_end > block_max_end && sa[block_end as usize] != 0 {
3158 block_end -= 1;
3159 }
3160 let block_size = block_start - block_end;
3161
3162 if block_size < 32 {
3163 while block_start > block_end {
3164 let mut p = sa[block_start as usize];
3165 d += SaSint::from(p < 0);
3166 p &= SAINT_MAX;
3167 let v = buckets_index2(
3168 t[(p - 1) as usize] as usize,
3169 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3170 );
3171 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
3172 SAINT_MIN
3173 } else {
3174 0
3175 };
3176 buckets[v] -= 1;
3177 sa[buckets[v] as usize] = (p - 1) | mark;
3178 buckets[2 * ALPHABET_SIZE + v] = d;
3179 block_start -= 1;
3180 }
3181 } else {
3182 d = partial_sorting_scan_right_to_left_16u_block_omp(
3183 t,
3184 sa,
3185 k,
3186 buckets,
3187 d,
3188 block_end + 1,
3189 block_size,
3190 threads,
3191 &mut thread_state,
3192 );
3193 block_start = block_end;
3194 }
3195 }
3196 }
3197 }
3198}
3199
3200#[allow(dead_code)]
3201fn partial_sorting_scan_left_to_right_32s_6k(
3202 t: &[SaSint],
3203 sa: &mut [SaSint],
3204 buckets: &mut [SaSint],
3205 mut d: SaSint,
3206 omp_block_start: SaSint,
3207 omp_block_size: SaSint,
3208) -> SaSint {
3209 let mut i = omp_block_start;
3210 let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3211
3212 while i < j {
3213 let mut p2 = sa[i as usize];
3214 d += SaSint::from(p2 < 0);
3215 p2 &= SAINT_MAX;
3216 let v2 = buckets_index4(
3217 t[(p2 - 1) as usize] as usize,
3218 usize::from(t[(p2 - 2) as usize] >= t[(p2 - 1) as usize]),
3219 );
3220 let pos2 = buckets[v2] as usize;
3221 buckets[v2] += 1;
3222 sa[pos2] = (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3223 buckets[2 + v2] = d;
3224
3225 let mut p3 = sa[(i + 1) as usize];
3226 d += SaSint::from(p3 < 0);
3227 p3 &= SAINT_MAX;
3228 let v3 = buckets_index4(
3229 t[(p3 - 1) as usize] as usize,
3230 usize::from(t[(p3 - 2) as usize] >= t[(p3 - 1) as usize]),
3231 );
3232 let pos3 = buckets[v3] as usize;
3233 buckets[v3] += 1;
3234 sa[pos3] = (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3235 buckets[2 + v3] = d;
3236
3237 i += 2;
3238 }
3239
3240 j += 2 * 64 + 1;
3241 while i < j {
3242 let mut p = sa[i as usize];
3243 d += SaSint::from(p < 0);
3244 p &= SAINT_MAX;
3245 let v = buckets_index4(
3246 t[(p - 1) as usize] as usize,
3247 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3248 );
3249 let pos = buckets[v] as usize;
3250 buckets[v] += 1;
3251 sa[pos] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3252 buckets[2 + v] = d;
3253 i += 1;
3254 }
3255
3256 d
3257}
3258
3259#[allow(dead_code)]
3260fn partial_sorting_scan_left_to_right_32s_4k(
3261 t: &[SaSint],
3262 sa: &mut [SaSint],
3263 k: SaSint,
3264 buckets: &mut [SaSint],
3265 mut d: SaSint,
3266 omp_block_start: SaSint,
3267 omp_block_size: SaSint,
3268) -> SaSint {
3269 let k = k as usize;
3270 let mut i = omp_block_start;
3271 let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3272
3273 while i < j {
3274 let mut p0 = sa[i as usize];
3275 sa[i as usize] = p0 & SAINT_MAX;
3276 if p0 > 0 {
3277 sa[i as usize] = 0;
3278 d += p0 >> (SUFFIX_GROUP_BIT - 1);
3279 p0 &= !SUFFIX_GROUP_MARKER;
3280 let v0 = buckets_index2(
3281 t[(p0 - 1) as usize] as usize,
3282 usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3283 );
3284 let c0 = t[(p0 - 1) as usize] as usize;
3285 let pos0 = buckets[2 * k + c0] as usize;
3286 buckets[2 * k + c0] += 1;
3287 sa[pos0] = (p0 - 1)
3288 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3289 << (SAINT_BIT - 1))
3290 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3291 buckets[v0] = d;
3292 }
3293
3294 let mut p1 = sa[(i + 1) as usize];
3295 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3296 if p1 > 0 {
3297 sa[(i + 1) as usize] = 0;
3298 d += p1 >> (SUFFIX_GROUP_BIT - 1);
3299 p1 &= !SUFFIX_GROUP_MARKER;
3300 let v1 = buckets_index2(
3301 t[(p1 - 1) as usize] as usize,
3302 usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3303 );
3304 let c1 = t[(p1 - 1) as usize] as usize;
3305 let pos1 = buckets[2 * k + c1] as usize;
3306 buckets[2 * k + c1] += 1;
3307 sa[pos1] = (p1 - 1)
3308 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3309 << (SAINT_BIT - 1))
3310 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3311 buckets[v1] = d;
3312 }
3313
3314 i += 2;
3315 }
3316
3317 j += 2 * 64 + 1;
3318 while i < j {
3319 let mut p = sa[i as usize];
3320 sa[i as usize] = p & SAINT_MAX;
3321 if p > 0 {
3322 sa[i as usize] = 0;
3323 d += p >> (SUFFIX_GROUP_BIT - 1);
3324 p &= !SUFFIX_GROUP_MARKER;
3325 let v = buckets_index2(
3326 t[(p - 1) as usize] as usize,
3327 usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3328 );
3329 let c = t[(p - 1) as usize] as usize;
3330 let pos = buckets[2 * k + c] as usize;
3331 buckets[2 * k + c] += 1;
3332 sa[pos] = (p - 1)
3333 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3334 << (SAINT_BIT - 1))
3335 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3336 buckets[v] = d;
3337 }
3338 i += 1;
3339 }
3340
3341 d
3342}
3343
3344#[allow(dead_code)]
3345fn partial_sorting_scan_left_to_right_32s_1k(
3346 t: &[SaSint],
3347 sa: &mut [SaSint],
3348 induction_bucket: &mut [SaSint],
3349 omp_block_start: SaSint,
3350 omp_block_size: SaSint,
3351) {
3352 let mut i = omp_block_start;
3353 let mut j = omp_block_start + omp_block_size - 2 * 64 - 1;
3354
3355 while i < j {
3356 let p0 = sa[i as usize];
3357 sa[i as usize] = p0 & SAINT_MAX;
3358 if p0 > 0 {
3359 sa[i as usize] = 0;
3360 let c0 = t[(p0 - 1) as usize] as usize;
3361 let pos0 = induction_bucket[c0] as usize;
3362 induction_bucket[c0] += 1;
3363 sa[pos0] = (p0 - 1)
3364 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3365 << (SAINT_BIT - 1));
3366 }
3367
3368 let p1 = sa[(i + 1) as usize];
3369 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3370 if p1 > 0 {
3371 sa[(i + 1) as usize] = 0;
3372 let c1 = t[(p1 - 1) as usize] as usize;
3373 let pos1 = induction_bucket[c1] as usize;
3374 induction_bucket[c1] += 1;
3375 sa[pos1] = (p1 - 1)
3376 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3377 << (SAINT_BIT - 1));
3378 }
3379
3380 i += 2;
3381 }
3382
3383 j += 2 * 64 + 1;
3384 while i < j {
3385 let p = sa[i as usize];
3386 sa[i as usize] = p & SAINT_MAX;
3387 if p > 0 {
3388 sa[i as usize] = 0;
3389 let c = t[(p - 1) as usize] as usize;
3390 let pos = induction_bucket[c] as usize;
3391 induction_bucket[c] += 1;
3392 sa[pos] = (p - 1)
3393 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3394 << (SAINT_BIT - 1));
3395 }
3396 i += 1;
3397 }
3398}
3399
3400#[allow(dead_code)]
3401fn partial_sorting_scan_left_to_right_32s_6k_omp(
3402 t: &[SaSint],
3403 sa: &mut [SaSint],
3404 n: SaSint,
3405 buckets: &mut [SaSint],
3406 left_suffixes_count: SaSint,
3407 mut d: SaSint,
3408 threads: SaSint,
3409 _thread_state: &mut [ThreadState],
3410) -> SaSint {
3411 let v = buckets_index4(
3412 t[(n - 1) as usize] as usize,
3413 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
3414 );
3415 let pos = buckets[v] as usize;
3416 buckets[v] += 1;
3417 sa[pos] = (n - 1) | SAINT_MIN;
3418 d += 1;
3419 buckets[2 + v] = d;
3420
3421 if threads == 1 || left_suffixes_count < 65536 {
3422 d = partial_sorting_scan_left_to_right_32s_6k(t, sa, buckets, d, 0, left_suffixes_count);
3423 } else {
3424 let mut cache = vec![ThreadCache::default(); left_suffixes_count as usize];
3425 let mut block_start = 0;
3426 while block_start < left_suffixes_count {
3427 let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3428 if block_end > left_suffixes_count {
3429 block_end = left_suffixes_count;
3430 }
3431 d = partial_sorting_scan_left_to_right_32s_6k_block_omp(
3432 t,
3433 sa,
3434 buckets,
3435 d,
3436 &mut cache,
3437 block_start,
3438 block_end - block_start,
3439 threads,
3440 );
3441 block_start = block_end;
3442 }
3443 }
3444
3445 d
3446}
3447
3448#[allow(dead_code)]
3449fn partial_sorting_scan_left_to_right_32s_4k_omp(
3450 t: &[SaSint],
3451 sa: &mut [SaSint],
3452 n: SaSint,
3453 k: SaSint,
3454 buckets: &mut [SaSint],
3455 mut d: SaSint,
3456 threads: SaSint,
3457 _thread_state: &mut [ThreadState],
3458) -> SaSint {
3459 let k_usize = k as usize;
3460 let pos = buckets[2 * k_usize + t[(n - 1) as usize] as usize] as usize;
3461 buckets[2 * k_usize + t[(n - 1) as usize] as usize] += 1;
3462 sa[pos] = (n - 1)
3463 | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1))
3464 | SUFFIX_GROUP_MARKER;
3465 d += 1;
3466 buckets[buckets_index2(
3467 t[(n - 1) as usize] as usize,
3468 usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]),
3469 )] = d;
3470
3471 if threads == 1 || n < 65536 {
3472 d = partial_sorting_scan_left_to_right_32s_4k(t, sa, k, buckets, d, 0, n);
3473 } else {
3474 let mut cache = vec![ThreadCache::default(); n as usize];
3475 let mut block_start = 0;
3476 while block_start < n {
3477 let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3478 if block_end > n {
3479 block_end = n;
3480 }
3481 d = partial_sorting_scan_left_to_right_32s_4k_block_omp(
3482 t,
3483 sa,
3484 k,
3485 buckets,
3486 d,
3487 &mut cache,
3488 block_start,
3489 block_end - block_start,
3490 threads,
3491 );
3492 block_start = block_end;
3493 }
3494 }
3495
3496 d
3497}
3498
3499#[allow(dead_code)]
3500fn partial_sorting_scan_left_to_right_32s_1k_omp(
3501 t: &[SaSint],
3502 sa: &mut [SaSint],
3503 n: SaSint,
3504 buckets: &mut [SaSint],
3505 threads: SaSint,
3506 _thread_state: &mut [ThreadState],
3507) {
3508 let pos = buckets[t[(n - 1) as usize] as usize] as usize;
3509 buckets[t[(n - 1) as usize] as usize] += 1;
3510 sa[pos] = (n - 1)
3511 | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1));
3512
3513 if threads == 1 || n < 65536 {
3514 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n);
3515 } else {
3516 let mut cache = vec![ThreadCache::default(); n as usize];
3517 let mut block_start = 0;
3518 while block_start < n {
3519 let mut block_end = block_start + threads * PER_THREAD_CACHE_SIZE as SaSint;
3520 if block_end > n {
3521 block_end = n;
3522 }
3523 partial_sorting_scan_left_to_right_32s_1k_block_omp(
3524 t,
3525 sa,
3526 buckets,
3527 &mut cache,
3528 block_start,
3529 block_end - block_start,
3530 threads,
3531 );
3532 block_start = block_end;
3533 }
3534 }
3535}
3536
3537#[allow(dead_code)]
3538fn partial_sorting_scan_right_to_left_32s_6k(
3539 t: &[SaSint],
3540 sa: &mut [SaSint],
3541 buckets: &mut [SaSint],
3542 mut d: SaSint,
3543 omp_block_start: SaSint,
3544 omp_block_size: SaSint,
3545) -> SaSint {
3546 if omp_block_size <= 0 {
3547 return d;
3548 }
3549
3550 let mut i = omp_block_start + omp_block_size - 1;
3551 let mut j = omp_block_start + 2 * 64 + 1;
3552
3553 while i >= j {
3554 let mut p2 = sa[i as usize];
3555 d += SaSint::from(p2 < 0);
3556 p2 &= SAINT_MAX;
3557 let v2 = buckets_index4(
3558 t[(p2 - 1) as usize] as usize,
3559 usize::from(t[(p2 - 2) as usize] > t[(p2 - 1) as usize]),
3560 );
3561 buckets[v2] -= 1;
3562 sa[buckets[v2] as usize] =
3563 (p2 - 1) | (((buckets[2 + v2] != d) as SaSint) << (SAINT_BIT - 1));
3564 buckets[2 + v2] = d;
3565
3566 let mut p3 = sa[(i - 1) as usize];
3567 d += SaSint::from(p3 < 0);
3568 p3 &= SAINT_MAX;
3569 let v3 = buckets_index4(
3570 t[(p3 - 1) as usize] as usize,
3571 usize::from(t[(p3 - 2) as usize] > t[(p3 - 1) as usize]),
3572 );
3573 buckets[v3] -= 1;
3574 sa[buckets[v3] as usize] =
3575 (p3 - 1) | (((buckets[2 + v3] != d) as SaSint) << (SAINT_BIT - 1));
3576 buckets[2 + v3] = d;
3577
3578 i -= 2;
3579 }
3580
3581 j -= 2 * 64 + 1;
3582 while i >= j {
3583 let mut p = sa[i as usize];
3584 d += SaSint::from(p < 0);
3585 p &= SAINT_MAX;
3586 let v = buckets_index4(
3587 t[(p - 1) as usize] as usize,
3588 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3589 );
3590 buckets[v] -= 1;
3591 sa[buckets[v] as usize] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
3592 buckets[2 + v] = d;
3593 i -= 1;
3594 }
3595
3596 d
3597}
3598
3599#[allow(dead_code)]
3600fn partial_sorting_scan_right_to_left_32s_4k(
3601 t: &[SaSint],
3602 sa: &mut [SaSint],
3603 k: SaSint,
3604 buckets: &mut [SaSint],
3605 mut d: SaSint,
3606 omp_block_start: SaSint,
3607 omp_block_size: SaSint,
3608) -> SaSint {
3609 if omp_block_size <= 0 {
3610 return d;
3611 }
3612
3613 let k = k as usize;
3614 let mut i = omp_block_start + omp_block_size - 1;
3615 let mut j = omp_block_start + 2 * 64 + 1;
3616
3617 while i >= j {
3618 let mut p0 = sa[i as usize];
3619 if p0 > 0 {
3620 sa[i as usize] = 0;
3621 d += p0 >> (SUFFIX_GROUP_BIT - 1);
3622 p0 &= !SUFFIX_GROUP_MARKER;
3623 let v0 = buckets_index2(
3624 t[(p0 - 1) as usize] as usize,
3625 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
3626 );
3627 let c0 = t[(p0 - 1) as usize] as usize;
3628 buckets[3 * k + c0] -= 1;
3629 sa[buckets[3 * k + c0] as usize] = (p0 - 1)
3630 | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3631 << (SAINT_BIT - 1))
3632 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3633 buckets[v0] = d;
3634 }
3635
3636 let mut p1 = sa[(i - 1) as usize];
3637 if p1 > 0 {
3638 sa[(i - 1) as usize] = 0;
3639 d += p1 >> (SUFFIX_GROUP_BIT - 1);
3640 p1 &= !SUFFIX_GROUP_MARKER;
3641 let v1 = buckets_index2(
3642 t[(p1 - 1) as usize] as usize,
3643 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
3644 );
3645 let c1 = t[(p1 - 1) as usize] as usize;
3646 buckets[3 * k + c1] -= 1;
3647 sa[buckets[3 * k + c1] as usize] = (p1 - 1)
3648 | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3649 << (SAINT_BIT - 1))
3650 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3651 buckets[v1] = d;
3652 }
3653
3654 i -= 2;
3655 }
3656
3657 j -= 2 * 64 + 1;
3658 while i >= j {
3659 let mut p = sa[i as usize];
3660 if p > 0 {
3661 sa[i as usize] = 0;
3662 d += p >> (SUFFIX_GROUP_BIT - 1);
3663 p &= !SUFFIX_GROUP_MARKER;
3664 let v = buckets_index2(
3665 t[(p - 1) as usize] as usize,
3666 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
3667 );
3668 let c = t[(p - 1) as usize] as usize;
3669 buckets[3 * k + c] -= 1;
3670 sa[buckets[3 * k + c] as usize] = (p - 1)
3671 | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3672 << (SAINT_BIT - 1))
3673 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3674 buckets[v] = d;
3675 }
3676 i -= 1;
3677 }
3678
3679 d
3680}
3681
3682#[allow(dead_code)]
3683fn partial_sorting_scan_right_to_left_32s_1k(
3684 t: &[SaSint],
3685 sa: &mut [SaSint],
3686 induction_bucket: &mut [SaSint],
3687 omp_block_start: SaSint,
3688 omp_block_size: SaSint,
3689) {
3690 if omp_block_size <= 0 {
3691 return;
3692 }
3693
3694 let mut i = omp_block_start + omp_block_size - 1;
3695 let mut j = omp_block_start + 2 * 64 + 1;
3696
3697 while i >= j {
3698 let p0 = sa[i as usize];
3699 if p0 > 0 {
3700 sa[i as usize] = 0;
3701 let c0 = t[(p0 - 1) as usize] as usize;
3702 induction_bucket[c0] -= 1;
3703 sa[induction_bucket[c0] as usize] = (p0 - 1)
3704 | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
3705 << (SAINT_BIT - 1));
3706 }
3707
3708 let p1 = sa[(i - 1) as usize];
3709 if p1 > 0 {
3710 sa[(i - 1) as usize] = 0;
3711 let c1 = t[(p1 - 1) as usize] as usize;
3712 induction_bucket[c1] -= 1;
3713 sa[induction_bucket[c1] as usize] = (p1 - 1)
3714 | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
3715 << (SAINT_BIT - 1));
3716 }
3717
3718 i -= 2;
3719 }
3720
3721 j -= 2 * 64 + 1;
3722 while i >= j {
3723 let p = sa[i as usize];
3724 if p > 0 {
3725 sa[i as usize] = 0;
3726 let c = t[(p - 1) as usize] as usize;
3727 induction_bucket[c] -= 1;
3728 sa[induction_bucket[c] as usize] = (p - 1)
3729 | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
3730 << (SAINT_BIT - 1));
3731 }
3732 i -= 1;
3733 }
3734}
3735
3736#[allow(dead_code)]
3737fn partial_sorting_scan_right_to_left_32s_6k_omp(
3738 t: &[SaSint],
3739 sa: &mut [SaSint],
3740 n: SaSint,
3741 buckets: &mut [SaSint],
3742 first_lms_suffix: SaSint,
3743 left_suffixes_count: SaSint,
3744 mut d: SaSint,
3745 threads: SaSint,
3746 _thread_state: &mut [ThreadState],
3747) -> SaSint {
3748 let scan_start = left_suffixes_count + 1;
3749 let scan_end = n - first_lms_suffix;
3750
3751 if threads == 1 || scan_end - scan_start < 65536 {
3752 d = partial_sorting_scan_right_to_left_32s_6k(
3753 t,
3754 sa,
3755 buckets,
3756 d,
3757 scan_start,
3758 scan_end - scan_start,
3759 );
3760 } else {
3761 let mut cache = vec![ThreadCache::default(); (scan_end - scan_start) as usize];
3762 let mut block_start = scan_end;
3763 while block_start > scan_start {
3764 let block_size =
3765 (block_start - scan_start).min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3766 block_start -= block_size;
3767 d = partial_sorting_scan_right_to_left_32s_6k_block_omp(
3768 t,
3769 sa,
3770 buckets,
3771 d,
3772 &mut cache,
3773 block_start,
3774 block_size,
3775 threads,
3776 );
3777 }
3778 }
3779
3780 d
3781}
3782
3783#[allow(dead_code)]
3784fn partial_sorting_scan_right_to_left_32s_4k_omp(
3785 t: &[SaSint],
3786 sa: &mut [SaSint],
3787 n: SaSint,
3788 k: SaSint,
3789 buckets: &mut [SaSint],
3790 mut d: SaSint,
3791 threads: SaSint,
3792 _thread_state: &mut [ThreadState],
3793) -> SaSint {
3794 if threads == 1 || n < 65536 {
3795 d = partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n);
3796 } else {
3797 let mut cache = vec![ThreadCache::default(); n as usize];
3798 let mut block_start = n;
3799 while block_start > 0 {
3800 let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3801 block_start -= block_size;
3802 d = partial_sorting_scan_right_to_left_32s_4k_block_omp(
3803 t,
3804 sa,
3805 k,
3806 buckets,
3807 d,
3808 &mut cache,
3809 block_start,
3810 block_size,
3811 threads,
3812 );
3813 }
3814 }
3815
3816 d
3817}
3818
3819#[allow(dead_code)]
3820fn partial_sorting_scan_right_to_left_32s_1k_omp(
3821 t: &[SaSint],
3822 sa: &mut [SaSint],
3823 n: SaSint,
3824 buckets: &mut [SaSint],
3825 threads: SaSint,
3826 _thread_state: &mut [ThreadState],
3827) {
3828 if threads == 1 || n < 65536 {
3829 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n);
3830 } else {
3831 let mut cache = vec![ThreadCache::default(); n as usize];
3832 let mut block_start = n;
3833 while block_start > 0 {
3834 let block_size = block_start.min(threads * PER_THREAD_CACHE_SIZE as SaSint);
3835 block_start -= block_size;
3836 partial_sorting_scan_right_to_left_32s_1k_block_omp(
3837 t,
3838 sa,
3839 buckets,
3840 &mut cache,
3841 block_start,
3842 block_size,
3843 threads,
3844 );
3845 }
3846 }
3847}
3848
3849#[allow(dead_code)]
3850fn partial_sorting_scan_left_to_right_32s_6k_block_gather(
3851 t: &[SaSint],
3852 sa: &mut [SaSint],
3853 cache: &mut [ThreadCache],
3854 omp_block_start: SaSint,
3855 omp_block_size: SaSint,
3856) {
3857 let mut i = omp_block_start;
3858 let mut j = omp_block_start + omp_block_size - 64 - 1;
3859
3860 while i < j {
3861 let p0 = sa[i as usize];
3862 cache[i as usize].index = p0;
3863 let p0 = p0 & SAINT_MAX;
3864 cache[i as usize].symbol = if p0 != 0 {
3865 buckets_index4(
3866 t[(p0 - 1) as usize] as usize,
3867 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
3868 ) as SaSint
3869 } else {
3870 0
3871 };
3872
3873 let p1 = sa[(i + 1) as usize];
3874 cache[(i + 1) as usize].index = p1;
3875 let p1 = p1 & SAINT_MAX;
3876 cache[(i + 1) as usize].symbol = if p1 != 0 {
3877 buckets_index4(
3878 t[(p1 - 1) as usize] as usize,
3879 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
3880 ) as SaSint
3881 } else {
3882 0
3883 };
3884
3885 i += 2;
3886 }
3887
3888 j += 64 + 1;
3889 while i < j {
3890 let p = sa[i as usize];
3891 cache[i as usize].index = p;
3892 let p = p & SAINT_MAX;
3893 cache[i as usize].symbol = if p != 0 {
3894 buckets_index4(
3895 t[(p - 1) as usize] as usize,
3896 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3897 ) as SaSint
3898 } else {
3899 0
3900 };
3901 i += 1;
3902 }
3903}
3904
3905#[allow(dead_code)]
3906fn partial_sorting_scan_left_to_right_32s_4k_block_gather(
3907 t: &[SaSint],
3908 sa: &mut [SaSint],
3909 cache: &mut [ThreadCache],
3910 omp_block_start: SaSint,
3911 omp_block_size: SaSint,
3912) {
3913 let mut i = omp_block_start;
3914 let mut j = omp_block_start + omp_block_size - 64 - 1;
3915
3916 while i < j {
3917 let mut symbol0 = SAINT_MIN;
3918 let mut p0 = sa[i as usize];
3919 if p0 > 0 {
3920 cache[i as usize].index = p0;
3921 p0 &= !SUFFIX_GROUP_MARKER;
3922 symbol0 = buckets_index2(
3923 t[(p0 - 1) as usize] as usize,
3924 usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]),
3925 ) as SaSint;
3926 p0 = 0;
3927 }
3928 cache[i as usize].symbol = symbol0;
3929 sa[i as usize] = p0 & SAINT_MAX;
3930
3931 let mut symbol1 = SAINT_MIN;
3932 let mut p1 = sa[(i + 1) as usize];
3933 if p1 > 0 {
3934 cache[(i + 1) as usize].index = p1;
3935 p1 &= !SUFFIX_GROUP_MARKER;
3936 symbol1 = buckets_index2(
3937 t[(p1 - 1) as usize] as usize,
3938 usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]),
3939 ) as SaSint;
3940 p1 = 0;
3941 }
3942 cache[(i + 1) as usize].symbol = symbol1;
3943 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3944
3945 i += 2;
3946 }
3947
3948 j += 64 + 1;
3949 while i < j {
3950 let mut symbol = SAINT_MIN;
3951 let mut p = sa[i as usize];
3952 if p > 0 {
3953 cache[i as usize].index = p;
3954 p &= !SUFFIX_GROUP_MARKER;
3955 symbol = buckets_index2(
3956 t[(p - 1) as usize] as usize,
3957 usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]),
3958 ) as SaSint;
3959 p = 0;
3960 }
3961 cache[i as usize].symbol = symbol;
3962 sa[i as usize] = p & SAINT_MAX;
3963 i += 1;
3964 }
3965}
3966
3967#[allow(dead_code)]
3968fn partial_sorting_scan_left_to_right_32s_1k_block_gather(
3969 t: &[SaSint],
3970 sa: &mut [SaSint],
3971 cache: &mut [ThreadCache],
3972 omp_block_start: SaSint,
3973 omp_block_size: SaSint,
3974) {
3975 let mut i = omp_block_start;
3976 let mut j = omp_block_start + omp_block_size - 64 - 1;
3977
3978 while i < j {
3979 let mut symbol0 = SAINT_MIN;
3980 let mut p0 = sa[i as usize];
3981 if p0 > 0 {
3982 cache[i as usize].index = (p0 - 1)
3983 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3984 << (SAINT_BIT - 1));
3985 symbol0 = t[(p0 - 1) as usize];
3986 p0 = 0;
3987 }
3988 cache[i as usize].symbol = symbol0;
3989 sa[i as usize] = p0 & SAINT_MAX;
3990
3991 let mut symbol1 = SAINT_MIN;
3992 let mut p1 = sa[(i + 1) as usize];
3993 if p1 > 0 {
3994 cache[(i + 1) as usize].index = (p1 - 1)
3995 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3996 << (SAINT_BIT - 1));
3997 symbol1 = t[(p1 - 1) as usize];
3998 p1 = 0;
3999 }
4000 cache[(i + 1) as usize].symbol = symbol1;
4001 sa[(i + 1) as usize] = p1 & SAINT_MAX;
4002
4003 i += 2;
4004 }
4005
4006 j += 64 + 1;
4007 while i < j {
4008 let mut symbol = SAINT_MIN;
4009 let mut p = sa[i as usize];
4010 if p > 0 {
4011 cache[i as usize].index = (p - 1)
4012 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
4013 << (SAINT_BIT - 1));
4014 symbol = t[(p - 1) as usize];
4015 p = 0;
4016 }
4017 cache[i as usize].symbol = symbol;
4018 sa[i as usize] = p & SAINT_MAX;
4019 i += 1;
4020 }
4021}
4022
4023#[allow(dead_code)]
4024fn partial_sorting_scan_right_to_left_32s_6k_block_gather(
4025 t: &[SaSint],
4026 sa: &mut [SaSint],
4027 cache: &mut [ThreadCache],
4028 omp_block_start: SaSint,
4029 omp_block_size: SaSint,
4030) {
4031 let mut i = omp_block_start;
4032 let mut j = omp_block_start + omp_block_size - 64 - 1;
4033
4034 while i < j {
4035 let p0 = sa[i as usize];
4036 cache[i as usize].index = p0;
4037 let p0 = p0 & SAINT_MAX;
4038 cache[i as usize].symbol = if p0 != 0 {
4039 buckets_index4(
4040 t[(p0 - 1) as usize] as usize,
4041 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4042 ) as SaSint
4043 } else {
4044 0
4045 };
4046
4047 let p1 = sa[(i + 1) as usize];
4048 cache[(i + 1) as usize].index = p1;
4049 let p1 = p1 & SAINT_MAX;
4050 cache[(i + 1) as usize].symbol = if p1 != 0 {
4051 buckets_index4(
4052 t[(p1 - 1) as usize] as usize,
4053 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4054 ) as SaSint
4055 } else {
4056 0
4057 };
4058
4059 i += 2;
4060 }
4061
4062 j += 64 + 1;
4063 while i < j {
4064 let p = sa[i as usize];
4065 cache[i as usize].index = p;
4066 let p = p & SAINT_MAX;
4067 cache[i as usize].symbol = if p != 0 {
4068 buckets_index4(
4069 t[(p - 1) as usize] as usize,
4070 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4071 ) as SaSint
4072 } else {
4073 0
4074 };
4075 i += 1;
4076 }
4077}
4078
4079#[allow(dead_code)]
4080fn partial_sorting_scan_right_to_left_32s_4k_block_gather(
4081 t: &[SaSint],
4082 sa: &mut [SaSint],
4083 cache: &mut [ThreadCache],
4084 omp_block_start: SaSint,
4085 omp_block_size: SaSint,
4086) {
4087 let mut i = omp_block_start;
4088 let mut j = omp_block_start + omp_block_size - 64 - 1;
4089
4090 while i < j {
4091 let mut symbol0 = SAINT_MIN;
4092 let mut p0 = sa[i as usize];
4093 if p0 > 0 {
4094 sa[i as usize] = 0;
4095 cache[i as usize].index = p0;
4096 p0 &= !SUFFIX_GROUP_MARKER;
4097 symbol0 = buckets_index2(
4098 t[(p0 - 1) as usize] as usize,
4099 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
4100 ) as SaSint;
4101 }
4102 cache[i as usize].symbol = symbol0;
4103
4104 let mut symbol1 = SAINT_MIN;
4105 let mut p1 = sa[(i + 1) as usize];
4106 if p1 > 0 {
4107 sa[(i + 1) as usize] = 0;
4108 cache[(i + 1) as usize].index = p1;
4109 p1 &= !SUFFIX_GROUP_MARKER;
4110 symbol1 = buckets_index2(
4111 t[(p1 - 1) as usize] as usize,
4112 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
4113 ) as SaSint;
4114 }
4115 cache[(i + 1) as usize].symbol = symbol1;
4116
4117 i += 2;
4118 }
4119
4120 j += 64 + 1;
4121 while i < j {
4122 let mut symbol = SAINT_MIN;
4123 let mut p = sa[i as usize];
4124 if p > 0 {
4125 sa[i as usize] = 0;
4126 cache[i as usize].index = p;
4127 p &= !SUFFIX_GROUP_MARKER;
4128 symbol = buckets_index2(
4129 t[(p - 1) as usize] as usize,
4130 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4131 ) as SaSint;
4132 }
4133 cache[i as usize].symbol = symbol;
4134 i += 1;
4135 }
4136}
4137
4138#[allow(dead_code)]
4139fn partial_sorting_scan_right_to_left_32s_1k_block_gather(
4140 t: &[SaSint],
4141 sa: &mut [SaSint],
4142 cache: &mut [ThreadCache],
4143 omp_block_start: SaSint,
4144 omp_block_size: SaSint,
4145) {
4146 let mut i = omp_block_start;
4147 let mut j = omp_block_start + omp_block_size - 64 - 1;
4148
4149 while i < j {
4150 let mut symbol0 = SAINT_MIN;
4151 let p0 = sa[i as usize];
4152 if p0 > 0 {
4153 sa[i as usize] = 0;
4154 cache[i as usize].index = (p0 - 1)
4155 | ((usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]) as SaSint)
4156 << (SAINT_BIT - 1));
4157 symbol0 = t[(p0 - 1) as usize];
4158 }
4159 cache[i as usize].symbol = symbol0;
4160
4161 let mut symbol1 = SAINT_MIN;
4162 let p1 = sa[(i + 1) as usize];
4163 if p1 > 0 {
4164 sa[(i + 1) as usize] = 0;
4165 cache[(i + 1) as usize].index = (p1 - 1)
4166 | ((usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]) as SaSint)
4167 << (SAINT_BIT - 1));
4168 symbol1 = t[(p1 - 1) as usize];
4169 }
4170 cache[(i + 1) as usize].symbol = symbol1;
4171
4172 i += 2;
4173 }
4174
4175 j += 64 + 1;
4176 while i < j {
4177 let mut symbol = SAINT_MIN;
4178 let p = sa[i as usize];
4179 if p > 0 {
4180 sa[i as usize] = 0;
4181 cache[i as usize].index = (p - 1)
4182 | ((usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]) as SaSint)
4183 << (SAINT_BIT - 1));
4184 symbol = t[(p - 1) as usize];
4185 }
4186 cache[i as usize].symbol = symbol;
4187 i += 1;
4188 }
4189}
4190
4191#[allow(dead_code)]
4192fn partial_sorting_scan_left_to_right_32s_6k_block_sort(
4193 t: &[SaSint],
4194 buckets: &mut [SaSint],
4195 mut d: SaSint,
4196 cache: &mut [ThreadCache],
4197 omp_block_start: SaSint,
4198 omp_block_size: SaSint,
4199) -> SaSint {
4200 let mut i = omp_block_start;
4201 let omp_block_end = omp_block_start + omp_block_size;
4202 let mut j = omp_block_end - 64 - 1;
4203
4204 while i < j {
4205 let v0 = cache[i as usize].symbol as usize;
4206 let p0 = cache[i as usize].index;
4207 d += SaSint::from(p0 < 0);
4208 cache[i as usize].symbol = buckets[v0];
4209 buckets[v0] += 1;
4210 cache[i as usize].index =
4211 (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4212 buckets[2 + v0] = d;
4213 if cache[i as usize].symbol < omp_block_end {
4214 let s = cache[i as usize].symbol as usize;
4215 let q = cache[i as usize].index & SAINT_MAX;
4216 cache[s].index = cache[i as usize].index;
4217 cache[s].symbol = buckets_index4(
4218 t[(q - 1) as usize] as usize,
4219 usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4220 ) as SaSint;
4221 }
4222
4223 let v1 = cache[(i + 1) as usize].symbol as usize;
4224 let p1 = cache[(i + 1) as usize].index;
4225 d += SaSint::from(p1 < 0);
4226 cache[(i + 1) as usize].symbol = buckets[v1];
4227 buckets[v1] += 1;
4228 cache[(i + 1) as usize].index =
4229 (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4230 buckets[2 + v1] = d;
4231 if cache[(i + 1) as usize].symbol < omp_block_end {
4232 let s = cache[(i + 1) as usize].symbol as usize;
4233 let q = cache[(i + 1) as usize].index & SAINT_MAX;
4234 cache[s].index = cache[(i + 1) as usize].index;
4235 cache[s].symbol = buckets_index4(
4236 t[(q - 1) as usize] as usize,
4237 usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4238 ) as SaSint;
4239 }
4240
4241 i += 2;
4242 }
4243
4244 j += 64 + 1;
4245 while i < j {
4246 let v = cache[i as usize].symbol as usize;
4247 let p = cache[i as usize].index;
4248 d += SaSint::from(p < 0);
4249 cache[i as usize].symbol = buckets[v];
4250 buckets[v] += 1;
4251 cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4252 buckets[2 + v] = d;
4253 if cache[i as usize].symbol < omp_block_end {
4254 let s = cache[i as usize].symbol as usize;
4255 let q = cache[i as usize].index & SAINT_MAX;
4256 cache[s].index = cache[i as usize].index;
4257 cache[s].symbol = buckets_index4(
4258 t[(q - 1) as usize] as usize,
4259 usize::from(t[(q - 2) as usize] >= t[(q - 1) as usize]),
4260 ) as SaSint;
4261 }
4262 i += 1;
4263 }
4264
4265 d
4266}
4267
4268#[allow(dead_code)]
4269fn partial_sorting_scan_left_to_right_32s_4k_block_sort(
4270 t: &[SaSint],
4271 k: SaSint,
4272 buckets: &mut [SaSint],
4273 mut d: SaSint,
4274 cache: &mut [ThreadCache],
4275 omp_block_start: SaSint,
4276 omp_block_size: SaSint,
4277) -> SaSint {
4278 let k = k as usize;
4279 let mut i = omp_block_start;
4280 let omp_block_end = omp_block_start + omp_block_size;
4281 let mut j = omp_block_end - 64 - 1;
4282
4283 while i < j {
4284 for current in [i, i + 1] {
4285 let v = cache[current as usize].symbol;
4286 if v >= 0 {
4287 let p = cache[current as usize].index;
4288 d += p >> (SUFFIX_GROUP_BIT - 1);
4289 let bucket_index = (v >> 1) as usize;
4290 let v_usize = v as usize;
4291 cache[current as usize].symbol = buckets[2 * k + bucket_index];
4292 buckets[2 * k + bucket_index] += 1;
4293 cache[current as usize].index = (p - 1)
4294 | ((v & 1) << (SAINT_BIT - 1))
4295 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4296 buckets[v_usize] = d;
4297 if cache[current as usize].symbol < omp_block_end {
4298 let ni = cache[current as usize].symbol as usize;
4299 let mut np = cache[current as usize].index;
4300 if np > 0 {
4301 cache[ni].index = np;
4302 np &= !SUFFIX_GROUP_MARKER;
4303 cache[ni].symbol = buckets_index2(
4304 t[(np - 1) as usize] as usize,
4305 usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4306 ) as SaSint;
4307 np = 0;
4308 }
4309 cache[current as usize].index = np & SAINT_MAX;
4310 }
4311 }
4312 }
4313 i += 2;
4314 }
4315
4316 j += 64 + 1;
4317 while i < j {
4318 let v = cache[i as usize].symbol;
4319 if v >= 0 {
4320 let p = cache[i as usize].index;
4321 d += p >> (SUFFIX_GROUP_BIT - 1);
4322 let bucket_index = (v >> 1) as usize;
4323 let v_usize = v as usize;
4324 cache[i as usize].symbol = buckets[2 * k + bucket_index];
4325 buckets[2 * k + bucket_index] += 1;
4326 cache[i as usize].index = (p - 1)
4327 | ((v & 1) << (SAINT_BIT - 1))
4328 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4329 buckets[v_usize] = d;
4330 if cache[i as usize].symbol < omp_block_end {
4331 let ni = cache[i as usize].symbol as usize;
4332 let mut np = cache[i as usize].index;
4333 if np > 0 {
4334 cache[ni].index = np;
4335 np &= !SUFFIX_GROUP_MARKER;
4336 cache[ni].symbol = buckets_index2(
4337 t[(np - 1) as usize] as usize,
4338 usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]),
4339 ) as SaSint;
4340 np = 0;
4341 }
4342 cache[i as usize].index = np & SAINT_MAX;
4343 }
4344 }
4345 i += 1;
4346 }
4347
4348 d
4349}
4350
4351#[allow(dead_code)]
4352fn partial_sorting_scan_left_to_right_32s_1k_block_sort(
4353 t: &[SaSint],
4354 induction_bucket: &mut [SaSint],
4355 cache: &mut [ThreadCache],
4356 omp_block_start: SaSint,
4357 omp_block_size: SaSint,
4358) {
4359 let mut i = omp_block_start;
4360 let omp_block_end = omp_block_start + omp_block_size;
4361 let mut j = omp_block_end - 64 - 1;
4362
4363 while i < j {
4364 for current in [i, i + 1] {
4365 let v = cache[current as usize].symbol;
4366 if v >= 0 {
4367 cache[current as usize].symbol = induction_bucket[v as usize];
4368 induction_bucket[v as usize] += 1;
4369 if cache[current as usize].symbol < omp_block_end {
4370 let ni = cache[current as usize].symbol as usize;
4371 let mut np = cache[current as usize].index;
4372 if np > 0 {
4373 cache[ni].index = (np - 1)
4374 | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize])
4375 as SaSint)
4376 << (SAINT_BIT - 1));
4377 cache[ni].symbol = t[(np - 1) as usize];
4378 np = 0;
4379 }
4380 cache[current as usize].index = np & SAINT_MAX;
4381 }
4382 }
4383 }
4384 i += 2;
4385 }
4386
4387 j = omp_block_end;
4388 while i < j {
4389 let v = cache[i as usize].symbol;
4390 if v >= 0 {
4391 cache[i as usize].symbol = induction_bucket[v as usize];
4392 induction_bucket[v as usize] += 1;
4393 if cache[i as usize].symbol < omp_block_end {
4394 let ni = cache[i as usize].symbol as usize;
4395 let mut np = cache[i as usize].index;
4396 if np > 0 {
4397 cache[ni].index = (np - 1)
4398 | ((usize::from(t[(np - 2) as usize] < t[(np - 1) as usize]) as SaSint)
4399 << (SAINT_BIT - 1));
4400 cache[ni].symbol = t[(np - 1) as usize];
4401 np = 0;
4402 }
4403 cache[i as usize].index = np & SAINT_MAX;
4404 }
4405 }
4406 i += 1;
4407 }
4408}
4409
4410#[allow(dead_code)]
4411fn partial_sorting_scan_right_to_left_32s_6k_block_sort(
4412 t: &[SaSint],
4413 buckets: &mut [SaSint],
4414 mut d: SaSint,
4415 cache: &mut [ThreadCache],
4416 omp_block_start: SaSint,
4417 omp_block_size: SaSint,
4418) -> SaSint {
4419 let mut i = omp_block_start + omp_block_size - 1;
4420 let mut j = omp_block_start + 64 + 1;
4421
4422 while i >= j {
4423 let v0 = cache[i as usize].symbol as usize;
4424 let p0 = cache[i as usize].index;
4425 d += SaSint::from(p0 < 0);
4426 buckets[v0] -= 1;
4427 cache[i as usize].symbol = buckets[v0];
4428 cache[i as usize].index =
4429 (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4430 buckets[2 + v0] = d;
4431 if cache[i as usize].symbol >= omp_block_start {
4432 let s = cache[i as usize].symbol as usize;
4433 let q = cache[i as usize].index & SAINT_MAX;
4434 cache[s].index = cache[i as usize].index;
4435 cache[s].symbol = buckets_index4(
4436 t[(q - 1) as usize] as usize,
4437 usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4438 ) as SaSint;
4439 }
4440
4441 let v1 = cache[(i - 1) as usize].symbol as usize;
4442 let p1 = cache[(i - 1) as usize].index;
4443 d += SaSint::from(p1 < 0);
4444 buckets[v1] -= 1;
4445 cache[(i - 1) as usize].symbol = buckets[v1];
4446 cache[(i - 1) as usize].index =
4447 (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4448 buckets[2 + v1] = d;
4449 if cache[(i - 1) as usize].symbol >= omp_block_start {
4450 let s = cache[(i - 1) as usize].symbol as usize;
4451 let q = cache[(i - 1) as usize].index & SAINT_MAX;
4452 cache[s].index = cache[(i - 1) as usize].index;
4453 cache[s].symbol = buckets_index4(
4454 t[(q - 1) as usize] as usize,
4455 usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4456 ) as SaSint;
4457 }
4458
4459 i -= 2;
4460 }
4461
4462 j -= 64 + 1;
4463 while i >= j {
4464 let v = cache[i as usize].symbol as usize;
4465 let p = cache[i as usize].index;
4466 d += SaSint::from(p < 0);
4467 buckets[v] -= 1;
4468 cache[i as usize].symbol = buckets[v];
4469 cache[i as usize].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4470 buckets[2 + v] = d;
4471 if cache[i as usize].symbol >= omp_block_start {
4472 let s = cache[i as usize].symbol as usize;
4473 let q = cache[i as usize].index & SAINT_MAX;
4474 cache[s].index = cache[i as usize].index;
4475 cache[s].symbol = buckets_index4(
4476 t[(q - 1) as usize] as usize,
4477 usize::from(t[(q - 2) as usize] > t[(q - 1) as usize]),
4478 ) as SaSint;
4479 }
4480 i -= 1;
4481 }
4482
4483 d
4484}
4485
4486#[allow(dead_code)]
4487fn partial_sorting_scan_right_to_left_32s_4k_block_sort(
4488 t: &[SaSint],
4489 k: SaSint,
4490 buckets: &mut [SaSint],
4491 mut d: SaSint,
4492 cache: &mut [ThreadCache],
4493 omp_block_start: SaSint,
4494 omp_block_size: SaSint,
4495) -> SaSint {
4496 let k = k as usize;
4497 let mut i = omp_block_start + omp_block_size - 1;
4498 let mut j = omp_block_start + 64 + 1;
4499
4500 while i >= j {
4501 for current in [i, i - 1] {
4502 let v = cache[current as usize].symbol;
4503 if v >= 0 {
4504 let p = cache[current as usize].index;
4505 d += p >> (SUFFIX_GROUP_BIT - 1);
4506 let bucket_index = (v >> 1) as usize;
4507 let v_usize = v as usize;
4508 buckets[3 * k + bucket_index] -= 1;
4509 cache[current as usize].symbol = buckets[3 * k + bucket_index];
4510 cache[current as usize].index = (p - 1)
4511 | ((v & 1) << (SAINT_BIT - 1))
4512 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4513 buckets[v_usize] = d;
4514 if cache[current as usize].symbol >= omp_block_start {
4515 let ni = cache[current as usize].symbol as usize;
4516 let mut np = cache[current as usize].index;
4517 if np > 0 {
4518 cache[current as usize].index = 0;
4519 cache[ni].index = np;
4520 np &= !SUFFIX_GROUP_MARKER;
4521 cache[ni].symbol = buckets_index2(
4522 t[(np - 1) as usize] as usize,
4523 usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4524 ) as SaSint;
4525 }
4526 }
4527 }
4528 }
4529 i -= 2;
4530 }
4531
4532 j -= 64 + 1;
4533 while i >= j {
4534 let v = cache[i as usize].symbol;
4535 if v >= 0 {
4536 let p = cache[i as usize].index;
4537 d += p >> (SUFFIX_GROUP_BIT - 1);
4538 let bucket_index = (v >> 1) as usize;
4539 let v_usize = v as usize;
4540 buckets[3 * k + bucket_index] -= 1;
4541 cache[i as usize].symbol = buckets[3 * k + bucket_index];
4542 cache[i as usize].index = (p - 1)
4543 | ((v & 1) << (SAINT_BIT - 1))
4544 | (((buckets[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4545 buckets[v_usize] = d;
4546 if cache[i as usize].symbol >= omp_block_start {
4547 let ni = cache[i as usize].symbol as usize;
4548 let mut np = cache[i as usize].index;
4549 if np > 0 {
4550 cache[i as usize].index = 0;
4551 cache[ni].index = np;
4552 np &= !SUFFIX_GROUP_MARKER;
4553 cache[ni].symbol = buckets_index2(
4554 t[(np - 1) as usize] as usize,
4555 usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]),
4556 ) as SaSint;
4557 }
4558 }
4559 }
4560 i -= 1;
4561 }
4562
4563 d
4564}
4565
4566#[allow(dead_code)]
4567fn partial_sorting_scan_right_to_left_32s_1k_block_sort(
4568 t: &[SaSint],
4569 induction_bucket: &mut [SaSint],
4570 cache: &mut [ThreadCache],
4571 omp_block_start: SaSint,
4572 omp_block_size: SaSint,
4573) {
4574 let mut i = omp_block_start + omp_block_size - 1;
4575 let mut j = omp_block_start + 64 + 1;
4576
4577 while i >= j {
4578 for current in [i, i - 1] {
4579 let v = cache[current as usize].symbol;
4580 if v >= 0 {
4581 induction_bucket[v as usize] -= 1;
4582 cache[current as usize].symbol = induction_bucket[v as usize];
4583 if cache[current as usize].symbol >= omp_block_start {
4584 let ni = cache[current as usize].symbol as usize;
4585 let np = cache[current as usize].index;
4586 if np > 0 {
4587 cache[current as usize].index = 0;
4588 cache[ni].index = (np - 1)
4589 | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize])
4590 as SaSint)
4591 << (SAINT_BIT - 1));
4592 cache[ni].symbol = t[(np - 1) as usize];
4593 }
4594 }
4595 }
4596 }
4597 i -= 2;
4598 }
4599
4600 j -= 64 + 1;
4601 while i >= j {
4602 let v = cache[i as usize].symbol;
4603 if v >= 0 {
4604 induction_bucket[v as usize] -= 1;
4605 cache[i as usize].symbol = induction_bucket[v as usize];
4606 if cache[i as usize].symbol >= omp_block_start {
4607 let ni = cache[i as usize].symbol as usize;
4608 let np = cache[i as usize].index;
4609 if np > 0 {
4610 cache[i as usize].index = 0;
4611 cache[ni].index = (np - 1)
4612 | ((usize::from(t[(np - 2) as usize] > t[(np - 1) as usize]) as SaSint)
4613 << (SAINT_BIT - 1));
4614 cache[ni].symbol = t[(np - 1) as usize];
4615 }
4616 }
4617 }
4618 i -= 1;
4619 }
4620}
4621
4622#[allow(dead_code)]
4623fn partial_sorting_scan_left_to_right_32s_6k_block_omp(
4624 t: &[SaSint],
4625 sa: &mut [SaSint],
4626 buckets: &mut [SaSint],
4627 d: SaSint,
4628 cache: &mut [ThreadCache],
4629 block_start: SaSint,
4630 block_size: SaSint,
4631 threads: SaSint,
4632) -> SaSint {
4633 if block_size <= 0 {
4634 return d;
4635 }
4636 if threads == 1 || block_size < 16_384 {
4637 return partial_sorting_scan_left_to_right_32s_6k(
4638 t,
4639 sa,
4640 buckets,
4641 d,
4642 block_start,
4643 block_size,
4644 );
4645 }
4646
4647 let threads_usize = usize::try_from(threads)
4648 .expect("threads must be non-negative")
4649 .max(1);
4650 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4651 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4652 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4653 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4654
4655 for omp_thread_num in 0..omp_num_threads {
4656 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4657 omp_block_stride
4658 } else {
4659 block_size_usize - omp_thread_num * omp_block_stride
4660 };
4661 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4662 if omp_block_size == 0 {
4663 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4664 }
4665 partial_sorting_scan_left_to_right_32s_6k_block_gather(
4666 t,
4667 sa,
4668 &mut cache[omp_thread_num * omp_block_stride
4669 ..omp_thread_num * omp_block_stride + omp_block_size],
4670 omp_block_start as SaSint,
4671 omp_block_size as SaSint,
4672 );
4673 }
4674
4675 let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
4676 t,
4677 buckets,
4678 d,
4679 &mut cache[..block_size_usize],
4680 block_start,
4681 block_size,
4682 );
4683 place_cached_suffixes(sa, &cache[..block_size_usize], 0, block_size);
4684 d
4685}
4686
4687#[allow(dead_code)]
4688fn partial_sorting_scan_left_to_right_32s_4k_block_omp(
4689 t: &[SaSint],
4690 sa: &mut [SaSint],
4691 k: SaSint,
4692 buckets: &mut [SaSint],
4693 d: SaSint,
4694 cache: &mut [ThreadCache],
4695 block_start: SaSint,
4696 block_size: SaSint,
4697 threads: SaSint,
4698) -> SaSint {
4699 if block_size <= 0 {
4700 return d;
4701 }
4702 if threads == 1 || block_size < 16_384 {
4703 return partial_sorting_scan_left_to_right_32s_4k(
4704 t,
4705 sa,
4706 k,
4707 buckets,
4708 d,
4709 block_start,
4710 block_size,
4711 );
4712 }
4713
4714 let threads_usize = usize::try_from(threads)
4715 .expect("threads must be non-negative")
4716 .max(1);
4717 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4718 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4719 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4720 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4721
4722 for omp_thread_num in 0..omp_num_threads {
4723 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4724 omp_block_stride
4725 } else {
4726 block_size_usize - omp_thread_num * omp_block_stride
4727 };
4728 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4729 if omp_block_size == 0 {
4730 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4731 }
4732 partial_sorting_scan_left_to_right_32s_4k_block_gather(
4733 t,
4734 sa,
4735 &mut cache[omp_thread_num * omp_block_stride
4736 ..omp_thread_num * omp_block_stride + omp_block_size],
4737 omp_block_start as SaSint,
4738 omp_block_size as SaSint,
4739 );
4740 }
4741
4742 let cache = &mut cache[..block_size_usize];
4743 let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
4744 t,
4745 k,
4746 buckets,
4747 d,
4748 cache,
4749 block_start,
4750 block_size,
4751 );
4752 for entry in cache.iter() {
4753 if entry.symbol >= 0 {
4754 sa[entry.symbol as usize] = entry.index;
4755 }
4756 }
4757 d
4758}
4759
4760#[allow(dead_code)]
4761fn partial_sorting_scan_left_to_right_32s_1k_block_omp(
4762 t: &[SaSint],
4763 sa: &mut [SaSint],
4764 buckets: &mut [SaSint],
4765 cache: &mut [ThreadCache],
4766 block_start: SaSint,
4767 block_size: SaSint,
4768 threads: SaSint,
4769) {
4770 if block_size <= 0 {
4771 return;
4772 }
4773 if threads == 1 || block_size < 16_384 {
4774 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, block_start, block_size);
4775 return;
4776 }
4777
4778 let threads_usize = usize::try_from(threads)
4779 .expect("threads must be non-negative")
4780 .max(1);
4781 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4782 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4783 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4784 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4785
4786 for omp_thread_num in 0..omp_num_threads {
4787 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4788 omp_block_stride
4789 } else {
4790 block_size_usize - omp_thread_num * omp_block_stride
4791 };
4792 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4793 if omp_block_size == 0 {
4794 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4795 }
4796 partial_sorting_scan_left_to_right_32s_1k_block_gather(
4797 t,
4798 sa,
4799 &mut cache[omp_thread_num * omp_block_stride
4800 ..omp_thread_num * omp_block_stride + omp_block_size],
4801 omp_block_start as SaSint,
4802 omp_block_size as SaSint,
4803 );
4804 }
4805
4806 let cache = &mut cache[..block_size_usize];
4807 partial_sorting_scan_left_to_right_32s_1k_block_sort(
4808 t,
4809 buckets,
4810 cache,
4811 block_start,
4812 block_size,
4813 );
4814 compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
4815}
4816
4817#[allow(dead_code)]
4818fn partial_sorting_scan_right_to_left_32s_6k_block_omp(
4819 t: &[SaSint],
4820 sa: &mut [SaSint],
4821 buckets: &mut [SaSint],
4822 mut d: SaSint,
4823 cache: &mut [ThreadCache],
4824 block_start: SaSint,
4825 block_size: SaSint,
4826 threads: SaSint,
4827) -> SaSint {
4828 if block_size <= 0 {
4829 return d;
4830 }
4831 if threads == 1 || block_size < 16_384 {
4832 return partial_sorting_scan_right_to_left_32s_6k(
4833 t,
4834 sa,
4835 buckets,
4836 d,
4837 block_start,
4838 block_size,
4839 );
4840 }
4841
4842 let threads_usize = usize::try_from(threads)
4843 .expect("threads must be non-negative")
4844 .max(1);
4845 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4846 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4847 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4848 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4849
4850 for omp_thread_num in 0..omp_num_threads {
4851 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4852 omp_block_stride
4853 } else {
4854 block_size_usize - omp_thread_num * omp_block_stride
4855 };
4856 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4857 if omp_block_size == 0 {
4858 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4859 }
4860 partial_sorting_scan_right_to_left_32s_6k_block_gather(
4861 t,
4862 sa,
4863 &mut cache[omp_thread_num * omp_block_stride
4864 ..omp_thread_num * omp_block_stride + omp_block_size],
4865 omp_block_start as SaSint,
4866 omp_block_size as SaSint,
4867 );
4868 }
4869
4870 d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
4871 t,
4872 buckets,
4873 d,
4874 &mut cache[..block_size_usize],
4875 block_start,
4876 block_size,
4877 );
4878 for omp_thread_num in 0..omp_num_threads {
4879 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4880 omp_block_stride
4881 } else {
4882 block_size_usize - omp_thread_num * omp_block_stride
4883 };
4884 let cache_start = omp_thread_num * omp_block_stride;
4885 if omp_block_size == 0 {
4886 omp_block_size = block_size_usize - cache_start;
4887 }
4888 for entry in &cache[cache_start..cache_start + omp_block_size] {
4889 sa[entry.symbol as usize] = entry.index;
4890 }
4891 }
4892 d
4893}
4894
4895#[allow(dead_code)]
4896fn partial_sorting_scan_right_to_left_32s_4k_block_omp(
4897 t: &[SaSint],
4898 sa: &mut [SaSint],
4899 k: SaSint,
4900 buckets: &mut [SaSint],
4901 mut d: SaSint,
4902 cache: &mut [ThreadCache],
4903 block_start: SaSint,
4904 block_size: SaSint,
4905 threads: SaSint,
4906) -> SaSint {
4907 if block_size <= 0 {
4908 return d;
4909 }
4910 if threads == 1 || block_size < 16_384 {
4911 return partial_sorting_scan_right_to_left_32s_4k(
4912 t,
4913 sa,
4914 k,
4915 buckets,
4916 d,
4917 block_start,
4918 block_size,
4919 );
4920 }
4921
4922 let threads_usize = usize::try_from(threads)
4923 .expect("threads must be non-negative")
4924 .max(1);
4925 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4926 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4927 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4928 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4929
4930 for omp_thread_num in 0..omp_num_threads {
4931 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4932 omp_block_stride
4933 } else {
4934 block_size_usize - omp_thread_num * omp_block_stride
4935 };
4936 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4937 if omp_block_size == 0 {
4938 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
4939 }
4940 partial_sorting_scan_right_to_left_32s_4k_block_gather(
4941 t,
4942 sa,
4943 &mut cache[omp_thread_num * omp_block_stride
4944 ..omp_thread_num * omp_block_stride + omp_block_size],
4945 omp_block_start as SaSint,
4946 omp_block_size as SaSint,
4947 );
4948 }
4949
4950 d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
4951 t,
4952 k,
4953 buckets,
4954 d,
4955 &mut cache[..block_size_usize],
4956 block_start,
4957 block_size,
4958 );
4959 let mut write = 0usize;
4960 for read in 0..block_size_usize {
4961 let entry = cache[read];
4962 if entry.symbol >= 0 {
4963 cache[write] = entry;
4964 write += 1;
4965 }
4966 }
4967 for entry in &cache[..write] {
4968 sa[entry.symbol as usize] = entry.index;
4969 }
4970 d
4971}
4972
4973#[allow(dead_code)]
4974fn partial_sorting_scan_right_to_left_32s_1k_block_omp(
4975 t: &[SaSint],
4976 sa: &mut [SaSint],
4977 buckets: &mut [SaSint],
4978 cache: &mut [ThreadCache],
4979 block_start: SaSint,
4980 block_size: SaSint,
4981 threads: SaSint,
4982) {
4983 if block_size <= 0 {
4984 return;
4985 }
4986 if threads == 1 || block_size < 16_384 {
4987 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, block_start, block_size);
4988 return;
4989 }
4990
4991 let threads_usize = usize::try_from(threads)
4992 .expect("threads must be non-negative")
4993 .max(1);
4994 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4995 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4996 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4997 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4998
4999 for omp_thread_num in 0..omp_num_threads {
5000 let mut omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5001 omp_block_stride
5002 } else {
5003 block_size_usize - omp_thread_num * omp_block_stride
5004 };
5005 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5006 if omp_block_size == 0 {
5007 omp_block_size = block_size_usize - (omp_block_start - block_start_usize);
5008 }
5009 partial_sorting_scan_right_to_left_32s_1k_block_gather(
5010 t,
5011 sa,
5012 &mut cache[omp_thread_num * omp_block_stride
5013 ..omp_thread_num * omp_block_stride + omp_block_size],
5014 omp_block_start as SaSint,
5015 omp_block_size as SaSint,
5016 );
5017 }
5018
5019 let cache = &mut cache[..block_size_usize];
5020 partial_sorting_scan_right_to_left_32s_1k_block_sort(
5021 t,
5022 buckets,
5023 cache,
5024 block_start,
5025 block_size,
5026 );
5027 compact_and_place_cached_suffixes(sa, cache, block_start, block_size);
5028}
5029
5030#[allow(dead_code)]
5031fn partial_sorting_gather_lms_suffixes_32s_4k(
5032 sa: &mut [SaSint],
5033 omp_block_start: SaSint,
5034 omp_block_size: SaSint,
5035) -> SaSint {
5036 let mut i = omp_block_start;
5037 let mut j = omp_block_start + omp_block_size - 3;
5038 let mut l = omp_block_start;
5039
5040 while i < j {
5041 let s0 = sa[i as usize] as SaUint;
5042 sa[l as usize] = (s0.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5043 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5044 l += SaSint::from((s0 as SaSint) < 0);
5045
5046 let s1 = sa[(i + 1) as usize] as SaUint;
5047 sa[l as usize] = (s1.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5048 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5049 l += SaSint::from((s1 as SaSint) < 0);
5050
5051 let s2 = sa[(i + 2) as usize] as SaUint;
5052 sa[l as usize] = (s2.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5053 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5054 l += SaSint::from((s2 as SaSint) < 0);
5055
5056 let s3 = sa[(i + 3) as usize] as SaUint;
5057 sa[l as usize] = (s3.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5058 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5059 l += SaSint::from((s3 as SaSint) < 0);
5060
5061 i += 4;
5062 }
5063
5064 j += 3;
5065 while i < j {
5066 let s = sa[i as usize] as SaUint;
5067 sa[l as usize] = (s.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)
5068 & !(SUFFIX_GROUP_MARKER as SaUint)) as SaSint;
5069 l += SaSint::from((s as SaSint) < 0);
5070 i += 1;
5071 }
5072
5073 l
5074}
5075
5076#[allow(dead_code)]
5077fn partial_sorting_gather_lms_suffixes_32s_1k(
5078 sa: &mut [SaSint],
5079 omp_block_start: SaSint,
5080 omp_block_size: SaSint,
5081) -> SaSint {
5082 let mut i = omp_block_start;
5083 let mut j = omp_block_start + omp_block_size - 3;
5084 let mut l = omp_block_start;
5085
5086 while i < j {
5087 let s0 = sa[i as usize];
5088 sa[l as usize] = s0 & SAINT_MAX;
5089 l += SaSint::from(s0 < 0);
5090
5091 let s1 = sa[(i + 1) as usize];
5092 sa[l as usize] = s1 & SAINT_MAX;
5093 l += SaSint::from(s1 < 0);
5094
5095 let s2 = sa[(i + 2) as usize];
5096 sa[l as usize] = s2 & SAINT_MAX;
5097 l += SaSint::from(s2 < 0);
5098
5099 let s3 = sa[(i + 3) as usize];
5100 sa[l as usize] = s3 & SAINT_MAX;
5101 l += SaSint::from(s3 < 0);
5102
5103 i += 4;
5104 }
5105
5106 j += 3;
5107 while i < j {
5108 let s = sa[i as usize];
5109 sa[l as usize] = s & SAINT_MAX;
5110 l += SaSint::from(s < 0);
5111 i += 1;
5112 }
5113
5114 l
5115}
5116
5117#[allow(dead_code)]
5118fn partial_sorting_gather_lms_suffixes_32s_4k_omp(
5119 sa: &mut [SaSint],
5120 n: SaSint,
5121 threads: SaSint,
5122 thread_state: &mut [ThreadState],
5123) {
5124 let n_usize = usize::try_from(n).expect("n must be non-negative");
5125 let thread_count = if threads > 1 && n >= 65_536 {
5126 usize::try_from(threads)
5127 .expect("threads must be non-negative")
5128 .min(thread_state.len())
5129 .max(1)
5130 } else {
5131 1
5132 };
5133
5134 if thread_count == 1 {
5135 let _ = partial_sorting_gather_lms_suffixes_32s_4k(sa, 0, n);
5136 return;
5137 }
5138
5139 let block_stride = (n_usize / thread_count) & !15usize;
5140 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5141 let block_start = thread * block_stride;
5142 let block_size = if thread + 1 < thread_count {
5143 block_stride
5144 } else {
5145 n_usize - block_start
5146 };
5147 state.position = block_start as SaSint;
5148 state.count = partial_sorting_gather_lms_suffixes_32s_4k(
5149 sa,
5150 block_start as SaSint,
5151 block_size as SaSint,
5152 ) - block_start as SaSint;
5153 }
5154
5155 let mut position = 0usize;
5156 for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5157 let count = usize::try_from(state.count).expect("count must be non-negative");
5158 let src = usize::try_from(state.position).expect("position must be non-negative");
5159 if thread > 0 && count > 0 {
5160 sa.copy_within(src..src + count, position);
5161 }
5162 position += count;
5163 }
5164}
5165
5166#[allow(dead_code)]
5167fn partial_sorting_gather_lms_suffixes_32s_1k_omp(
5168 sa: &mut [SaSint],
5169 n: SaSint,
5170 threads: SaSint,
5171 thread_state: &mut [ThreadState],
5172) {
5173 let n_usize = usize::try_from(n).expect("n must be non-negative");
5174 let thread_count = if threads > 1 && n >= 65_536 {
5175 usize::try_from(threads)
5176 .expect("threads must be non-negative")
5177 .min(thread_state.len())
5178 .max(1)
5179 } else {
5180 1
5181 };
5182
5183 if thread_count == 1 {
5184 let _ = partial_sorting_gather_lms_suffixes_32s_1k(sa, 0, n);
5185 return;
5186 }
5187
5188 let block_stride = (n_usize / thread_count) & !15usize;
5189 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5190 let block_start = thread * block_stride;
5191 let block_size = if thread + 1 < thread_count {
5192 block_stride
5193 } else {
5194 n_usize - block_start
5195 };
5196 state.position = block_start as SaSint;
5197 state.count = partial_sorting_gather_lms_suffixes_32s_1k(
5198 sa,
5199 block_start as SaSint,
5200 block_size as SaSint,
5201 ) - block_start as SaSint;
5202 }
5203
5204 let mut position = 0usize;
5205 for (thread, state) in thread_state.iter().take(thread_count).enumerate() {
5206 let count = usize::try_from(state.count).expect("count must be non-negative");
5207 let src = usize::try_from(state.position).expect("position must be non-negative");
5208 if thread > 0 && count > 0 {
5209 sa.copy_within(src..src + count, position);
5210 }
5211 position += count;
5212 }
5213}
5214
5215#[allow(dead_code)]
5216fn partial_gsa_scan_right_to_left_16u(
5217 t: &[u16],
5218 sa: &mut [SaSint],
5219 buckets: &mut [SaSint],
5220 mut d: SaSint,
5221 omp_block_start: SaSint,
5222 omp_block_size: SaSint,
5223) -> SaSint {
5224 let mut i = (omp_block_start + omp_block_size - 1) as isize;
5225 let mut j = (omp_block_start + 64 + 1) as isize;
5226 while i >= j {
5227 let mut p0 = sa[i as usize];
5228 d += SaSint::from(p0 < 0);
5229 p0 &= SAINT_MAX;
5230 let v0 = buckets_index2(
5231 t[(p0 - 1) as usize] as usize,
5232 usize::from(t[(p0 - 2) as usize] > t[(p0 - 1) as usize]),
5233 );
5234 if v0 != 1 {
5235 let mark0 = if buckets[2 * ALPHABET_SIZE + v0] != d {
5236 SAINT_MIN
5237 } else {
5238 0
5239 };
5240 buckets[v0] -= 1;
5241 sa[buckets[v0] as usize] = (p0 - 1) | mark0;
5242 buckets[2 * ALPHABET_SIZE + v0] = d;
5243 }
5244
5245 let mut p1 = sa[(i - 1) as usize];
5246 d += SaSint::from(p1 < 0);
5247 p1 &= SAINT_MAX;
5248 let v1 = buckets_index2(
5249 t[(p1 - 1) as usize] as usize,
5250 usize::from(t[(p1 - 2) as usize] > t[(p1 - 1) as usize]),
5251 );
5252 if v1 != 1 {
5253 let mark1 = if buckets[2 * ALPHABET_SIZE + v1] != d {
5254 SAINT_MIN
5255 } else {
5256 0
5257 };
5258 buckets[v1] -= 1;
5259 sa[buckets[v1] as usize] = (p1 - 1) | mark1;
5260 buckets[2 * ALPHABET_SIZE + v1] = d;
5261 }
5262
5263 i -= 2;
5264 }
5265
5266 j -= 64 + 1;
5267 while i >= j {
5268 let mut p = sa[i as usize];
5269 d += SaSint::from(p < 0);
5270 p &= SAINT_MAX;
5271 let v = buckets_index2(
5272 t[(p - 1) as usize] as usize,
5273 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5274 );
5275 if v != 1 {
5276 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5277 SAINT_MIN
5278 } else {
5279 0
5280 };
5281 buckets[v] -= 1;
5282 sa[buckets[v] as usize] = (p - 1) | mark;
5283 buckets[2 * ALPHABET_SIZE + v] = d;
5284 }
5285 i -= 1;
5286 }
5287
5288 d
5289}
5290
5291#[allow(dead_code)]
5292fn partial_gsa_scan_right_to_left_16u_block_omp(
5293 t: &[u16],
5294 sa: &mut [SaSint],
5295 k: SaSint,
5296 buckets: &mut [SaSint],
5297 d: SaSint,
5298 block_start: SaSint,
5299 block_size: SaSint,
5300 threads: SaSint,
5301 thread_state: &mut [ThreadState],
5302) -> SaSint {
5303 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
5304 usize::try_from(threads)
5305 .expect("threads must be non-negative")
5306 .min(thread_state.len())
5307 } else {
5308 1
5309 };
5310 if thread_count <= 1 {
5311 return partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, block_start, block_size);
5312 }
5313
5314 let width = 2 * k as usize;
5315 let distinct_offset = 2 * ALPHABET_SIZE;
5316 let block_stride = (block_size / thread_count as SaSint) & !15;
5317
5318 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
5319 let local_start = thread as SaSint * block_stride;
5320 let local_size = if thread + 1 < thread_count {
5321 block_stride
5322 } else {
5323 block_size - local_start
5324 };
5325 let mut local_state = ThreadState::default();
5326 state.position = partial_sorting_scan_right_to_left_16u_block_prepare(
5327 t,
5328 sa,
5329 k,
5330 &mut state.buckets,
5331 &mut state.cache,
5332 block_start + local_start,
5333 local_size,
5334 &mut local_state,
5335 );
5336 state.count = local_state.cache_entries as SaSint;
5337 }
5338
5339 let mut next_d = d;
5340 for state in thread_state.iter_mut().take(thread_count).rev() {
5341 for c in 0..width {
5342 let a = buckets[c];
5343 let b = state.buckets[c];
5344 buckets[c] = a - b;
5345 state.buckets[c] = a;
5346 }
5347
5348 next_d -= 1;
5349 for c in 0..width {
5350 let offset = distinct_offset + c;
5351 let a = buckets[offset];
5352 let b = state.buckets[offset];
5353 let shifted = b + next_d;
5354 buckets[offset] = if b > 0 { shifted } else { a };
5355 state.buckets[offset] = a;
5356 }
5357 next_d += 1 + state.position;
5358 state.position = next_d - state.position;
5359 }
5360
5361 for state in thread_state.iter_mut().take(thread_count) {
5362 partial_gsa_scan_right_to_left_16u_block_place(
5363 sa,
5364 &mut state.buckets,
5365 &state.cache,
5366 state.count,
5367 state.position,
5368 );
5369 }
5370
5371 next_d
5372}
5373
5374#[allow(dead_code)]
5375fn partial_gsa_scan_right_to_left_16u_omp(
5376 t: &[u16],
5377 sa: &mut [SaSint],
5378 n: SaSint,
5379 k: SaSint,
5380 buckets: &mut [SaSint],
5381 first_lms_suffix: SaSint,
5382 left_suffixes_count: SaSint,
5383 d: SaSint,
5384 threads: SaSint,
5385) {
5386 let scan_start = left_suffixes_count + 1;
5387 let scan_end = n - first_lms_suffix;
5388
5389 if threads == 1 || scan_end - scan_start < 65536 {
5390 partial_gsa_scan_right_to_left_16u(t, sa, buckets, d, scan_start, scan_end - scan_start);
5391 } else {
5392 let mut d = d;
5393 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
5394 let mut block_start = scan_end - 1;
5395 while block_start >= scan_start {
5396 if sa[block_start as usize] == 0 {
5397 block_start -= 1;
5398 } else {
5399 let block_limit = threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
5400 let mut block_max_end = block_start - block_limit;
5401 if block_max_end < scan_start {
5402 block_max_end = scan_start - 1;
5403 }
5404 let mut block_end = block_start - 1;
5405 while block_end > block_max_end && sa[block_end as usize] != 0 {
5406 block_end -= 1;
5407 }
5408 let block_size = block_start - block_end;
5409
5410 if block_size < 32 {
5411 while block_start > block_end {
5412 let mut p = sa[block_start as usize];
5413 d += SaSint::from(p < 0);
5414 p &= SAINT_MAX;
5415 let v = buckets_index2(
5416 t[(p - 1) as usize] as usize,
5417 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
5418 );
5419 if v != 1 {
5420 let mark = if buckets[2 * ALPHABET_SIZE + v] != d {
5421 SAINT_MIN
5422 } else {
5423 0
5424 };
5425 buckets[v] -= 1;
5426 sa[buckets[v] as usize] = (p - 1) | mark;
5427 buckets[2 * ALPHABET_SIZE + v] = d;
5428 }
5429 block_start -= 1;
5430 }
5431 } else {
5432 d = partial_gsa_scan_right_to_left_16u_block_omp(
5433 t,
5434 sa,
5435 k,
5436 buckets,
5437 d,
5438 block_end + 1,
5439 block_size,
5440 threads,
5441 &mut thread_state,
5442 );
5443 block_start = block_end;
5444 }
5445 }
5446 }
5447 }
5448}
5449
5450#[allow(dead_code)]
5451fn partial_sorting_shift_markers_16u_omp(
5452 sa: &mut [SaSint],
5453 n: SaSint,
5454 buckets: &[SaSint],
5455 threads: SaSint,
5456) {
5457 let thread_count = if threads > 1 && n >= 65536 {
5458 usize::try_from(threads).expect("threads must be positive")
5459 } else {
5460 1
5461 };
5462 let c_step = buckets_index2(1, 0) as isize;
5463 let c_min = buckets_index2(1, 0) as isize;
5464 let c_max = buckets_index2(ALPHABET_SIZE - 1, 0) as isize;
5465 for t in 0..thread_count {
5466 let mut c = c_max - (t as isize * c_step);
5467 while c >= c_min {
5468 let c_usize = c as usize;
5469 let mut s = SAINT_MIN;
5470 let mut i = buckets[4 * ALPHABET_SIZE + c_usize] as isize - 1;
5471 let mut j = buckets[c_usize - buckets_index2(1, 0)] as isize + 3;
5472 while i >= j {
5473 let p0 = sa[i as usize];
5474 let q0 = (p0 & SAINT_MIN) ^ s;
5475 s ^= q0;
5476 sa[i as usize] = p0 ^ q0;
5477
5478 let p1 = sa[(i - 1) as usize];
5479 let q1 = (p1 & SAINT_MIN) ^ s;
5480 s ^= q1;
5481 sa[(i - 1) as usize] = p1 ^ q1;
5482
5483 let p2 = sa[(i - 2) as usize];
5484 let q2 = (p2 & SAINT_MIN) ^ s;
5485 s ^= q2;
5486 sa[(i - 2) as usize] = p2 ^ q2;
5487
5488 let p3 = sa[(i - 3) as usize];
5489 let q3 = (p3 & SAINT_MIN) ^ s;
5490 s ^= q3;
5491 sa[(i - 3) as usize] = p3 ^ q3;
5492
5493 i -= 4;
5494 }
5495
5496 j -= 3;
5497 while i >= j {
5498 let p = sa[i as usize];
5499 let q = (p & SAINT_MIN) ^ s;
5500 s ^= q;
5501 sa[i as usize] = p ^ q;
5502 i -= 1;
5503 }
5504
5505 c -= c_step * thread_count as isize;
5506 }
5507 }
5508}
5509
5510#[allow(dead_code)]
5511fn induce_partial_order_16u_omp(
5512 t: &[u16],
5513 sa: &mut [SaSint],
5514 n: SaSint,
5515 k: SaSint,
5516 flags: SaSint,
5517 buckets: &mut [SaSint],
5518 first_lms_suffix: SaSint,
5519 left_suffixes_count: SaSint,
5520 threads: SaSint,
5521) {
5522 buckets[2 * ALPHABET_SIZE..4 * ALPHABET_SIZE].fill(0);
5523
5524 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5525 let marker = 4 * ALPHABET_SIZE + buckets_index2(0, 1);
5526 buckets[marker] = buckets[4 * ALPHABET_SIZE + buckets_index2(1, 1)] - 1;
5527 flip_suffix_markers_omp(sa, buckets[marker], threads);
5528 }
5529
5530 let d = partial_sorting_scan_left_to_right_16u_omp(
5531 t,
5532 sa,
5533 n,
5534 k,
5535 buckets,
5536 left_suffixes_count,
5537 0,
5538 threads,
5539 );
5540 partial_sorting_shift_markers_16u_omp(sa, n, buckets, threads);
5541
5542 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5543 partial_gsa_scan_right_to_left_16u_omp(
5544 t,
5545 sa,
5546 n,
5547 k,
5548 buckets,
5549 first_lms_suffix,
5550 left_suffixes_count,
5551 d,
5552 threads,
5553 );
5554
5555 if t[first_lms_suffix as usize] == 0 {
5556 let count = (buckets[buckets_index2(1, 1)] - 1) as usize;
5557 sa.copy_within(0..count, 1);
5558 sa[0] = first_lms_suffix | SAINT_MIN;
5559 }
5560
5561 buckets[buckets_index2(0, 1)] = 0;
5562 } else {
5563 partial_sorting_scan_right_to_left_16u_omp(
5564 t,
5565 sa,
5566 n,
5567 k,
5568 buckets,
5569 first_lms_suffix,
5570 left_suffixes_count,
5571 d,
5572 threads,
5573 );
5574 }
5575}
5576
5577#[allow(dead_code)]
5578fn induce_partial_order_32s_6k_omp(
5579 t: &[SaSint],
5580 sa: &mut [SaSint],
5581 n: SaSint,
5582 k: SaSint,
5583 buckets: &mut [SaSint],
5584 first_lms_suffix: SaSint,
5585 left_suffixes_count: SaSint,
5586 threads: SaSint,
5587 thread_state: &mut [ThreadState],
5588) {
5589 let d = partial_sorting_scan_left_to_right_32s_6k_omp(
5590 t,
5591 sa,
5592 n,
5593 buckets,
5594 left_suffixes_count,
5595 0,
5596 threads,
5597 thread_state,
5598 );
5599 partial_sorting_shift_markers_32s_6k_omp(sa, k, buckets, threads);
5600 partial_sorting_shift_buckets_32s_6k(k, buckets);
5601 partial_sorting_scan_right_to_left_32s_6k_omp(
5602 t,
5603 sa,
5604 n,
5605 buckets,
5606 first_lms_suffix,
5607 left_suffixes_count,
5608 d,
5609 threads,
5610 thread_state,
5611 );
5612}
5613
5614#[allow(dead_code)]
5615fn induce_partial_order_32s_4k_omp(
5616 t: &[SaSint],
5617 sa: &mut [SaSint],
5618 n: SaSint,
5619 k: SaSint,
5620 buckets: &mut [SaSint],
5621 threads: SaSint,
5622 thread_state: &mut [ThreadState],
5623) {
5624 buckets[..2 * k as usize].fill(0);
5625 let d = partial_sorting_scan_left_to_right_32s_4k_omp(
5626 t,
5627 sa,
5628 n,
5629 k,
5630 buckets,
5631 0,
5632 threads,
5633 thread_state,
5634 );
5635 partial_sorting_shift_markers_32s_4k(sa, n);
5636 partial_sorting_scan_right_to_left_32s_4k_omp(t, sa, n, k, buckets, d, threads, thread_state);
5637 partial_sorting_gather_lms_suffixes_32s_4k_omp(sa, n, threads, thread_state);
5638}
5639
5640#[allow(dead_code)]
5641fn induce_partial_order_32s_2k_omp(
5642 t: &[SaSint],
5643 sa: &mut [SaSint],
5644 n: SaSint,
5645 k: SaSint,
5646 buckets: &mut [SaSint],
5647 threads: SaSint,
5648 thread_state: &mut [ThreadState],
5649) {
5650 let k = k as usize;
5651 let (left, right) = buckets.split_at_mut(k);
5652 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, right, threads, thread_state);
5653 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, left, threads, thread_state);
5654 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5655}
5656
5657#[allow(dead_code)]
5658fn induce_partial_order_32s_1k_omp(
5659 t: &[SaSint],
5660 sa: &mut [SaSint],
5661 n: SaSint,
5662 k: SaSint,
5663 buckets: &mut [SaSint],
5664 threads: SaSint,
5665 thread_state: &mut [ThreadState],
5666) {
5667 count_suffixes_32s(t, n, k, buckets);
5668 initialize_buckets_start_32s_1k(k, buckets);
5669 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5670
5671 count_suffixes_32s(t, n, k, buckets);
5672 initialize_buckets_end_32s_1k(k, buckets);
5673 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
5674
5675 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
5676}
5677
5678#[allow(dead_code)]
5679fn final_sorting_scan_left_to_right_16u(
5680 t: &[u16],
5681 sa: &mut [SaSint],
5682 induction_bucket: &mut [SaSint],
5683 omp_block_start: SaSint,
5684 omp_block_size: SaSint,
5685) {
5686 let mut i = omp_block_start as isize;
5687 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
5688 while i < j {
5689 final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5690 final_sorting_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
5691 i += 2;
5692 }
5693 j += 64 + 1;
5694 while i < j {
5695 final_sorting_ltr_step(t, sa, induction_bucket, i as usize);
5696 i += 1;
5697 }
5698}
5699
5700#[allow(dead_code)]
5701fn final_sorting_scan_right_to_left_16u(
5702 t: &[u16],
5703 sa: &mut [SaSint],
5704 induction_bucket: &mut [SaSint],
5705 omp_block_start: SaSint,
5706 omp_block_size: SaSint,
5707) {
5708 let mut i = (omp_block_start + omp_block_size - 1) as isize;
5709 let mut j = (omp_block_start + 64 + 1) as isize;
5710 while i >= j {
5711 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5712 final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, false);
5713 i -= 2;
5714 }
5715 j -= 64 + 1;
5716 while i >= j {
5717 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, false);
5718 i -= 1;
5719 }
5720}
5721
5722#[allow(dead_code)]
5723fn final_sorting_scan_left_to_right_32s(
5724 t: &[SaSint],
5725 sa: &mut [SaSint],
5726 induction_bucket: &mut [SaSint],
5727 omp_block_start: SaSint,
5728 omp_block_size: SaSint,
5729) {
5730 let mut i = omp_block_start as isize;
5731 let mut j = (omp_block_start + omp_block_size - 2 * 64 - 1) as isize;
5732 while i < j {
5733 for current in [i, i + 1] {
5734 let current = current as usize;
5735 let mut p = sa[current];
5736 sa[current] = p ^ SAINT_MIN;
5737 if p > 0 {
5738 p -= 1;
5739 let p_usize = p as usize;
5740 let bucket = t[p_usize] as usize;
5741 let slot = induction_bucket[bucket] as usize;
5742 sa[slot] = p
5743 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5744 << (SAINT_BIT - 1));
5745 induction_bucket[bucket] += 1;
5746 }
5747 }
5748 i += 2;
5749 }
5750
5751 j += 2 * 64 + 1;
5752 while i < j {
5753 let current = i as usize;
5754 let mut p = sa[current];
5755 sa[current] = p ^ SAINT_MIN;
5756 if p > 0 {
5757 p -= 1;
5758 let p_usize = p as usize;
5759 let bucket = t[p_usize] as usize;
5760 let slot = induction_bucket[bucket] as usize;
5761 sa[slot] = p
5762 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5763 << (SAINT_BIT - 1));
5764 induction_bucket[bucket] += 1;
5765 }
5766 i += 1;
5767 }
5768}
5769
5770#[allow(dead_code)]
5771fn final_sorting_scan_left_to_right_32s_block_gather(
5772 t: &[SaSint],
5773 sa: &mut [SaSint],
5774 cache: &mut [ThreadCache],
5775 omp_block_start: SaSint,
5776 omp_block_size: SaSint,
5777) {
5778 if omp_block_size <= 0 {
5779 return;
5780 }
5781
5782 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5783 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5784 for offset in 0..size {
5785 let current = start + offset;
5786 let mut symbol = SAINT_MIN;
5787 let mut p = sa[current];
5788 sa[current] = p ^ SAINT_MIN;
5789 if p > 0 {
5790 p -= 1;
5791 let p_usize = p as usize;
5792 cache[offset].index = p
5793 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
5794 << (SAINT_BIT - 1));
5795 symbol = t[p_usize];
5796 }
5797 cache[offset].symbol = symbol;
5798 }
5799}
5800
5801#[allow(dead_code)]
5802fn final_sorting_scan_left_to_right_32s_block_sort(
5803 t: &[SaSint],
5804 induction_bucket: &mut [SaSint],
5805 cache: &mut [ThreadCache],
5806 omp_block_start: SaSint,
5807 omp_block_size: SaSint,
5808) {
5809 if omp_block_size <= 0 {
5810 return;
5811 }
5812
5813 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5814 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5815 let block_end = start + size;
5816
5817 for offset in 0..size {
5818 let v = cache[offset].symbol;
5819 if v >= 0 {
5820 let bucket_index = v as usize;
5821 let target = induction_bucket[bucket_index];
5822 cache[offset].symbol = target;
5823 induction_bucket[bucket_index] += 1;
5824 if target >= omp_block_start && target < block_end as SaSint {
5825 let ni = usize::try_from(target - omp_block_start)
5826 .expect("cache slot must be non-negative");
5827 let mut np = cache[offset].index;
5828 cache[offset].index = np ^ SAINT_MIN;
5829 if np > 0 {
5830 np -= 1;
5831 let np_usize = np as usize;
5832 cache[ni].index = np
5833 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
5834 as SaSint)
5835 << (SAINT_BIT - 1));
5836 cache[ni].symbol = t[np_usize];
5837 }
5838 }
5839 }
5840 }
5841}
5842
5843#[allow(dead_code)]
5844fn final_sorting_scan_left_to_right_32s_block_omp(
5845 t: &[SaSint],
5846 sa: &mut [SaSint],
5847 buckets: &mut [SaSint],
5848 cache: &mut [ThreadCache],
5849 block_start: SaSint,
5850 block_size: SaSint,
5851 threads: SaSint,
5852) {
5853 if threads <= 1 || block_size < 16_384 {
5854 final_sorting_scan_left_to_right_32s(t, sa, buckets, block_start, block_size);
5855 return;
5856 }
5857
5858 final_sorting_scan_left_to_right_32s_block_gather(t, sa, cache, block_start, block_size);
5859 final_sorting_scan_left_to_right_32s_block_sort(t, buckets, cache, block_start, block_size);
5860
5861 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5862 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
5863 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5864 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5865 for omp_thread_num in 0..omp_num_threads {
5866 let omp_block_start = omp_thread_num * omp_block_stride;
5867 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5868 omp_block_stride
5869 } else {
5870 block_size_usize - omp_block_start
5871 };
5872 compact_and_place_cached_suffixes(
5873 sa,
5874 cache,
5875 omp_block_start as SaSint,
5876 omp_block_size as SaSint,
5877 );
5878 }
5879}
5880
5881#[allow(dead_code)]
5882fn final_sorting_scan_left_to_right_32s_omp(
5883 t: &[SaSint],
5884 sa: &mut [SaSint],
5885 n: SaSint,
5886 induction_bucket: &mut [SaSint],
5887 threads: SaSint,
5888 thread_state: &mut [ThreadState],
5889) {
5890 let last = (n - 1) as usize;
5891 let bucket = t[last] as usize;
5892 let slot = induction_bucket[bucket] as usize;
5893 sa[slot] = (n - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
5894 induction_bucket[bucket] += 1;
5895
5896 if threads == 1 || n < 65536 || thread_state.is_empty() {
5897 final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n);
5898 return;
5899 }
5900
5901 let threads_usize = usize::try_from(threads)
5902 .expect("threads must be non-negative")
5903 .max(1);
5904 let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
5905 let mut cache = vec![ThreadCache::default(); block_span];
5906 let mut block_start = 0;
5907 while block_start < n {
5908 let block_end = (block_start + block_span as SaSint).min(n);
5909 final_sorting_scan_left_to_right_32s_block_omp(
5910 t,
5911 sa,
5912 induction_bucket,
5913 &mut cache,
5914 block_start,
5915 block_end - block_start,
5916 threads,
5917 );
5918 block_start = block_end;
5919 }
5920}
5921
5922#[allow(dead_code)]
5923fn final_sorting_scan_right_to_left_32s(
5924 t: &[SaSint],
5925 sa: &mut [SaSint],
5926 induction_bucket: &mut [SaSint],
5927 omp_block_start: SaSint,
5928 omp_block_size: SaSint,
5929) {
5930 let mut i = (omp_block_start + omp_block_size - 1) as isize;
5931 let mut j = (omp_block_start + 2 * 64 + 1) as isize;
5932 while i >= j {
5933 for current in [i, i - 1] {
5934 let current = current as usize;
5935 let mut p = sa[current];
5936 sa[current] = p & SAINT_MAX;
5937 if p > 0 {
5938 p -= 1;
5939 let p_usize = p as usize;
5940 let bucket = t[p_usize] as usize;
5941 induction_bucket[bucket] -= 1;
5942 let slot = induction_bucket[bucket] as usize;
5943 sa[slot] = p
5944 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5945 << (SAINT_BIT - 1));
5946 }
5947 }
5948 i -= 2;
5949 }
5950
5951 j -= 2 * 64 + 1;
5952 while i >= j {
5953 let current = i as usize;
5954 let mut p = sa[current];
5955 sa[current] = p & SAINT_MAX;
5956 if p > 0 {
5957 p -= 1;
5958 let p_usize = p as usize;
5959 let bucket = t[p_usize] as usize;
5960 induction_bucket[bucket] -= 1;
5961 let slot = induction_bucket[bucket] as usize;
5962 sa[slot] = p
5963 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5964 << (SAINT_BIT - 1));
5965 }
5966 i -= 1;
5967 }
5968}
5969
5970#[allow(dead_code)]
5971fn final_sorting_scan_right_to_left_32s_block_gather(
5972 t: &[SaSint],
5973 sa: &mut [SaSint],
5974 cache: &mut [ThreadCache],
5975 omp_block_start: SaSint,
5976 omp_block_size: SaSint,
5977) {
5978 if omp_block_size <= 0 {
5979 return;
5980 }
5981
5982 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5983 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5984 for offset in 0..size {
5985 let current = start + offset;
5986 let mut symbol = SAINT_MIN;
5987 let mut p = sa[current];
5988 sa[current] = p & SAINT_MAX;
5989 if p > 0 {
5990 p -= 1;
5991 let p_usize = p as usize;
5992 cache[offset].index = p
5993 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
5994 << (SAINT_BIT - 1));
5995 symbol = t[p_usize];
5996 }
5997 cache[offset].symbol = symbol;
5998 }
5999}
6000
6001#[allow(dead_code)]
6002fn final_sorting_scan_right_to_left_32s_block_sort(
6003 t: &[SaSint],
6004 induction_bucket: &mut [SaSint],
6005 cache: &mut [ThreadCache],
6006 omp_block_start: SaSint,
6007 omp_block_size: SaSint,
6008) {
6009 if omp_block_size <= 0 {
6010 return;
6011 }
6012
6013 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6014 let block_end = omp_block_start + omp_block_size;
6015 let mut offset = size;
6016
6017 while offset > 0 {
6018 offset -= 1;
6019 let v = cache[offset].symbol;
6020 if v >= 0 {
6021 let bucket_index = v as usize;
6022 induction_bucket[bucket_index] -= 1;
6023 let target = induction_bucket[bucket_index];
6024 cache[offset].symbol = target;
6025 if target >= omp_block_start && target < block_end {
6026 let ni = usize::try_from(target - omp_block_start)
6027 .expect("cache slot must be non-negative");
6028 let mut np = cache[offset].index;
6029 cache[offset].index = np & SAINT_MAX;
6030 if np > 0 {
6031 np -= 1;
6032 let np_usize = np as usize;
6033 cache[ni].index = np
6034 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
6035 as SaSint)
6036 << (SAINT_BIT - 1));
6037 cache[ni].symbol = t[np_usize];
6038 }
6039 }
6040 }
6041 }
6042}
6043
6044#[allow(dead_code)]
6045fn final_sorting_scan_right_to_left_32s_block_omp(
6046 t: &[SaSint],
6047 sa: &mut [SaSint],
6048 buckets: &mut [SaSint],
6049 cache: &mut [ThreadCache],
6050 block_start: SaSint,
6051 block_size: SaSint,
6052 threads: SaSint,
6053) {
6054 if threads <= 1 || block_size < 16_384 {
6055 final_sorting_scan_right_to_left_32s(t, sa, buckets, block_start, block_size);
6056 return;
6057 }
6058
6059 final_sorting_scan_right_to_left_32s_block_gather(t, sa, cache, block_start, block_size);
6060 final_sorting_scan_right_to_left_32s_block_sort(t, buckets, cache, block_start, block_size);
6061
6062 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
6063 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
6064 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
6065 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
6066 for omp_thread_num in 0..omp_num_threads {
6067 let omp_block_start = omp_thread_num * omp_block_stride;
6068 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6069 omp_block_stride
6070 } else {
6071 block_size_usize - omp_block_start
6072 };
6073 compact_and_place_cached_suffixes(
6074 sa,
6075 cache,
6076 omp_block_start as SaSint,
6077 omp_block_size as SaSint,
6078 );
6079 }
6080}
6081
6082#[allow(dead_code)]
6083fn final_sorting_scan_right_to_left_32s_omp(
6084 t: &[SaSint],
6085 sa: &mut [SaSint],
6086 n: SaSint,
6087 induction_bucket: &mut [SaSint],
6088 threads: SaSint,
6089 thread_state: &mut [ThreadState],
6090) {
6091 if threads == 1 || n < 65536 || thread_state.is_empty() {
6092 final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n);
6093 return;
6094 }
6095
6096 let threads_usize = usize::try_from(threads)
6097 .expect("threads must be non-negative")
6098 .max(1);
6099 let block_span = threads_usize * PER_THREAD_CACHE_SIZE;
6100 let mut cache = vec![ThreadCache::default(); block_span];
6101 let mut block_start = n - 1;
6102 while block_start >= 0 {
6103 let block_end = (block_start - block_span as SaSint).max(-1);
6104 final_sorting_scan_right_to_left_32s_block_omp(
6105 t,
6106 sa,
6107 induction_bucket,
6108 &mut cache,
6109 block_end + 1,
6110 block_start - block_end,
6111 threads,
6112 );
6113 block_start = block_end;
6114 }
6115}
6116
6117#[allow(dead_code)]
6118fn induce_final_order_32s_6k(
6119 t: &[SaSint],
6120 sa: &mut [SaSint],
6121 n: SaSint,
6122 k: SaSint,
6123 buckets: &mut [SaSint],
6124 threads: SaSint,
6125 thread_state: &mut [ThreadState],
6126) {
6127 let k = k as usize;
6128 final_sorting_scan_left_to_right_32s_omp(
6129 t,
6130 sa,
6131 n,
6132 &mut buckets[4 * k..5 * k],
6133 threads,
6134 thread_state,
6135 );
6136 final_sorting_scan_right_to_left_32s_omp(
6137 t,
6138 sa,
6139 n,
6140 &mut buckets[5 * k..6 * k],
6141 threads,
6142 thread_state,
6143 );
6144}
6145
6146#[allow(dead_code)]
6147fn induce_final_order_32s_4k(
6148 t: &[SaSint],
6149 sa: &mut [SaSint],
6150 n: SaSint,
6151 k: SaSint,
6152 buckets: &mut [SaSint],
6153 threads: SaSint,
6154 thread_state: &mut [ThreadState],
6155) {
6156 let k = k as usize;
6157 final_sorting_scan_left_to_right_32s_omp(
6158 t,
6159 sa,
6160 n,
6161 &mut buckets[2 * k..3 * k],
6162 threads,
6163 thread_state,
6164 );
6165 final_sorting_scan_right_to_left_32s_omp(
6166 t,
6167 sa,
6168 n,
6169 &mut buckets[3 * k..4 * k],
6170 threads,
6171 thread_state,
6172 );
6173}
6174
6175#[allow(dead_code)]
6176fn induce_final_order_32s_2k(
6177 t: &[SaSint],
6178 sa: &mut [SaSint],
6179 n: SaSint,
6180 k: SaSint,
6181 buckets: &mut [SaSint],
6182 threads: SaSint,
6183 thread_state: &mut [ThreadState],
6184) {
6185 let k = k as usize;
6186 final_sorting_scan_left_to_right_32s_omp(
6187 t,
6188 sa,
6189 n,
6190 &mut buckets[k..2 * k],
6191 threads,
6192 thread_state,
6193 );
6194 final_sorting_scan_right_to_left_32s_omp(t, sa, n, &mut buckets[..k], threads, thread_state);
6195}
6196
6197#[allow(dead_code)]
6198fn induce_final_order_32s_1k(
6199 t: &[SaSint],
6200 sa: &mut [SaSint],
6201 n: SaSint,
6202 k: SaSint,
6203 buckets: &mut [SaSint],
6204 threads: SaSint,
6205 thread_state: &mut [ThreadState],
6206) {
6207 count_suffixes_32s(t, n, k, buckets);
6208 initialize_buckets_start_32s_1k(k, buckets);
6209 final_sorting_scan_left_to_right_32s_omp(t, sa, n, buckets, threads, thread_state);
6210
6211 count_suffixes_32s(t, n, k, buckets);
6212 initialize_buckets_end_32s_1k(k, buckets);
6213 final_sorting_scan_right_to_left_32s_omp(t, sa, n, buckets, threads, thread_state);
6214}
6215
6216#[allow(dead_code)]
6217fn clear_lms_suffixes_omp(
6218 sa: &mut [SaSint],
6219 n: SaSint,
6220 k: SaSint,
6221 bucket_start: &[SaSint],
6222 bucket_end: &[SaSint],
6223 threads: SaSint,
6224) {
6225 let k_usize = usize::try_from(k).expect("k must be non-negative");
6226 let thread_count = if threads > 1 && n >= 65536 {
6227 usize::try_from(threads).expect("threads must be positive")
6228 } else {
6229 1
6230 };
6231 for t in 0..thread_count {
6232 let mut c = t;
6233 while c < k_usize {
6234 if bucket_end[c] > bucket_start[c] {
6235 let start = bucket_start[c] as usize;
6236 let end = bucket_end[c] as usize;
6237 sa[start..end].fill(0);
6238 }
6239 c += thread_count;
6240 }
6241 }
6242}
6243
6244#[allow(dead_code)]
6245fn final_gsa_scan_right_to_left_16u(
6246 t: &[u16],
6247 sa: &mut [SaSint],
6248 induction_bucket: &mut [SaSint],
6249 omp_block_start: SaSint,
6250 omp_block_size: SaSint,
6251) {
6252 let mut i = (omp_block_start + omp_block_size - 1) as isize;
6253 let mut j = (omp_block_start + 64 + 1) as isize;
6254 while i >= j {
6255 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6256 final_sorting_rtl_step(t, sa, induction_bucket, (i - 1) as usize, true);
6257 i -= 2;
6258 }
6259 j -= 64 + 1;
6260 while i >= j {
6261 final_sorting_rtl_step(t, sa, induction_bucket, i as usize, true);
6262 i -= 1;
6263 }
6264}
6265
6266#[allow(dead_code)]
6267fn final_sorting_ltr_step(
6268 t: &[u16],
6269 sa: &mut [SaSint],
6270 induction_bucket: &mut [SaSint],
6271 index: usize,
6272) {
6273 let mut p = sa[index];
6274 sa[index] = p ^ SAINT_MIN;
6275 if p > 0 {
6276 p -= 1;
6277 let c = t[p as usize] as usize;
6278 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
6279 SAINT_MIN
6280 } else {
6281 0
6282 };
6283 let dst = induction_bucket[c] as usize;
6284 sa[dst] = p | mark;
6285 induction_bucket[c] += 1;
6286 }
6287}
6288
6289#[allow(dead_code)]
6290fn final_sorting_rtl_step(
6291 t: &[u16],
6292 sa: &mut [SaSint],
6293 induction_bucket: &mut [SaSint],
6294 index: usize,
6295 gsa: bool,
6296) {
6297 let mut p = sa[index];
6298 sa[index] = p & SAINT_MAX;
6299 if p > 0 && (!gsa || t[(p - 1) as usize] > 0) {
6300 p -= 1;
6301 let c = t[p as usize] as usize;
6302 let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
6303 SAINT_MIN
6304 } else {
6305 0
6306 };
6307 induction_bucket[c] -= 1;
6308 sa[induction_bucket[c] as usize] = p | mark;
6309 }
6310}
6311
6312#[allow(dead_code)]
6313fn final_bwt_scan_left_to_right_16u(
6314 t: &[u16],
6315 sa: &mut [SaSint],
6316 induction_bucket: &mut [SaSint],
6317 omp_block_start: SaSint,
6318 omp_block_size: SaSint,
6319) {
6320 let mut i = omp_block_start as isize;
6321 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6322 while i < j {
6323 final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6324 final_bwt_ltr_step(t, sa, induction_bucket, (i + 1) as usize);
6325 i += 2;
6326 }
6327 j += 64 + 1;
6328 while i < j {
6329 final_bwt_ltr_step(t, sa, induction_bucket, i as usize);
6330 i += 1;
6331 }
6332}
6333
6334#[allow(dead_code)]
6335fn final_bwt_scan_right_to_left_16u(
6336 t: &[u16],
6337 sa: &mut [SaSint],
6338 induction_bucket: &mut [SaSint],
6339 omp_block_start: SaSint,
6340 omp_block_size: SaSint,
6341) -> SaSint {
6342 let mut index = -1;
6343 let mut i = (omp_block_start + omp_block_size - 1) as isize;
6344 let mut j = (omp_block_start + 64 + 1) as isize;
6345 while i >= j {
6346 final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6347 final_bwt_rtl_step(t, sa, induction_bucket, (i - 1) as usize, &mut index);
6348 i -= 2;
6349 }
6350 j -= 64 + 1;
6351 while i >= j {
6352 final_bwt_rtl_step(t, sa, induction_bucket, i as usize, &mut index);
6353 i -= 1;
6354 }
6355 index
6356}
6357
6358#[allow(dead_code)]
6359fn final_bwt_aux_scan_left_to_right_16u(
6360 t: &[u16],
6361 sa: &mut [SaSint],
6362 rm: SaSint,
6363 i_sample: &mut [SaSint],
6364 induction_bucket: &mut [SaSint],
6365 omp_block_start: SaSint,
6366 omp_block_size: SaSint,
6367) {
6368 let mut i = omp_block_start as isize;
6369 let mut j = (omp_block_start + omp_block_size - 64 - 1) as isize;
6370 while i < j {
6371 final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6372 final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, (i + 1) as usize);
6373 i += 2;
6374 }
6375 j += 64 + 1;
6376 while i < j {
6377 final_bwt_aux_ltr_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6378 i += 1;
6379 }
6380}
6381
6382#[allow(dead_code)]
6383fn final_bwt_aux_scan_right_to_left_16u(
6384 t: &[u16],
6385 sa: &mut [SaSint],
6386 rm: SaSint,
6387 i_sample: &mut [SaSint],
6388 induction_bucket: &mut [SaSint],
6389 omp_block_start: SaSint,
6390 omp_block_size: SaSint,
6391) {
6392 let mut i = (omp_block_start + omp_block_size - 1) as isize;
6393 let mut j = (omp_block_start + 64 + 1) as isize;
6394 while i >= j {
6395 final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6396 final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, (i - 1) as usize);
6397 i -= 2;
6398 }
6399 j -= 64 + 1;
6400 while i >= j {
6401 final_bwt_aux_rtl_step(t, sa, rm, i_sample, induction_bucket, i as usize);
6402 i -= 1;
6403 }
6404}
6405
6406#[allow(dead_code)]
6407fn renumber_lms_suffixes_16u(
6408 sa: &mut [SaSint],
6409 m: SaSint,
6410 mut name: SaSint,
6411 omp_block_start: SaSint,
6412 omp_block_size: SaSint,
6413) -> SaSint {
6414 let mut i = omp_block_start as isize;
6415 let mut j = (omp_block_start + omp_block_size - 64 - 3) as isize;
6416 while i < j {
6417 let p0 = sa[i as usize];
6418 sa[m as usize + ((p0 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6419 name += SaSint::from(p0 < 0);
6420
6421 let p1 = sa[(i + 1) as usize];
6422 sa[m as usize + ((p1 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6423 name += SaSint::from(p1 < 0);
6424
6425 let p2 = sa[(i + 2) as usize];
6426 sa[m as usize + ((p2 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6427 name += SaSint::from(p2 < 0);
6428
6429 let p3 = sa[(i + 3) as usize];
6430 sa[m as usize + ((p3 & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6431 name += SaSint::from(p3 < 0);
6432
6433 i += 4;
6434 }
6435
6436 j += 64 + 3;
6437 while i < j {
6438 let p = sa[i as usize];
6439 sa[m as usize + ((p & SAINT_MAX) >> 1) as usize] = name | SAINT_MIN;
6440 name += SaSint::from(p < 0);
6441 i += 1;
6442 }
6443
6444 name
6445}
6446
6447#[allow(dead_code)]
6448fn renumber_lms_suffixes_16u_omp(
6449 sa: &mut [SaSint],
6450 m: SaSint,
6451 threads: SaSint,
6452 thread_state: &mut [ThreadState],
6453) -> SaSint {
6454 if threads == 1 || m < 65_536 || thread_state.is_empty() {
6455 return renumber_lms_suffixes_16u(sa, m, 0, 0, m);
6456 }
6457
6458 let thread_count = usize::try_from(threads)
6459 .expect("threads must be non-negative")
6460 .min(thread_state.len());
6461 let block_stride = (m / thread_count as SaSint) & !15;
6462
6463 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6464 let block_start = thread as SaSint * block_stride;
6465 let block_size = if thread + 1 < thread_count {
6466 block_stride
6467 } else {
6468 m - block_start
6469 };
6470 state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6471 }
6472
6473 let mut name = 0;
6474 for thread in 0..thread_count {
6475 let block_start = thread as SaSint * block_stride;
6476 let block_size = if thread + 1 < thread_count {
6477 block_stride
6478 } else {
6479 m - block_start
6480 };
6481 renumber_lms_suffixes_16u(sa, m, name, block_start, block_size);
6482 name += thread_state[thread].count;
6483 }
6484
6485 name
6486}
6487
6488#[allow(dead_code)]
6489fn gather_marked_lms_suffixes(
6490 sa: &mut [SaSint],
6491 m: SaSint,
6492 mut l: isize,
6493 omp_block_start: isize,
6494 omp_block_size: isize,
6495) -> isize {
6496 if omp_block_size <= 0 {
6497 return l;
6498 }
6499
6500 l -= 1;
6501 let mut i = m as isize + omp_block_start + omp_block_size - 1;
6502 let mut j = m as isize + omp_block_start + 3;
6503 while i >= j {
6504 let s0 = sa[i as usize];
6505 sa[l as usize] = s0 & SAINT_MAX;
6506 l -= isize::from(s0 < 0);
6507
6508 let s1 = sa[(i - 1) as usize];
6509 sa[l as usize] = s1 & SAINT_MAX;
6510 l -= isize::from(s1 < 0);
6511
6512 let s2 = sa[(i - 2) as usize];
6513 sa[l as usize] = s2 & SAINT_MAX;
6514 l -= isize::from(s2 < 0);
6515
6516 let s3 = sa[(i - 3) as usize];
6517 sa[l as usize] = s3 & SAINT_MAX;
6518 l -= isize::from(s3 < 0);
6519
6520 i -= 4;
6521 }
6522
6523 j -= 3;
6524 while i >= j {
6525 let s = sa[i as usize];
6526 sa[l as usize] = s & SAINT_MAX;
6527 l -= isize::from(s < 0);
6528 i -= 1;
6529 }
6530
6531 l + 1
6532}
6533
6534#[allow(dead_code)]
6535fn gather_marked_lms_suffixes_omp(
6536 sa: &mut [SaSint],
6537 n: SaSint,
6538 m: SaSint,
6539 fs: SaSint,
6540 threads: SaSint,
6541 thread_state: &mut [ThreadState],
6542) {
6543 let half_n = n >> 1;
6544 if threads == 1 || n < 131_072 || thread_state.is_empty() {
6545 let _ = gather_marked_lms_suffixes(sa, m, (n + fs) as isize, 0, half_n as isize);
6546 return;
6547 }
6548
6549 let thread_count = usize::try_from(threads)
6550 .expect("threads must be non-negative")
6551 .min(thread_state.len());
6552 let block_stride = (half_n / thread_count as SaSint) & !15;
6553
6554 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6555 let block_start = thread as SaSint * block_stride;
6556 let block_size = if thread + 1 < thread_count {
6557 block_stride
6558 } else {
6559 half_n - block_start
6560 };
6561 let local_end = if thread + 1 < thread_count {
6562 m + block_start + block_size
6563 } else {
6564 n + fs
6565 } as isize;
6566 let gathered_position =
6567 gather_marked_lms_suffixes(sa, m, local_end, block_start as isize, block_size as isize);
6568 state.position = gathered_position as SaSint;
6569 state.count = (local_end - gathered_position) as SaSint;
6570 }
6571
6572 let mut position = (n + fs) as isize;
6573 for thread in (0..thread_count).rev() {
6574 let count =
6575 usize::try_from(thread_state[thread].count).expect("count must be non-negative");
6576 position -= thread_state[thread].count as isize;
6577 if thread + 1 != thread_count && count > 0 {
6578 let src = usize::try_from(thread_state[thread].position)
6579 .expect("position must be non-negative");
6580 let dst = position as usize;
6581 sa.copy_within(src..src + count, dst);
6582 }
6583 }
6584}
6585
6586#[allow(dead_code)]
6587fn renumber_and_gather_lms_suffixes_omp(
6588 sa: &mut [SaSint],
6589 n: SaSint,
6590 m: SaSint,
6591 fs: SaSint,
6592 threads: SaSint,
6593 thread_state: &mut [ThreadState],
6594) -> SaSint {
6595 let m_usize = m as usize;
6596 let half_n = (n >> 1) as usize;
6597 sa[m_usize..m_usize + half_n].fill(0);
6598
6599 let name = renumber_lms_suffixes_16u_omp(sa, m, threads, thread_state);
6600 if name < m {
6601 gather_marked_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
6602 } else {
6603 for item in &mut sa[..m_usize] {
6604 *item &= SAINT_MAX;
6605 }
6606 }
6607
6608 name
6609}
6610
6611#[allow(dead_code)]
6612fn reconstruct_lms_suffixes(
6613 sa: &mut [SaSint],
6614 n: SaSint,
6615 m: SaSint,
6616 omp_block_start: isize,
6617 omp_block_size: isize,
6618) {
6619 if omp_block_size <= 0 {
6620 return;
6621 }
6622
6623 let base = (n - m) as usize;
6624 let mut i = omp_block_start;
6625 let mut j = omp_block_start + omp_block_size - 64 - 3;
6626 while i < j {
6627 let iu = i as usize;
6628 let s0 = sa[iu] as usize;
6629 let s1 = sa[iu + 1] as usize;
6630 let s2 = sa[iu + 2] as usize;
6631 let s3 = sa[iu + 3] as usize;
6632 sa[iu] = sa[base + s0];
6633 sa[iu + 1] = sa[base + s1];
6634 sa[iu + 2] = sa[base + s2];
6635 sa[iu + 3] = sa[base + s3];
6636 i += 4;
6637 }
6638
6639 j += 64 + 3;
6640 while i < j {
6641 let iu = i as usize;
6642 let s = sa[iu] as usize;
6643 sa[iu] = sa[base + s];
6644 i += 1;
6645 }
6646}
6647
6648#[allow(dead_code)]
6649fn reconstruct_lms_suffixes_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6650 if threads == 1 || m < 65_536 {
6651 reconstruct_lms_suffixes(sa, n, m, 0, m as isize);
6652 return;
6653 }
6654
6655 let thread_count = threads as usize;
6656 let block_stride = (m / threads) & !15;
6657 for thread in 0..thread_count {
6658 let block_start = thread as SaSint * block_stride;
6659 let block_size = if thread + 1 < thread_count {
6660 block_stride
6661 } else {
6662 m - block_start
6663 };
6664 reconstruct_lms_suffixes(sa, n, m, block_start as isize, block_size as isize);
6665 }
6666}
6667
6668#[allow(dead_code)]
6669fn renumber_distinct_lms_suffixes_32s_4k(
6670 sa: &mut [SaSint],
6671 m: SaSint,
6672 mut name: SaSint,
6673 omp_block_start: isize,
6674 omp_block_size: isize,
6675) -> SaSint {
6676 if omp_block_size <= 0 {
6677 return name;
6678 }
6679
6680 let m_usize = m as usize;
6681 let start = omp_block_start as usize;
6682 let size = omp_block_size as usize;
6683 let (sa_head, sam) = sa.split_at_mut(m_usize);
6684 let mut i = start;
6685 let mut j = start + size.saturating_sub(64 + 3);
6686 let mut p3 = 0;
6687
6688 while i < j {
6689 let p0 = sa_head[i];
6690 sa_head[i] = p0 & SAINT_MAX;
6691 sam[(sa_head[i] >> 1) as usize] = name | (p0 & p3 & SAINT_MIN);
6692 name += SaSint::from(p0 < 0);
6693
6694 let p1 = sa_head[i + 1];
6695 sa_head[i + 1] = p1 & SAINT_MAX;
6696 sam[(sa_head[i + 1] >> 1) as usize] = name | (p1 & p0 & SAINT_MIN);
6697 name += SaSint::from(p1 < 0);
6698
6699 let p2 = sa_head[i + 2];
6700 sa_head[i + 2] = p2 & SAINT_MAX;
6701 sam[(sa_head[i + 2] >> 1) as usize] = name | (p2 & p1 & SAINT_MIN);
6702 name += SaSint::from(p2 < 0);
6703
6704 p3 = sa_head[i + 3];
6705 sa_head[i + 3] = p3 & SAINT_MAX;
6706 sam[(sa_head[i + 3] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6707 name += SaSint::from(p3 < 0);
6708
6709 i += 4;
6710 }
6711
6712 j = start + size;
6713 while i < j {
6714 let p2 = p3;
6715 p3 = sa_head[i];
6716 sa_head[i] = p3 & SAINT_MAX;
6717 sam[(sa_head[i] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6718 name += SaSint::from(p3 < 0);
6719 i += 1;
6720 }
6721
6722 name
6723}
6724
6725#[allow(dead_code)]
6726fn mark_distinct_lms_suffixes_32s(
6727 sa: &mut [SaSint],
6728 m: SaSint,
6729 omp_block_start: isize,
6730 omp_block_size: isize,
6731) {
6732 if omp_block_size <= 0 {
6733 return;
6734 }
6735
6736 let mut i = m as usize + omp_block_start as usize;
6737 let mut j = i + (omp_block_size as usize).saturating_sub(3);
6738 let mut p3 = 0;
6739 while i < j {
6740 let mut p0 = sa[i];
6741 sa[i] = p0 & (p3 | SAINT_MAX);
6742 p0 = if p0 == 0 { p3 } else { p0 };
6743
6744 let mut p1 = sa[i + 1];
6745 sa[i + 1] = p1 & (p0 | SAINT_MAX);
6746 p1 = if p1 == 0 { p0 } else { p1 };
6747
6748 let mut p2 = sa[i + 2];
6749 sa[i + 2] = p2 & (p1 | SAINT_MAX);
6750 p2 = if p2 == 0 { p1 } else { p2 };
6751
6752 p3 = sa[i + 3];
6753 sa[i + 3] = p3 & (p2 | SAINT_MAX);
6754 p3 = if p3 == 0 { p2 } else { p3 };
6755 i += 4;
6756 }
6757
6758 j = m as usize + omp_block_start as usize + omp_block_size as usize;
6759 while i < j {
6760 let p2 = p3;
6761 p3 = sa[i];
6762 sa[i] = p3 & (p2 | SAINT_MAX);
6763 p3 = if p3 == 0 { p2 } else { p3 };
6764 i += 1;
6765 }
6766}
6767
6768#[allow(dead_code)]
6769fn clamp_lms_suffixes_length_32s(
6770 sa: &mut [SaSint],
6771 m: SaSint,
6772 omp_block_start: isize,
6773 omp_block_size: isize,
6774) {
6775 if omp_block_size <= 0 {
6776 return;
6777 }
6778
6779 let mut i = m as usize + omp_block_start as usize;
6780 let mut j = i + (omp_block_size as usize).saturating_sub(3);
6781 while i < j {
6782 let s0 = sa[i];
6783 sa[i] = if s0 < 0 { s0 } else { 0 } & SAINT_MAX;
6784
6785 let s1 = sa[i + 1];
6786 sa[i + 1] = if s1 < 0 { s1 } else { 0 } & SAINT_MAX;
6787
6788 let s2 = sa[i + 2];
6789 sa[i + 2] = if s2 < 0 { s2 } else { 0 } & SAINT_MAX;
6790
6791 let s3 = sa[i + 3];
6792 sa[i + 3] = if s3 < 0 { s3 } else { 0 } & SAINT_MAX;
6793
6794 i += 4;
6795 }
6796
6797 j = m as usize + omp_block_start as usize + omp_block_size as usize;
6798 while i < j {
6799 let s = sa[i];
6800 sa[i] = if s < 0 { s } else { 0 } & SAINT_MAX;
6801 i += 1;
6802 }
6803}
6804
6805#[allow(dead_code)]
6806fn renumber_distinct_lms_suffixes_32s_4k_omp(
6807 sa: &mut [SaSint],
6808 m: SaSint,
6809 threads: SaSint,
6810 thread_state: &mut [ThreadState],
6811) -> SaSint {
6812 if threads == 1 || m < 65_536 || thread_state.is_empty() {
6813 return renumber_distinct_lms_suffixes_32s_4k(sa, m, 1, 0, m as isize) - 1;
6814 }
6815
6816 let thread_count = usize::try_from(threads)
6817 .expect("threads must be non-negative")
6818 .min(thread_state.len());
6819 let block_stride = (m / thread_count as SaSint) & !15;
6820
6821 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
6822 let block_start = thread as SaSint * block_stride;
6823 let block_size = if thread + 1 < thread_count {
6824 block_stride
6825 } else {
6826 m - block_start
6827 };
6828 state.count = count_negative_marked_suffixes(sa, block_start, block_size);
6829 }
6830
6831 let mut count = 1;
6832 for thread in 0..thread_count {
6833 let block_start = thread as SaSint * block_stride;
6834 let block_size = if thread + 1 < thread_count {
6835 block_stride
6836 } else {
6837 m - block_start
6838 };
6839 renumber_distinct_lms_suffixes_32s_4k(
6840 sa,
6841 m,
6842 count,
6843 block_start as isize,
6844 block_size as isize,
6845 );
6846 count += thread_state[thread].count;
6847 }
6848
6849 count - 1
6850}
6851
6852#[allow(dead_code)]
6853fn mark_distinct_lms_suffixes_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6854 let half_n = n >> 1;
6855 if threads == 1 || n < 131_072 {
6856 mark_distinct_lms_suffixes_32s(sa, m, 0, half_n as isize);
6857 return;
6858 }
6859
6860 let thread_count = threads as usize;
6861 let block_stride = (half_n / threads) & !15;
6862 for thread in 0..thread_count {
6863 let block_start = thread as SaSint * block_stride;
6864 let block_size = if thread + 1 < thread_count {
6865 block_stride
6866 } else {
6867 half_n - block_start
6868 };
6869 mark_distinct_lms_suffixes_32s(sa, m, block_start as isize, block_size as isize);
6870 }
6871}
6872
6873#[allow(dead_code)]
6874fn clamp_lms_suffixes_length_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6875 let half_n = n >> 1;
6876 if threads == 1 || n < 131_072 {
6877 clamp_lms_suffixes_length_32s(sa, m, 0, half_n as isize);
6878 return;
6879 }
6880
6881 let thread_count = threads as usize;
6882 let block_stride = (half_n / threads) & !15;
6883 for thread in 0..thread_count {
6884 let block_start = thread as SaSint * block_stride;
6885 let block_size = if thread + 1 < thread_count {
6886 block_stride
6887 } else {
6888 half_n - block_start
6889 };
6890 clamp_lms_suffixes_length_32s(sa, m, block_start as isize, block_size as isize);
6891 }
6892}
6893
6894#[allow(dead_code)]
6895fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
6896 sa: &mut [SaSint],
6897 n: SaSint,
6898 m: SaSint,
6899 threads: SaSint,
6900 thread_state: &mut [ThreadState],
6901) -> SaSint {
6902 let m_usize = m as usize;
6903 let half_n = (n >> 1) as usize;
6904 sa[m_usize..m_usize + half_n].fill(0);
6905
6906 let name = renumber_distinct_lms_suffixes_32s_4k_omp(sa, m, threads, thread_state);
6907 if name < m {
6908 mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6909 }
6910
6911 name
6912}
6913
6914#[allow(dead_code)]
6915fn renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
6916 t: &[SaSint],
6917 sa: &mut [SaSint],
6918 n: SaSint,
6919 m: SaSint,
6920 threads: SaSint,
6921) -> SaSint {
6922 let m_usize = m as usize;
6923 let n_usize = n as usize;
6924
6925 gather_lms_suffixes_32s(t, sa, n);
6926 sa[m_usize..n_usize - m_usize].fill(0);
6927
6928 let mut i = n - m;
6929 let mut j = n - 1 - 64 - 3;
6930 while i < j {
6931 let s0 = (sa[i as usize] as SaUint >> 1) as usize;
6932 let s1 = (sa[(i + 1) as usize] as SaUint >> 1) as usize;
6933 let s2 = (sa[(i + 2) as usize] as SaUint >> 1) as usize;
6934 let s3 = (sa[(i + 3) as usize] as SaUint >> 1) as usize;
6935 sa[m_usize + s0] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
6936 sa[m_usize + s1] = sa[(i + 2) as usize] - sa[(i + 1) as usize] + 1 + SAINT_MIN;
6937 sa[m_usize + s2] = sa[(i + 3) as usize] - sa[(i + 2) as usize] + 1 + SAINT_MIN;
6938 sa[m_usize + s3] = sa[(i + 4) as usize] - sa[(i + 3) as usize] + 1 + SAINT_MIN;
6939 i += 4;
6940 }
6941
6942 j += 64 + 3;
6943 while i < j {
6944 let s = (sa[i as usize] as SaUint >> 1) as usize;
6945 sa[m_usize + s] = sa[(i + 1) as usize] - sa[i as usize] + 1 + SAINT_MIN;
6946 i += 1;
6947 }
6948
6949 let tail = (sa[n_usize - 1] as SaUint >> 1) as usize;
6950 sa[m_usize + tail] = 1 + SAINT_MIN;
6951
6952 clamp_lms_suffixes_length_32s_omp(sa, n, m, threads);
6953
6954 let mut name = 1;
6955 if m_usize > 0 {
6956 let mut i = 1usize;
6957 let mut j = m_usize.saturating_sub(64 + 1);
6958 let mut p = sa[0] as usize;
6959 let mut plen = sa[m_usize + (p >> 1)];
6960 let mut pdiff = SAINT_MIN;
6961
6962 while i < j {
6963 let q = sa[i] as usize;
6964 let qlen = sa[m_usize + (q >> 1)];
6965 let mut qdiff = SAINT_MIN;
6966 if plen == qlen {
6967 let mut l = 0;
6968 while l < qlen as usize {
6969 if t[p + l] != t[q + l] {
6970 break;
6971 }
6972 l += 1;
6973 }
6974 qdiff = ((l as SaSint) - qlen) & SAINT_MIN;
6975 }
6976 sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
6977 name += SaSint::from(qdiff < 0);
6978
6979 p = sa[i + 1] as usize;
6980 plen = sa[m_usize + (p >> 1)];
6981 pdiff = SAINT_MIN;
6982 if qlen == plen {
6983 let mut l = 0;
6984 while l < plen as usize {
6985 if t[q + l] != t[p + l] {
6986 break;
6987 }
6988 l += 1;
6989 }
6990 pdiff = ((l as SaSint) - plen) & SAINT_MIN;
6991 }
6992 sa[m_usize + (q >> 1)] = name | (qdiff & pdiff);
6993 name += SaSint::from(pdiff < 0);
6994 i += 2;
6995 }
6996
6997 j = m_usize;
6998 while i < j {
6999 let q = sa[i] as usize;
7000 let qlen = sa[m_usize + (q >> 1)];
7001 let mut qdiff = SAINT_MIN;
7002 if plen == qlen {
7003 let mut l = 0;
7004 while l < plen as usize {
7005 if t[p + l] != t[q + l] {
7006 break;
7007 }
7008 l += 1;
7009 }
7010 qdiff = ((l as SaSint) - plen) & SAINT_MIN;
7011 }
7012 sa[m_usize + (p >> 1)] = name | (pdiff & qdiff);
7013 name += SaSint::from(qdiff < 0);
7014 p = q;
7015 plen = qlen;
7016 pdiff = qdiff;
7017 i += 1;
7018 }
7019
7020 sa[m_usize + (p >> 1)] = name | pdiff;
7021 name += 1;
7022 }
7023
7024 if name <= m {
7025 mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
7026 }
7027
7028 name - 1
7029}
7030
7031#[allow(dead_code)]
7032fn renumber_unique_and_nonunique_lms_suffixes_32s(
7033 t: &mut [SaSint],
7034 sa: &mut [SaSint],
7035 m: SaSint,
7036 mut f: SaSint,
7037 omp_block_start: isize,
7038 omp_block_size: isize,
7039) -> SaSint {
7040 if omp_block_size <= 0 {
7041 return f;
7042 }
7043
7044 let m_usize = m as usize;
7045 let (sa_head, sam) = sa.split_at_mut(m_usize);
7046 let mut i = omp_block_start;
7047 let mut j = omp_block_start + omp_block_size - 128 - 3;
7048 while i < j {
7049 for offset in 0..4 {
7050 let idx = (i + offset) as usize;
7051 let p = sa_head[idx] as SaUint;
7052 let mut s = sam[(p >> 1) as usize];
7053 if s < 0 {
7054 t[p as usize] |= SAINT_MIN;
7055 f += 1;
7056 s = i as SaSint + offset as SaSint + SAINT_MIN + f;
7057 }
7058 sam[(p >> 1) as usize] = s - f;
7059 }
7060 i += 4;
7061 }
7062
7063 j += 128 + 3;
7064 while i < j {
7065 let p = sa_head[i as usize] as SaUint;
7066 let mut s = sam[(p >> 1) as usize];
7067 if s < 0 {
7068 t[p as usize] |= SAINT_MIN;
7069 f += 1;
7070 s = i as SaSint + SAINT_MIN + f;
7071 }
7072 sam[(p >> 1) as usize] = s - f;
7073 i += 1;
7074 }
7075
7076 f
7077}
7078
7079#[allow(dead_code)]
7080fn compact_unique_and_nonunique_lms_suffixes_32s(
7081 sa: &mut [SaSint],
7082 m: SaSint,
7083 pl: &mut isize,
7084 pr: &mut isize,
7085 omp_block_start: isize,
7086 omp_block_size: isize,
7087) {
7088 if omp_block_size <= 0 {
7089 return;
7090 }
7091
7092 let m_usize = m as usize;
7093 let source: Vec<SaSint> = sa
7094 [m_usize + omp_block_start as usize..m_usize + (omp_block_start + omp_block_size) as usize]
7095 .to_vec();
7096 let mut l = *pl - 1;
7097 let mut r = *pr - 1;
7098
7099 for &p in source.iter().rev() {
7100 sa[l as usize] = p & SAINT_MAX;
7101 l -= isize::from(p < 0);
7102
7103 sa[r as usize] = p.wrapping_sub(1);
7104 r -= isize::from(p > 0);
7105 }
7106
7107 *pl = l + 1;
7108 *pr = r + 1;
7109}
7110
7111#[allow(dead_code)]
7112fn count_unique_suffixes(
7113 sa: &[SaSint],
7114 m: SaSint,
7115 omp_block_start: isize,
7116 omp_block_size: isize,
7117) -> SaSint {
7118 let base = m as usize;
7119 let start = omp_block_start as usize;
7120 let end = start + omp_block_size as usize;
7121 let mut count = 0;
7122 for i in start..end {
7123 count += SaSint::from(sa[base + ((sa[i] as SaUint) >> 1) as usize] < 0);
7124 }
7125 count
7126}
7127
7128#[allow(dead_code)]
7129fn renumber_unique_and_nonunique_lms_suffixes_32s_omp(
7130 t: &mut [SaSint],
7131 sa: &mut [SaSint],
7132 m: SaSint,
7133 threads: SaSint,
7134) -> SaSint {
7135 if threads == 1 || m < 65_536 {
7136 return renumber_unique_and_nonunique_lms_suffixes_32s(t, sa, m, 0, 0, m as isize);
7137 }
7138
7139 let thread_count = threads as usize;
7140 let block_stride = (m / threads) & !15;
7141 let mut counts = vec![0; thread_count];
7142
7143 for thread in 0..thread_count {
7144 let block_start = thread as SaSint * block_stride;
7145 let block_size = if thread + 1 < thread_count {
7146 block_stride
7147 } else {
7148 m - block_start
7149 };
7150 counts[thread] = count_unique_suffixes(sa, m, block_start as isize, block_size as isize);
7151 }
7152
7153 let mut f = 0;
7154 for thread in 0..thread_count {
7155 let block_start = thread as SaSint * block_stride;
7156 let block_size = if thread + 1 < thread_count {
7157 block_stride
7158 } else {
7159 m - block_start
7160 };
7161 renumber_unique_and_nonunique_lms_suffixes_32s(
7162 t,
7163 sa,
7164 m,
7165 f,
7166 block_start as isize,
7167 block_size as isize,
7168 );
7169 f += counts[thread];
7170 }
7171
7172 f
7173}
7174
7175#[allow(dead_code)]
7176fn compact_unique_and_nonunique_lms_suffixes_32s_omp(
7177 sa: &mut [SaSint],
7178 n: SaSint,
7179 m: SaSint,
7180 fs: SaSint,
7181 f: SaSint,
7182 threads: SaSint,
7183) {
7184 let half_n = n >> 1;
7185 if threads == 1 || n < 131_072 || m >= fs {
7186 let mut l = m as isize;
7187 let mut r = (n + fs) as isize;
7188 compact_unique_and_nonunique_lms_suffixes_32s(sa, m, &mut l, &mut r, 0, half_n as isize);
7189 } else {
7190 let thread_count = threads as usize;
7191 let block_stride = (half_n / threads) & !15;
7192 let mut positions = vec![0isize; thread_count];
7193 let mut counts = vec![0isize; thread_count];
7194
7195 for thread in 0..thread_count {
7196 let block_start = thread as SaSint * block_stride;
7197 let block_size = if thread + 1 < thread_count {
7198 block_stride
7199 } else {
7200 half_n - block_start
7201 };
7202 let mut position = (m + half_n + block_start + block_size) as isize;
7203 let mut count = (m + block_start + block_size) as isize;
7204 compact_unique_and_nonunique_lms_suffixes_32s(
7205 sa,
7206 m,
7207 &mut position,
7208 &mut count,
7209 block_start as isize,
7210 block_size as isize,
7211 );
7212 positions[thread] = position;
7213 counts[thread] = count;
7214 }
7215
7216 let mut position = m as isize;
7217 for thread in (0..thread_count).rev() {
7218 let block_end = if thread + 1 < thread_count {
7219 block_stride * (thread as SaSint + 1)
7220 } else {
7221 half_n
7222 };
7223 let count = (m + half_n + block_end) as isize - positions[thread];
7224 if count > 0 {
7225 position -= count;
7226 let src = positions[thread] as usize;
7227 let dst = position as usize;
7228 sa.copy_within(src..src + count as usize, dst);
7229 }
7230 }
7231
7232 let mut position = (n + fs) as isize;
7233 for thread in (0..thread_count).rev() {
7234 let block_end = if thread + 1 < thread_count {
7235 block_stride * (thread as SaSint + 1)
7236 } else {
7237 half_n
7238 };
7239 let count = (m + block_end) as isize - counts[thread];
7240 if count > 0 {
7241 position -= count;
7242 let src = counts[thread] as usize;
7243 let dst = position as usize;
7244 sa.copy_within(src..src + count as usize, dst);
7245 }
7246 }
7247 }
7248
7249 let dst = (n + fs - m) as usize;
7250 let src = (m - f) as usize;
7251 sa.copy_within(src..src + f as usize, dst);
7252}
7253
7254#[allow(dead_code)]
7255fn compact_lms_suffixes_32s_omp(
7256 t: &mut [SaSint],
7257 sa: &mut [SaSint],
7258 n: SaSint,
7259 m: SaSint,
7260 fs: SaSint,
7261 threads: SaSint,
7262) -> SaSint {
7263 let f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(t, sa, m, threads);
7264 compact_unique_and_nonunique_lms_suffixes_32s_omp(sa, n, m, fs, f, threads);
7265 f
7266}
7267
7268#[allow(dead_code)]
7269fn merge_unique_lms_suffixes_32s(
7270 t: &mut [SaSint],
7271 sa: &mut [SaSint],
7272 n: SaSint,
7273 m: SaSint,
7274 l: isize,
7275 omp_block_start: isize,
7276 omp_block_size: isize,
7277) {
7278 let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7279 let mut tmp = sa[src_index] as isize;
7280 src_index += 1;
7281
7282 let mut i = omp_block_start;
7283 let mut j = omp_block_start + omp_block_size - 6;
7284 while i < j {
7285 let iu = i as usize;
7286
7287 let c0 = t[iu];
7288 if c0 < 0 {
7289 t[iu] = c0 & SAINT_MAX;
7290 sa[tmp as usize] = i as SaSint;
7291 i += 1;
7292 tmp = sa[src_index] as isize;
7293 src_index += 1;
7294 }
7295
7296 let c1 = t[(i + 1) as usize];
7297 if c1 < 0 {
7298 t[(i + 1) as usize] = c1 & SAINT_MAX;
7299 sa[tmp as usize] = i as SaSint + 1;
7300 i += 1;
7301 tmp = sa[src_index] as isize;
7302 src_index += 1;
7303 }
7304
7305 let c2 = t[(i + 2) as usize];
7306 if c2 < 0 {
7307 t[(i + 2) as usize] = c2 & SAINT_MAX;
7308 sa[tmp as usize] = i as SaSint + 2;
7309 i += 1;
7310 tmp = sa[src_index] as isize;
7311 src_index += 1;
7312 }
7313
7314 let c3 = t[(i + 3) as usize];
7315 if c3 < 0 {
7316 t[(i + 3) as usize] = c3 & SAINT_MAX;
7317 sa[tmp as usize] = i as SaSint + 3;
7318 i += 1;
7319 tmp = sa[src_index] as isize;
7320 src_index += 1;
7321 }
7322
7323 i += 4;
7324 }
7325
7326 j += 6;
7327 while i < j {
7328 let c = t[i as usize];
7329 if c < 0 {
7330 t[i as usize] = c & SAINT_MAX;
7331 sa[tmp as usize] = i as SaSint;
7332 i += 1;
7333 tmp = sa[src_index] as isize;
7334 src_index += 1;
7335 }
7336 i += 1;
7337 }
7338}
7339
7340#[allow(dead_code)]
7341fn merge_nonunique_lms_suffixes_32s(
7342 sa: &mut [SaSint],
7343 n: SaSint,
7344 m: SaSint,
7345 l: isize,
7346 omp_block_start: isize,
7347 omp_block_size: isize,
7348) {
7349 let mut src_index = (n as isize - m as isize - 1 + l) as usize;
7350 let mut tmp = sa[src_index];
7351 src_index += 1;
7352
7353 let mut i = omp_block_start;
7354 let mut j = omp_block_start + omp_block_size - 3;
7355 while i < j {
7356 if sa[i as usize] == 0 {
7357 sa[i as usize] = tmp;
7358 tmp = sa[src_index];
7359 src_index += 1;
7360 }
7361 if sa[(i + 1) as usize] == 0 {
7362 sa[(i + 1) as usize] = tmp;
7363 tmp = sa[src_index];
7364 src_index += 1;
7365 }
7366 if sa[(i + 2) as usize] == 0 {
7367 sa[(i + 2) as usize] = tmp;
7368 tmp = sa[src_index];
7369 src_index += 1;
7370 }
7371 if sa[(i + 3) as usize] == 0 {
7372 sa[(i + 3) as usize] = tmp;
7373 tmp = sa[src_index];
7374 src_index += 1;
7375 }
7376 i += 4;
7377 }
7378
7379 j += 3;
7380 while i < j {
7381 if sa[i as usize] == 0 {
7382 sa[i as usize] = tmp;
7383 tmp = sa[src_index];
7384 src_index += 1;
7385 }
7386 i += 1;
7387 }
7388}
7389
7390#[allow(dead_code)]
7391fn merge_unique_lms_suffixes_32s_omp(
7392 t: &mut [SaSint],
7393 sa: &mut [SaSint],
7394 n: SaSint,
7395 m: SaSint,
7396 threads: SaSint,
7397) {
7398 if threads == 1 || n < 65_536 {
7399 merge_unique_lms_suffixes_32s(t, sa, n, m, 0, 0, n as isize);
7400 return;
7401 }
7402
7403 let thread_count = threads as usize;
7404 let block_stride = (n / threads) & !15;
7405 let mut counts = vec![0; thread_count];
7406
7407 for thread in 0..thread_count {
7408 let block_start = thread as SaSint * block_stride;
7409 let block_size = if thread + 1 < thread_count {
7410 block_stride
7411 } else {
7412 n - block_start
7413 };
7414 counts[thread] = count_negative_marked_suffixes(t, block_start, block_size);
7415 }
7416
7417 let mut count = 0;
7418 for thread in 0..thread_count {
7419 let block_start = thread as SaSint * block_stride;
7420 let block_size = if thread + 1 < thread_count {
7421 block_stride
7422 } else {
7423 n - block_start
7424 };
7425 merge_unique_lms_suffixes_32s(
7426 t,
7427 sa,
7428 n,
7429 m,
7430 count as isize,
7431 block_start as isize,
7432 block_size as isize,
7433 );
7434 count += counts[thread];
7435 }
7436}
7437
7438#[allow(dead_code)]
7439fn merge_nonunique_lms_suffixes_32s_omp(
7440 sa: &mut [SaSint],
7441 n: SaSint,
7442 m: SaSint,
7443 f: SaSint,
7444 threads: SaSint,
7445) {
7446 if threads == 1 || m < 65_536 {
7447 merge_nonunique_lms_suffixes_32s(sa, n, m, f as isize, 0, m as isize);
7448 return;
7449 }
7450
7451 let thread_count = threads as usize;
7452 let block_stride = (m / threads) & !15;
7453 let mut counts = vec![0; thread_count];
7454
7455 for thread in 0..thread_count {
7456 let block_start = thread as SaSint * block_stride;
7457 let block_size = if thread + 1 < thread_count {
7458 block_stride
7459 } else {
7460 m - block_start
7461 };
7462 counts[thread] = count_zero_marked_suffixes(sa, block_start, block_size);
7463 }
7464
7465 let mut count = f;
7466 for thread in 0..thread_count {
7467 let block_start = thread as SaSint * block_stride;
7468 let block_size = if thread + 1 < thread_count {
7469 block_stride
7470 } else {
7471 m - block_start
7472 };
7473 merge_nonunique_lms_suffixes_32s(
7474 sa,
7475 n,
7476 m,
7477 count as isize,
7478 block_start as isize,
7479 block_size as isize,
7480 );
7481 count += counts[thread];
7482 }
7483}
7484
7485#[allow(dead_code)]
7486fn merge_compacted_lms_suffixes_32s_omp(
7487 t: &mut [SaSint],
7488 sa: &mut [SaSint],
7489 n: SaSint,
7490 m: SaSint,
7491 f: SaSint,
7492 threads: SaSint,
7493) {
7494 merge_unique_lms_suffixes_32s_omp(t, sa, n, m, threads);
7495 merge_nonunique_lms_suffixes_32s_omp(sa, n, m, f, threads);
7496}
7497
7498#[allow(dead_code)]
7499fn reconstruct_compacted_lms_suffixes_32s_2k_omp(
7500 t: &mut [SaSint],
7501 sa: &mut [SaSint],
7502 n: SaSint,
7503 k: SaSint,
7504 m: SaSint,
7505 fs: SaSint,
7506 f: SaSint,
7507 buckets: &mut [SaSint],
7508 local_buckets: SaSint,
7509 threads: SaSint,
7510 thread_state: &mut [ThreadState],
7511) {
7512 if f > 0 {
7513 let dst = (n - m - 1) as usize;
7514 let src = (n + fs - m) as usize;
7515 sa.copy_within(src..src + f as usize, dst);
7516
7517 count_and_gather_compacted_lms_suffixes_32s_2k_omp(
7518 t,
7519 sa,
7520 n,
7521 k,
7522 buckets,
7523 local_buckets,
7524 threads,
7525 thread_state,
7526 );
7527 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7528
7529 let dst = (n - m - 1 + f) as usize;
7530 sa.copy_within(0..(m - f) as usize, dst);
7531 sa[..m as usize].fill(0);
7532
7533 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7534 } else {
7535 count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as isize);
7536 reconstruct_lms_suffixes_omp(sa, n, m, threads);
7537 }
7538}
7539
7540#[allow(dead_code)]
7541fn reconstruct_compacted_lms_suffixes_32s_1k_omp(
7542 t: &mut [SaSint],
7543 sa: &mut [SaSint],
7544 n: SaSint,
7545 m: SaSint,
7546 fs: SaSint,
7547 f: SaSint,
7548 threads: SaSint,
7549) {
7550 if f > 0 {
7551 let dst = (n - m - 1) as usize;
7552 let src = (n + fs - m) as usize;
7553 sa.copy_within(src..src + f as usize, dst);
7554
7555 gather_compacted_lms_suffixes_32s(t, sa, n);
7556 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
7557
7558 let dst = (n - m - 1 + f) as usize;
7559 sa.copy_within(0..(m - f) as usize, dst);
7560 sa[..m as usize].fill(0);
7561
7562 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads);
7563 } else {
7564 gather_lms_suffixes_32s(t, sa, n);
7565 reconstruct_lms_suffixes_omp(sa, n, m, threads);
7566 }
7567}
7568
7569#[allow(dead_code)]
7570fn place_lms_suffixes_interval_16u(
7571 sa: &mut [SaSint],
7572 n: SaSint,
7573 mut m: SaSint,
7574 flags: SaSint,
7575 buckets: &mut [SaSint],
7576) {
7577 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7578 buckets[7 * ALPHABET_SIZE] -= 1;
7579 }
7580
7581 let mut j = n as isize;
7582 let mut c = ALPHABET_SIZE as isize - 2;
7583 while c >= 0 {
7584 let ci = c as usize;
7585 let l =
7586 buckets[buckets_index2(ci, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(ci, 1)];
7587 if l > 0 {
7588 let i = buckets[7 * ALPHABET_SIZE + ci] as isize;
7589 if j - i > 0 {
7590 sa[i as usize..j as usize].fill(0);
7591 }
7592
7593 m -= l;
7594 j = i - l as isize;
7595 let src = m as usize;
7596 let dst = j as usize;
7597 sa.copy_within(src..src + l as usize, dst);
7598 }
7599 c -= 1;
7600 }
7601
7602 sa[..j as usize].fill(0);
7603
7604 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
7605 buckets[7 * ALPHABET_SIZE] += 1;
7606 }
7607}
7608
7609#[allow(dead_code)]
7610fn place_lms_suffixes_interval_32s_4k(
7611 sa: &mut [SaSint],
7612 n: SaSint,
7613 k: SaSint,
7614 mut m: SaSint,
7615 buckets: &[SaSint],
7616) {
7617 let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7618 let mut j = n as usize;
7619 let mut c = k - 2;
7620 while c >= 0 {
7621 let cu = c as usize;
7622 let l =
7623 buckets[buckets_index2(cu, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(cu, 1)];
7624 if l > 0 {
7625 let i = bucket_end[cu] as usize;
7626 if j > i {
7627 sa[i..j].fill(0);
7628 }
7629
7630 m -= l;
7631 let dst = i - l as usize;
7632 sa.copy_within(m as usize..m as usize + l as usize, dst);
7633 j = dst;
7634 }
7635 c -= 1;
7636 }
7637
7638 sa[..j].fill(0);
7639}
7640
7641#[allow(dead_code)]
7642fn place_lms_suffixes_interval_32s_2k(
7643 sa: &mut [SaSint],
7644 n: SaSint,
7645 k: SaSint,
7646 mut m: SaSint,
7647 buckets: &[SaSint],
7648) {
7649 let mut j = n as usize;
7650 if k > 1 {
7651 let mut c = buckets_index2(k as usize - 2, 0) as isize;
7652 while c >= buckets_index2(0, 0) as isize {
7653 let cu = c as usize;
7654 let l = buckets[cu + buckets_index2(1, 1)] - buckets[cu + buckets_index2(0, 1)];
7655 if l > 0 {
7656 let i = buckets[cu] as usize;
7657 if j > i {
7658 sa[i..j].fill(0);
7659 }
7660
7661 m -= l;
7662 let dst = i - l as usize;
7663 sa.copy_within(m as usize..m as usize + l as usize, dst);
7664 j = dst;
7665 }
7666 c -= buckets_index2(1, 0) as isize;
7667 }
7668 }
7669
7670 sa[..j].fill(0);
7671}
7672
7673#[allow(dead_code)]
7674fn place_lms_suffixes_interval_32s_1k(
7675 t: &[SaSint],
7676 sa: &mut [SaSint],
7677 k: SaSint,
7678 m: SaSint,
7679 buckets: &[SaSint],
7680) {
7681 let mut c = k - 1;
7682 let mut l = buckets[c as usize] as usize;
7683
7684 let mut i = m - 1;
7685 while i >= 0 {
7686 let p = sa[i as usize] as usize;
7687 if t[p] != c {
7688 c = t[p];
7689 let bucket_pos = buckets[c as usize] as usize;
7690 if l > bucket_pos {
7691 sa[bucket_pos..l].fill(0);
7692 }
7693 l = bucket_pos;
7694 }
7695 l -= 1;
7696 sa[l] = p as SaSint;
7697 i -= 1;
7698 }
7699
7700 sa[..l].fill(0);
7701}
7702
7703#[allow(dead_code)]
7704fn place_lms_suffixes_histogram_32s_6k(
7705 sa: &mut [SaSint],
7706 n: SaSint,
7707 k: SaSint,
7708 mut m: SaSint,
7709 buckets: &[SaSint],
7710) {
7711 let bucket_end = &buckets[5 * k as usize..6 * k as usize];
7712 let mut j = n as usize;
7713 let mut c = k - 2;
7714 while c >= 0 {
7715 let l = buckets[buckets_index4(c as usize, 1)] as usize;
7716 if l > 0 {
7717 let i = bucket_end[c as usize] as usize;
7718 if j > i {
7719 sa[i..j].fill(0);
7720 }
7721 let dst = i - l;
7722 m -= l as SaSint;
7723 sa.copy_within(m as usize..m as usize + l, dst);
7724 j = dst;
7725 }
7726 c -= 1;
7727 }
7728 sa[..j].fill(0);
7729}
7730
7731#[allow(dead_code)]
7732fn place_lms_suffixes_histogram_32s_4k(
7733 sa: &mut [SaSint],
7734 n: SaSint,
7735 k: SaSint,
7736 mut m: SaSint,
7737 buckets: &[SaSint],
7738) {
7739 let bucket_end = &buckets[3 * k as usize..4 * k as usize];
7740 let mut j = n as usize;
7741 let mut c = k - 2;
7742 while c >= 0 {
7743 let l = buckets[buckets_index2(c as usize, 1)] as usize;
7744 if l > 0 {
7745 let i = bucket_end[c as usize] as usize;
7746 if j > i {
7747 sa[i..j].fill(0);
7748 }
7749 let dst = i - l;
7750 m -= l as SaSint;
7751 sa.copy_within(m as usize..m as usize + l, dst);
7752 j = dst;
7753 }
7754 c -= 1;
7755 }
7756 sa[..j].fill(0);
7757}
7758
7759#[allow(dead_code)]
7760fn place_lms_suffixes_histogram_32s_2k(
7761 sa: &mut [SaSint],
7762 n: SaSint,
7763 k: SaSint,
7764 mut m: SaSint,
7765 buckets: &[SaSint],
7766) {
7767 let mut j = n as usize;
7768 if k > 1 {
7769 let mut c = buckets_index2(k as usize - 2, 0) as isize;
7770 while c >= buckets_index2(0, 0) as isize {
7771 let cu = c as usize;
7772 let l = buckets[cu + buckets_index2(0, 1)] as usize;
7773 if l > 0 {
7774 let i = buckets[cu] as usize;
7775 if j > i {
7776 sa[i..j].fill(0);
7777 }
7778 let dst = i - l;
7779 m -= l as SaSint;
7780 sa.copy_within(m as usize..m as usize + l, dst);
7781 j = dst;
7782 }
7783 c -= buckets_index2(1, 0) as isize;
7784 }
7785 }
7786 sa[..j].fill(0);
7787}
7788
7789#[allow(dead_code)]
7790fn final_bwt_scan_left_to_right_16u_block_prepare(
7791 t: &[u16],
7792 sa: &mut [SaSint],
7793 k: SaSint,
7794 buckets: &mut [SaSint],
7795 cache: &mut [ThreadCache],
7796 omp_block_start: SaSint,
7797 omp_block_size: SaSint,
7798) -> SaSint {
7799 buckets[..k as usize].fill(0);
7800 let mut count = 0usize;
7801 for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7802 let mut p = sa[i];
7803 sa[i] = p & SAINT_MAX;
7804 if p > 0 {
7805 p -= 1;
7806 let c = t[p as usize] as usize;
7807 sa[i] = c as SaSint | SAINT_MIN;
7808 buckets[c] += 1;
7809 cache[count].symbol = c as SaSint;
7810 cache[count].index = p
7811 | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7812 << (SAINT_BIT - 1));
7813 count += 1;
7814 }
7815 }
7816 count as SaSint
7817}
7818
7819#[allow(dead_code)]
7820fn final_sorting_scan_left_to_right_16u_block_prepare(
7821 t: &[u16],
7822 sa: &mut [SaSint],
7823 k: SaSint,
7824 buckets: &mut [SaSint],
7825 cache: &mut [ThreadCache],
7826 omp_block_start: SaSint,
7827 omp_block_size: SaSint,
7828) -> SaSint {
7829 buckets[..k as usize].fill(0);
7830 let mut count = 0usize;
7831 for i in omp_block_start as usize..(omp_block_start + omp_block_size) as usize {
7832 let mut p = sa[i];
7833 sa[i] = p ^ SAINT_MIN;
7834 if p > 0 {
7835 p -= 1;
7836 let c = t[p as usize] as usize;
7837 buckets[c] += 1;
7838 cache[count].symbol = c as SaSint;
7839 cache[count].index = p
7840 | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] < t[p as usize]) as SaSint)
7841 << (SAINT_BIT - 1));
7842 count += 1;
7843 }
7844 }
7845 count as SaSint
7846}
7847
7848#[allow(dead_code)]
7849fn final_order_scan_left_to_right_16u_block_place(
7850 sa: &mut [SaSint],
7851 buckets: &mut [SaSint],
7852 cache: &[ThreadCache],
7853 count: SaSint,
7854) {
7855 for entry in cache.iter().take(count as usize) {
7856 let c = entry.symbol as usize;
7857 let dst = buckets[c] as usize;
7858 sa[dst] = entry.index;
7859 buckets[c] += 1;
7860 }
7861}
7862
7863#[allow(dead_code)]
7864fn final_bwt_aux_scan_left_to_right_16u_block_place(
7865 sa: &mut [SaSint],
7866 rm: SaSint,
7867 i_sample: &mut [SaSint],
7868 buckets: &mut [SaSint],
7869 cache: &[ThreadCache],
7870 count: SaSint,
7871) {
7872 for entry in cache.iter().take(count as usize) {
7873 let c = entry.symbol as usize;
7874 let dst = buckets[c] as usize;
7875 sa[dst] = entry.index;
7876 buckets[c] += 1;
7877 let p = entry.index & SAINT_MAX;
7878 if (p & rm) == 0 {
7879 i_sample[(p / (rm + 1)) as usize] = buckets[c];
7880 }
7881 }
7882}
7883
7884#[allow(dead_code)]
7885fn final_bwt_scan_left_to_right_16u_block_omp(
7886 t: &[u16],
7887 sa: &mut [SaSint],
7888 k: SaSint,
7889 induction_bucket: &mut [SaSint],
7890 block_start: SaSint,
7891 block_size: SaSint,
7892 threads: SaSint,
7893 thread_state: &mut [ThreadState],
7894) {
7895 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7896 usize::try_from(threads)
7897 .expect("threads must be non-negative")
7898 .min(thread_state.len())
7899 } else {
7900 1
7901 };
7902 if thread_count <= 1 {
7903 final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
7904 return;
7905 }
7906
7907 let k_usize = usize::try_from(k).expect("k must be non-negative");
7908 let block_stride = (block_size / thread_count as SaSint) & !15;
7909
7910 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7911 let local_start = thread as SaSint * block_stride;
7912 let local_size = if thread + 1 < thread_count {
7913 block_stride
7914 } else {
7915 block_size - local_start
7916 };
7917 state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7918 t,
7919 sa,
7920 k,
7921 &mut state.buckets[..k_usize],
7922 &mut state.cache,
7923 block_start + local_start,
7924 local_size,
7925 );
7926 }
7927
7928 for state in thread_state.iter_mut().take(thread_count) {
7929 for c in 0..k_usize {
7930 let a = induction_bucket[c];
7931 let b = state.buckets[c];
7932 induction_bucket[c] = a + b;
7933 state.buckets[c] = a;
7934 }
7935 }
7936
7937 for state in thread_state.iter_mut().take(thread_count) {
7938 final_order_scan_left_to_right_16u_block_place(
7939 sa,
7940 &mut state.buckets[..k_usize],
7941 &state.cache,
7942 state.count,
7943 );
7944 }
7945}
7946
7947#[allow(dead_code)]
7948fn final_bwt_aux_scan_left_to_right_16u_block_omp(
7949 t: &[u16],
7950 sa: &mut [SaSint],
7951 k: SaSint,
7952 rm: SaSint,
7953 i_sample: &mut [SaSint],
7954 induction_bucket: &mut [SaSint],
7955 block_start: SaSint,
7956 block_size: SaSint,
7957 threads: SaSint,
7958 thread_state: &mut [ThreadState],
7959) {
7960 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
7961 usize::try_from(threads)
7962 .expect("threads must be non-negative")
7963 .min(thread_state.len())
7964 } else {
7965 1
7966 };
7967 if thread_count <= 1 {
7968 final_bwt_aux_scan_left_to_right_16u(
7969 t,
7970 sa,
7971 rm,
7972 i_sample,
7973 induction_bucket,
7974 block_start,
7975 block_size,
7976 );
7977 return;
7978 }
7979
7980 let k_usize = usize::try_from(k).expect("k must be non-negative");
7981 let block_stride = (block_size / thread_count as SaSint) & !15;
7982
7983 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
7984 let local_start = thread as SaSint * block_stride;
7985 let local_size = if thread + 1 < thread_count {
7986 block_stride
7987 } else {
7988 block_size - local_start
7989 };
7990 state.count = final_bwt_scan_left_to_right_16u_block_prepare(
7991 t,
7992 sa,
7993 k,
7994 &mut state.buckets[..k_usize],
7995 &mut state.cache,
7996 block_start + local_start,
7997 local_size,
7998 );
7999 }
8000
8001 for state in thread_state.iter_mut().take(thread_count) {
8002 for c in 0..k_usize {
8003 let a = induction_bucket[c];
8004 let b = state.buckets[c];
8005 induction_bucket[c] = a + b;
8006 state.buckets[c] = a;
8007 }
8008 }
8009
8010 for state in thread_state.iter_mut().take(thread_count) {
8011 final_bwt_aux_scan_left_to_right_16u_block_place(
8012 sa,
8013 rm,
8014 i_sample,
8015 &mut state.buckets[..k_usize],
8016 &state.cache,
8017 state.count,
8018 );
8019 }
8020}
8021
8022#[allow(dead_code)]
8023fn final_sorting_scan_left_to_right_16u_block_omp(
8024 t: &[u16],
8025 sa: &mut [SaSint],
8026 k: SaSint,
8027 induction_bucket: &mut [SaSint],
8028 block_start: SaSint,
8029 block_size: SaSint,
8030 threads: SaSint,
8031 thread_state: &mut [ThreadState],
8032) {
8033 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8034 usize::try_from(threads)
8035 .expect("threads must be non-negative")
8036 .min(thread_state.len())
8037 } else {
8038 1
8039 };
8040 if thread_count <= 1 {
8041 final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, block_start, block_size);
8042 return;
8043 }
8044
8045 let k_usize = usize::try_from(k).expect("k must be non-negative");
8046 let block_stride = (block_size / thread_count as SaSint) & !15;
8047
8048 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8049 let local_start = thread as SaSint * block_stride;
8050 let local_size = if thread + 1 < thread_count {
8051 block_stride
8052 } else {
8053 block_size - local_start
8054 };
8055 state.count = final_sorting_scan_left_to_right_16u_block_prepare(
8056 t,
8057 sa,
8058 k,
8059 &mut state.buckets[..k_usize],
8060 &mut state.cache,
8061 block_start + local_start,
8062 local_size,
8063 );
8064 }
8065
8066 for state in thread_state.iter_mut().take(thread_count) {
8067 for c in 0..k_usize {
8068 let a = induction_bucket[c];
8069 let b = state.buckets[c];
8070 induction_bucket[c] = a + b;
8071 state.buckets[c] = a;
8072 }
8073 }
8074
8075 for state in thread_state.iter_mut().take(thread_count) {
8076 final_order_scan_left_to_right_16u_block_place(
8077 sa,
8078 &mut state.buckets[..k_usize],
8079 &state.cache,
8080 state.count,
8081 );
8082 }
8083}
8084
8085#[allow(dead_code)]
8086fn final_bwt_scan_left_to_right_16u_omp(
8087 t: &[u16],
8088 sa: &mut [SaSint],
8089 n: SaSint,
8090 k: SaSint,
8091 induction_bucket: &mut [SaSint],
8092 threads: SaSint,
8093) {
8094 let c = t[(n - 1) as usize] as usize;
8095 let dst = induction_bucket[c] as usize;
8096 induction_bucket[c] += 1;
8097 let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8098 SAINT_MIN
8099 } else {
8100 0
8101 };
8102 sa[dst] = (n - 1) | mark;
8103
8104 if threads == 1 || n < 65536 {
8105 final_bwt_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8106 } else {
8107 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8108 let mut block_start = 0;
8109 while block_start < n {
8110 if sa[block_start as usize] == 0 {
8111 block_start += 1;
8112 } else {
8113 let mut block_end =
8114 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8115 if block_end > n {
8116 block_end = n;
8117 }
8118 let mut block_scan_end = block_start + 1;
8119 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8120 block_scan_end += 1;
8121 }
8122 let block_size = block_scan_end - block_start;
8123 if block_size < 32 {
8124 while block_start < block_scan_end {
8125 let mut p = sa[block_start as usize];
8126 sa[block_start as usize] = p & SAINT_MAX;
8127 if p > 0 {
8128 p -= 1;
8129 let c = t[p as usize] as usize;
8130 sa[block_start as usize] = c as SaSint | SAINT_MIN;
8131 let dst = induction_bucket[c] as usize;
8132 induction_bucket[c] += 1;
8133 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8134 SAINT_MIN
8135 } else {
8136 0
8137 };
8138 sa[dst] = p | mark;
8139 }
8140 block_start += 1;
8141 }
8142 } else {
8143 final_bwt_scan_left_to_right_16u_block_omp(
8144 t,
8145 sa,
8146 k,
8147 induction_bucket,
8148 block_start,
8149 block_size,
8150 threads,
8151 &mut thread_state,
8152 );
8153 block_start = block_scan_end;
8154 }
8155 }
8156 }
8157 }
8158}
8159
8160#[allow(dead_code)]
8161fn final_bwt_aux_scan_left_to_right_16u_omp(
8162 t: &[u16],
8163 sa: &mut [SaSint],
8164 n: SaSint,
8165 k: SaSint,
8166 rm: SaSint,
8167 i_sample: &mut [SaSint],
8168 induction_bucket: &mut [SaSint],
8169 threads: SaSint,
8170) {
8171 let c = t[(n - 1) as usize] as usize;
8172 let dst = induction_bucket[c] as usize;
8173 induction_bucket[c] += 1;
8174 let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8175 SAINT_MIN
8176 } else {
8177 0
8178 };
8179 sa[dst] = (n - 1) | mark;
8180
8181 if ((n - 1) & rm) == 0 {
8182 i_sample[((n - 1) / (rm + 1)) as usize] = induction_bucket[c];
8183 }
8184
8185 if threads == 1 || n < 65536 {
8186 final_bwt_aux_scan_left_to_right_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8187 } else {
8188 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8189 let mut block_start = 0;
8190 while block_start < n {
8191 if sa[block_start as usize] == 0 {
8192 block_start += 1;
8193 } else {
8194 let mut block_end =
8195 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8196 if block_end > n {
8197 block_end = n;
8198 }
8199 let mut block_scan_end = block_start + 1;
8200 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8201 block_scan_end += 1;
8202 }
8203 let block_size = block_scan_end - block_start;
8204 if block_size < 32 {
8205 while block_start < block_scan_end {
8206 let mut p = sa[block_start as usize];
8207 sa[block_start as usize] = p & SAINT_MAX;
8208 if p > 0 {
8209 p -= 1;
8210 let c = t[p as usize] as usize;
8211 sa[block_start as usize] = c as SaSint | SAINT_MIN;
8212 let dst = induction_bucket[c] as usize;
8213 induction_bucket[c] += 1;
8214 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8215 SAINT_MIN
8216 } else {
8217 0
8218 };
8219 sa[dst] = p | mark;
8220 if (p & rm) == 0 {
8221 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
8222 }
8223 }
8224 block_start += 1;
8225 }
8226 } else {
8227 final_bwt_aux_scan_left_to_right_16u_block_omp(
8228 t,
8229 sa,
8230 k,
8231 rm,
8232 i_sample,
8233 induction_bucket,
8234 block_start,
8235 block_size,
8236 threads,
8237 &mut thread_state,
8238 );
8239 block_start = block_scan_end;
8240 }
8241 }
8242 }
8243 }
8244}
8245
8246#[allow(dead_code)]
8247fn final_sorting_scan_left_to_right_16u_omp(
8248 t: &[u16],
8249 sa: &mut [SaSint],
8250 n: SaSint,
8251 k: SaSint,
8252 induction_bucket: &mut [SaSint],
8253 threads: SaSint,
8254) {
8255 let c = t[(n - 1) as usize] as usize;
8256 let dst = induction_bucket[c] as usize;
8257 induction_bucket[c] += 1;
8258 let mark = if t[(n - 2) as usize] < t[(n - 1) as usize] {
8259 SAINT_MIN
8260 } else {
8261 0
8262 };
8263 sa[dst] = (n - 1) | mark;
8264
8265 if threads == 1 || n < 65536 {
8266 final_sorting_scan_left_to_right_16u(t, sa, induction_bucket, 0, n);
8267 } else {
8268 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8269 let mut block_start = 0;
8270 while block_start < n {
8271 if sa[block_start as usize] == 0 {
8272 block_start += 1;
8273 } else {
8274 let mut block_end =
8275 block_start + threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8276 if block_end > n {
8277 block_end = n;
8278 }
8279 let mut block_scan_end = block_start + 1;
8280 while block_scan_end < block_end && sa[block_scan_end as usize] != 0 {
8281 block_scan_end += 1;
8282 }
8283 let block_size = block_scan_end - block_start;
8284 if block_size < 32 {
8285 while block_start < block_scan_end {
8286 let mut p = sa[block_start as usize];
8287 sa[block_start as usize] = p ^ SAINT_MIN;
8288 if p > 0 {
8289 p -= 1;
8290 let c = t[p as usize] as usize;
8291 let dst = induction_bucket[c] as usize;
8292 induction_bucket[c] += 1;
8293 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
8294 SAINT_MIN
8295 } else {
8296 0
8297 };
8298 sa[dst] = p | mark;
8299 }
8300 block_start += 1;
8301 }
8302 } else {
8303 final_sorting_scan_left_to_right_16u_block_omp(
8304 t,
8305 sa,
8306 k,
8307 induction_bucket,
8308 block_start,
8309 block_size,
8310 threads,
8311 &mut thread_state,
8312 );
8313 block_start = block_scan_end;
8314 }
8315 }
8316 }
8317 }
8318}
8319
8320#[allow(dead_code)]
8321fn final_bwt_scan_right_to_left_16u_block_prepare(
8322 t: &[u16],
8323 sa: &mut [SaSint],
8324 k: SaSint,
8325 buckets: &mut [SaSint],
8326 cache: &mut [ThreadCache],
8327 omp_block_start: SaSint,
8328 omp_block_size: SaSint,
8329) -> SaSint {
8330 buckets[..k as usize].fill(0);
8331 let mut count = 0usize;
8332 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8333 let mut p = sa[i];
8334 sa[i] = p & SAINT_MAX;
8335 if p > 0 {
8336 p -= 1;
8337 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8338 let c1 = t[p as usize];
8339 sa[i] = c1 as SaSint;
8340 buckets[c1 as usize] += 1;
8341 cache[count].symbol = c1 as SaSint;
8342 cache[count].index = if c0 <= c1 {
8343 p
8344 } else {
8345 c0 as SaSint | SAINT_MIN
8346 };
8347 count += 1;
8348 }
8349 }
8350 count as SaSint
8351}
8352
8353#[allow(dead_code)]
8354fn final_bwt_aux_scan_right_to_left_16u_block_prepare(
8355 t: &[u16],
8356 sa: &mut [SaSint],
8357 k: SaSint,
8358 buckets: &mut [SaSint],
8359 cache: &mut [ThreadCache],
8360 omp_block_start: SaSint,
8361 omp_block_size: SaSint,
8362) -> SaSint {
8363 buckets[..k as usize].fill(0);
8364 let mut count = 0usize;
8365 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8366 let mut p = sa[i];
8367 sa[i] = p & SAINT_MAX;
8368 if p > 0 {
8369 p -= 1;
8370 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8371 let c1 = t[p as usize];
8372 sa[i] = c1 as SaSint;
8373 buckets[c1 as usize] += 1;
8374 cache[count].symbol = c1 as SaSint;
8375 cache[count].index = if c0 <= c1 {
8376 p
8377 } else {
8378 c0 as SaSint | SAINT_MIN
8379 };
8380 cache[count + 1].index = p;
8381 count += 2;
8382 }
8383 }
8384 count as SaSint
8385}
8386
8387#[allow(dead_code)]
8388fn final_sorting_scan_right_to_left_16u_block_prepare(
8389 t: &[u16],
8390 sa: &mut [SaSint],
8391 k: SaSint,
8392 buckets: &mut [SaSint],
8393 cache: &mut [ThreadCache],
8394 omp_block_start: SaSint,
8395 omp_block_size: SaSint,
8396) -> SaSint {
8397 buckets[..k as usize].fill(0);
8398 let mut count = 0usize;
8399 for i in (omp_block_start as usize..(omp_block_start + omp_block_size) as usize).rev() {
8400 let mut p = sa[i];
8401 sa[i] = p & SAINT_MAX;
8402 if p > 0 {
8403 p -= 1;
8404 let c = t[p as usize] as usize;
8405 buckets[c] += 1;
8406 cache[count].symbol = c as SaSint;
8407 cache[count].index = p
8408 | ((usize::from(t[(p - SaSint::from(p > 0)) as usize] > t[p as usize]) as SaSint)
8409 << (SAINT_BIT - 1));
8410 count += 1;
8411 }
8412 }
8413 count as SaSint
8414}
8415
8416#[allow(dead_code)]
8417fn final_order_scan_right_to_left_16u_block_place(
8418 sa: &mut [SaSint],
8419 buckets: &mut [SaSint],
8420 cache: &[ThreadCache],
8421 count: SaSint,
8422) {
8423 for entry in cache.iter().take(count as usize) {
8424 let c = entry.symbol as usize;
8425 buckets[c] -= 1;
8426 sa[buckets[c] as usize] = entry.index;
8427 }
8428}
8429
8430#[allow(dead_code)]
8431fn final_gsa_scan_right_to_left_16u_block_place(
8432 sa: &mut [SaSint],
8433 buckets: &mut [SaSint],
8434 cache: &[ThreadCache],
8435 count: SaSint,
8436) {
8437 for entry in cache.iter().take(count as usize) {
8438 let c = entry.symbol as usize;
8439 if c > 0 {
8440 buckets[c] -= 1;
8441 sa[buckets[c] as usize] = entry.index;
8442 }
8443 }
8444}
8445
8446#[allow(dead_code)]
8447fn final_bwt_aux_scan_right_to_left_16u_block_place(
8448 sa: &mut [SaSint],
8449 rm: SaSint,
8450 i_sample: &mut [SaSint],
8451 buckets: &mut [SaSint],
8452 cache: &[ThreadCache],
8453 count: SaSint,
8454) {
8455 let mut i = 0usize;
8456 while i < count as usize {
8457 let c = cache[i].symbol as usize;
8458 buckets[c] -= 1;
8459 sa[buckets[c] as usize] = cache[i].index;
8460 let p = cache[i + 1].index;
8461 if (p & rm) == 0 {
8462 i_sample[(p / (rm + 1)) as usize] = buckets[c] + 1;
8463 }
8464 i += 2;
8465 }
8466}
8467
8468#[allow(dead_code)]
8469fn final_bwt_scan_right_to_left_16u_block_omp(
8470 t: &[u16],
8471 sa: &mut [SaSint],
8472 k: SaSint,
8473 induction_bucket: &mut [SaSint],
8474 block_start: SaSint,
8475 block_size: SaSint,
8476 threads: SaSint,
8477 thread_state: &mut [ThreadState],
8478) -> SaSint {
8479 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8480 usize::try_from(threads)
8481 .expect("threads must be non-negative")
8482 .min(thread_state.len())
8483 } else {
8484 1
8485 };
8486 if thread_count <= 1 {
8487 return final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8488 }
8489
8490 let k_usize = usize::try_from(k).expect("k must be non-negative");
8491 let block_stride = (block_size / thread_count as SaSint) & !15;
8492
8493 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8494 let local_start = thread as SaSint * block_stride;
8495 let local_size = if thread + 1 < thread_count {
8496 block_stride
8497 } else {
8498 block_size - local_start
8499 };
8500 state.count = final_bwt_scan_right_to_left_16u_block_prepare(
8501 t,
8502 sa,
8503 k,
8504 &mut state.buckets[..k_usize],
8505 &mut state.cache,
8506 block_start + local_start,
8507 local_size,
8508 );
8509 }
8510
8511 for state in thread_state.iter_mut().take(thread_count).rev() {
8512 for c in 0..k_usize {
8513 let a = induction_bucket[c];
8514 let b = state.buckets[c];
8515 induction_bucket[c] = a - b;
8516 state.buckets[c] = a;
8517 }
8518 }
8519
8520 for state in thread_state.iter_mut().take(thread_count) {
8521 final_order_scan_right_to_left_16u_block_place(
8522 sa,
8523 &mut state.buckets[..k_usize],
8524 &state.cache,
8525 state.count,
8526 );
8527 }
8528
8529 -1
8530}
8531
8532#[allow(dead_code)]
8533fn final_bwt_aux_scan_right_to_left_16u_block_omp(
8534 t: &[u16],
8535 sa: &mut [SaSint],
8536 k: SaSint,
8537 rm: SaSint,
8538 i_sample: &mut [SaSint],
8539 induction_bucket: &mut [SaSint],
8540 block_start: SaSint,
8541 block_size: SaSint,
8542 threads: SaSint,
8543 thread_state: &mut [ThreadState],
8544) {
8545 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8546 usize::try_from(threads)
8547 .expect("threads must be non-negative")
8548 .min(thread_state.len())
8549 } else {
8550 1
8551 };
8552 if thread_count <= 1 {
8553 final_bwt_aux_scan_right_to_left_16u(
8554 t,
8555 sa,
8556 rm,
8557 i_sample,
8558 induction_bucket,
8559 block_start,
8560 block_size,
8561 );
8562 return;
8563 }
8564
8565 let k_usize = usize::try_from(k).expect("k must be non-negative");
8566 let block_stride = (block_size / thread_count as SaSint) & !15;
8567
8568 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8569 let local_start = thread as SaSint * block_stride;
8570 let local_size = if thread + 1 < thread_count {
8571 block_stride
8572 } else {
8573 block_size - local_start
8574 };
8575 state.count = final_bwt_aux_scan_right_to_left_16u_block_prepare(
8576 t,
8577 sa,
8578 k,
8579 &mut state.buckets[..k_usize],
8580 &mut state.cache,
8581 block_start + local_start,
8582 local_size,
8583 );
8584 }
8585
8586 for state in thread_state.iter_mut().take(thread_count).rev() {
8587 for c in 0..k_usize {
8588 let a = induction_bucket[c];
8589 let b = state.buckets[c];
8590 induction_bucket[c] = a - b;
8591 state.buckets[c] = a;
8592 }
8593 }
8594
8595 for state in thread_state.iter_mut().take(thread_count) {
8596 final_bwt_aux_scan_right_to_left_16u_block_place(
8597 sa,
8598 rm,
8599 i_sample,
8600 &mut state.buckets[..k_usize],
8601 &state.cache,
8602 state.count,
8603 );
8604 }
8605}
8606
8607#[allow(dead_code)]
8608fn final_sorting_scan_right_to_left_16u_block_omp(
8609 t: &[u16],
8610 sa: &mut [SaSint],
8611 k: SaSint,
8612 induction_bucket: &mut [SaSint],
8613 block_start: SaSint,
8614 block_size: SaSint,
8615 threads: SaSint,
8616 thread_state: &mut [ThreadState],
8617) {
8618 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8619 usize::try_from(threads)
8620 .expect("threads must be non-negative")
8621 .min(thread_state.len())
8622 } else {
8623 1
8624 };
8625 if thread_count <= 1 {
8626 final_sorting_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8627 return;
8628 }
8629
8630 let k_usize = usize::try_from(k).expect("k must be non-negative");
8631 let block_stride = (block_size / thread_count as SaSint) & !15;
8632
8633 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8634 let local_start = thread as SaSint * block_stride;
8635 let local_size = if thread + 1 < thread_count {
8636 block_stride
8637 } else {
8638 block_size - local_start
8639 };
8640 state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8641 t,
8642 sa,
8643 k,
8644 &mut state.buckets[..k_usize],
8645 &mut state.cache,
8646 block_start + local_start,
8647 local_size,
8648 );
8649 }
8650
8651 for state in thread_state.iter_mut().take(thread_count).rev() {
8652 for c in 0..k_usize {
8653 let a = induction_bucket[c];
8654 let b = state.buckets[c];
8655 induction_bucket[c] = a - b;
8656 state.buckets[c] = a;
8657 }
8658 }
8659
8660 for state in thread_state.iter_mut().take(thread_count) {
8661 final_order_scan_right_to_left_16u_block_place(
8662 sa,
8663 &mut state.buckets[..k_usize],
8664 &state.cache,
8665 state.count,
8666 );
8667 }
8668}
8669
8670#[allow(dead_code)]
8671fn final_gsa_scan_right_to_left_16u_block_omp(
8672 t: &[u16],
8673 sa: &mut [SaSint],
8674 k: SaSint,
8675 induction_bucket: &mut [SaSint],
8676 block_start: SaSint,
8677 block_size: SaSint,
8678 threads: SaSint,
8679 thread_state: &mut [ThreadState],
8680) {
8681 let thread_count = if threads > 1 && block_size >= 64 * k.max(256) {
8682 usize::try_from(threads)
8683 .expect("threads must be non-negative")
8684 .min(thread_state.len())
8685 } else {
8686 1
8687 };
8688 if thread_count <= 1 {
8689 final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, block_start, block_size);
8690 return;
8691 }
8692
8693 let k_usize = usize::try_from(k).expect("k must be non-negative");
8694 let block_stride = (block_size / thread_count as SaSint) & !15;
8695
8696 for (thread, state) in thread_state.iter_mut().take(thread_count).enumerate() {
8697 let local_start = thread as SaSint * block_stride;
8698 let local_size = if thread + 1 < thread_count {
8699 block_stride
8700 } else {
8701 block_size - local_start
8702 };
8703 state.count = final_sorting_scan_right_to_left_16u_block_prepare(
8704 t,
8705 sa,
8706 k,
8707 &mut state.buckets[..k_usize],
8708 &mut state.cache,
8709 block_start + local_start,
8710 local_size,
8711 );
8712 }
8713
8714 for state in thread_state.iter_mut().take(thread_count).rev() {
8715 for c in 0..k_usize {
8716 let a = induction_bucket[c];
8717 let b = state.buckets[c];
8718 induction_bucket[c] = a - b;
8719 state.buckets[c] = a;
8720 }
8721 }
8722
8723 for state in thread_state.iter_mut().take(thread_count) {
8724 final_gsa_scan_right_to_left_16u_block_place(
8725 sa,
8726 &mut state.buckets[..k_usize],
8727 &state.cache,
8728 state.count,
8729 );
8730 }
8731}
8732
8733#[allow(dead_code)]
8734fn final_bwt_scan_right_to_left_16u_omp(
8735 t: &[u16],
8736 sa: &mut [SaSint],
8737 n: SaSint,
8738 k: SaSint,
8739 induction_bucket: &mut [SaSint],
8740 threads: SaSint,
8741) -> SaSint {
8742 let mut index = -1;
8743
8744 if threads == 1 || n < 65536 {
8745 index = final_bwt_scan_right_to_left_16u(t, sa, induction_bucket, 0, n);
8746 } else {
8747 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8748 let mut block_start = n - 1;
8749 while block_start >= 0 {
8750 if sa[block_start as usize] == 0 {
8751 index = block_start;
8752 block_start -= 1;
8753 } else {
8754 let mut block_max_end =
8755 block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8756 if block_max_end < 0 {
8757 block_max_end = -1;
8758 }
8759 let mut block_end = block_start - 1;
8760 while block_end > block_max_end && sa[block_end as usize] != 0 {
8761 block_end -= 1;
8762 }
8763 let block_size = block_start - block_end;
8764 if block_size < 32 {
8765 while block_start > block_end {
8766 let mut p = sa[block_start as usize];
8767 sa[block_start as usize] = p & SAINT_MAX;
8768 if p > 0 {
8769 p -= 1;
8770 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8771 let c1 = t[p as usize] as usize;
8772 sa[block_start as usize] = c1 as SaSint;
8773 induction_bucket[c1] -= 1;
8774 sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8775 p
8776 } else {
8777 c0 as SaSint | SAINT_MIN
8778 };
8779 }
8780 block_start -= 1;
8781 }
8782 } else {
8783 final_bwt_scan_right_to_left_16u_block_omp(
8784 t,
8785 sa,
8786 k,
8787 induction_bucket,
8788 block_end + 1,
8789 block_size,
8790 threads,
8791 &mut thread_state,
8792 );
8793 block_start = block_end;
8794 }
8795 }
8796 }
8797 }
8798 index
8799}
8800
8801#[allow(dead_code)]
8802fn final_bwt_aux_scan_right_to_left_16u_omp(
8803 t: &[u16],
8804 sa: &mut [SaSint],
8805 n: SaSint,
8806 k: SaSint,
8807 rm: SaSint,
8808 i_sample: &mut [SaSint],
8809 induction_bucket: &mut [SaSint],
8810 threads: SaSint,
8811) {
8812 if threads == 1 || n < 65536 {
8813 final_bwt_aux_scan_right_to_left_16u(t, sa, rm, i_sample, induction_bucket, 0, n);
8814 } else {
8815 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8816 let mut block_start = n - 1;
8817 while block_start >= 0 {
8818 if sa[block_start as usize] == 0 {
8819 block_start -= 1;
8820 } else {
8821 let mut block_max_end =
8822 block_start - threads * ((PER_THREAD_CACHE_SIZE as SaSint - 16 * threads) / 2);
8823 if block_max_end < 0 {
8824 block_max_end = -1;
8825 }
8826 let mut block_end = block_start - 1;
8827 while block_end > block_max_end && sa[block_end as usize] != 0 {
8828 block_end -= 1;
8829 }
8830 let block_size = block_start - block_end;
8831 if block_size < 32 {
8832 while block_start > block_end {
8833 let mut p = sa[block_start as usize];
8834 sa[block_start as usize] = p & SAINT_MAX;
8835 if p > 0 {
8836 p -= 1;
8837 let c0 = t[(p - SaSint::from(p > 0)) as usize];
8838 let c1 = t[p as usize] as usize;
8839 sa[block_start as usize] = c1 as SaSint;
8840 induction_bucket[c1] -= 1;
8841 sa[induction_bucket[c1] as usize] = if c0 <= c1 as u16 {
8842 p
8843 } else {
8844 c0 as SaSint | SAINT_MIN
8845 };
8846 if (p & rm) == 0 {
8847 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1] + 1;
8848 }
8849 }
8850 block_start -= 1;
8851 }
8852 } else {
8853 final_bwt_aux_scan_right_to_left_16u_block_omp(
8854 t,
8855 sa,
8856 k,
8857 rm,
8858 i_sample,
8859 induction_bucket,
8860 block_end + 1,
8861 block_size,
8862 threads,
8863 &mut thread_state,
8864 );
8865 block_start = block_end;
8866 }
8867 }
8868 }
8869 }
8870}
8871
8872#[allow(dead_code)]
8873fn final_sorting_scan_right_to_left_16u_omp(
8874 t: &[u16],
8875 sa: &mut [SaSint],
8876 omp_block_start: SaSint,
8877 omp_block_size: SaSint,
8878 k: SaSint,
8879 induction_bucket: &mut [SaSint],
8880 threads: SaSint,
8881) {
8882 if threads == 1 || omp_block_size < 65536 {
8883 final_sorting_scan_right_to_left_16u(
8884 t,
8885 sa,
8886 induction_bucket,
8887 omp_block_start,
8888 omp_block_size,
8889 );
8890 } else {
8891 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8892 let mut block_start = omp_block_start + omp_block_size - 1;
8893 while block_start >= omp_block_start {
8894 if sa[block_start as usize] == 0 {
8895 block_start -= 1;
8896 } else {
8897 let mut block_max_end =
8898 block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8899 if block_max_end < omp_block_start {
8900 block_max_end = omp_block_start - 1;
8901 }
8902 let mut block_end = block_start - 1;
8903 while block_end > block_max_end && sa[block_end as usize] != 0 {
8904 block_end -= 1;
8905 }
8906 let block_size = block_start - block_end;
8907 if block_size < 32 {
8908 while block_start > block_end {
8909 let mut p = sa[block_start as usize];
8910 sa[block_start as usize] = p & SAINT_MAX;
8911 if p > 0 {
8912 p -= 1;
8913 let c = t[p as usize] as usize;
8914 induction_bucket[c] -= 1;
8915 let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8916 SAINT_MIN
8917 } else {
8918 0
8919 };
8920 sa[induction_bucket[c] as usize] = p | mark;
8921 }
8922 block_start -= 1;
8923 }
8924 } else {
8925 final_sorting_scan_right_to_left_16u_block_omp(
8926 t,
8927 sa,
8928 k,
8929 induction_bucket,
8930 block_end + 1,
8931 block_size,
8932 threads,
8933 &mut thread_state,
8934 );
8935 block_start = block_end;
8936 }
8937 }
8938 }
8939 }
8940}
8941
8942#[allow(dead_code)]
8943fn final_gsa_scan_right_to_left_16u_omp(
8944 t: &[u16],
8945 sa: &mut [SaSint],
8946 omp_block_start: SaSint,
8947 omp_block_size: SaSint,
8948 k: SaSint,
8949 induction_bucket: &mut [SaSint],
8950 threads: SaSint,
8951) {
8952 if threads == 1 || omp_block_size < 65536 {
8953 final_gsa_scan_right_to_left_16u(t, sa, induction_bucket, omp_block_start, omp_block_size);
8954 } else {
8955 let mut thread_state = alloc_thread_state(threads).unwrap_or_default();
8956 let mut block_start = omp_block_start + omp_block_size - 1;
8957 while block_start >= omp_block_start {
8958 if sa[block_start as usize] == 0 {
8959 block_start -= 1;
8960 } else {
8961 let mut block_max_end =
8962 block_start - threads * (PER_THREAD_CACHE_SIZE as SaSint - 16 * threads);
8963 if block_max_end < omp_block_start {
8964 block_max_end = omp_block_start - 1;
8965 }
8966 let mut block_end = block_start - 1;
8967 while block_end > block_max_end && sa[block_end as usize] != 0 {
8968 block_end -= 1;
8969 }
8970 let block_size = block_start - block_end;
8971 if block_size < 32 {
8972 while block_start > block_end {
8973 let mut p = sa[block_start as usize];
8974 sa[block_start as usize] = p & SAINT_MAX;
8975 if p > 0 && t[(p - 1) as usize] > 0 {
8976 p -= 1;
8977 let c = t[p as usize] as usize;
8978 induction_bucket[c] -= 1;
8979 let mark = if t[(p - SaSint::from(p > 0)) as usize] > t[p as usize] {
8980 SAINT_MIN
8981 } else {
8982 0
8983 };
8984 sa[induction_bucket[c] as usize] = p | mark;
8985 }
8986 block_start -= 1;
8987 }
8988 } else {
8989 final_gsa_scan_right_to_left_16u_block_omp(
8990 t,
8991 sa,
8992 k,
8993 induction_bucket,
8994 block_end + 1,
8995 block_size,
8996 threads,
8997 &mut thread_state,
8998 );
8999 block_start = block_end;
9000 }
9001 }
9002 }
9003 }
9004}
9005
9006#[allow(dead_code)]
9007fn induce_final_order_16u_omp(
9008 t: &[u16],
9009 sa: &mut [SaSint],
9010 n: SaSint,
9011 k: SaSint,
9012 flags: SaSint,
9013 r: SaSint,
9014 i_out: Option<&mut [SaSint]>,
9015 buckets: &mut [SaSint],
9016 threads: SaSint,
9017 _thread_state: &mut [ThreadState],
9018) -> SaSint {
9019 if (flags & LIBSAIS_FLAGS_BWT) == 0 {
9020 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9021 buckets[6 * ALPHABET_SIZE] = buckets[7 * ALPHABET_SIZE] - 1;
9022 }
9023
9024 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9025 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9026 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9027
9028 final_sorting_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9029 if threads > 1 && n >= 65_536 {
9030 clear_lms_suffixes_omp(
9031 sa,
9032 n,
9033 ALPHABET_SIZE as SaSint,
9034 bucket_start,
9035 bucket_end,
9036 threads,
9037 );
9038 }
9039
9040 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9041 flip_suffix_markers_omp(sa, bucket_end[0], threads);
9042 final_gsa_scan_right_to_left_16u_omp(
9043 t,
9044 sa,
9045 bucket_end[0],
9046 n - bucket_end[0],
9047 k,
9048 bucket_end,
9049 threads,
9050 );
9051 } else {
9052 final_sorting_scan_right_to_left_16u_omp(t, sa, 0, n, k, bucket_end, threads);
9053 }
9054
9055 0
9056 } else if let Some(i_out) = i_out {
9057 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9058 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9059 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9060
9061 final_bwt_aux_scan_left_to_right_16u_omp(t, sa, n, k, r - 1, i_out, bucket_start, threads);
9062 if threads > 1 && n >= 65_536 {
9063 clear_lms_suffixes_omp(
9064 sa,
9065 n,
9066 ALPHABET_SIZE as SaSint,
9067 bucket_start,
9068 bucket_end,
9069 threads,
9070 );
9071 }
9072 final_bwt_aux_scan_right_to_left_16u_omp(t, sa, n, k, r - 1, i_out, bucket_end, threads);
9073 0
9074 } else {
9075 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9076 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9077 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9078
9079 final_bwt_scan_left_to_right_16u_omp(t, sa, n, k, bucket_start, threads);
9080 if threads > 1 && n >= 65_536 {
9081 clear_lms_suffixes_omp(
9082 sa,
9083 n,
9084 ALPHABET_SIZE as SaSint,
9085 bucket_start,
9086 bucket_end,
9087 threads,
9088 );
9089 }
9090 final_bwt_scan_right_to_left_16u_omp(t, sa, n, k, bucket_end, threads)
9091 }
9092}
9093
9094#[allow(dead_code)]
9095fn bwt_copy_16u(u: &mut [u16], a: &[SaSint], n: SaSint) {
9096 let mut i = 0isize;
9097 let mut j = n as isize - 7;
9098 while i < j {
9099 u[i as usize] = a[i as usize] as u16;
9100 u[(i + 1) as usize] = a[(i + 1) as usize] as u16;
9101 u[(i + 2) as usize] = a[(i + 2) as usize] as u16;
9102 u[(i + 3) as usize] = a[(i + 3) as usize] as u16;
9103 u[(i + 4) as usize] = a[(i + 4) as usize] as u16;
9104 u[(i + 5) as usize] = a[(i + 5) as usize] as u16;
9105 u[(i + 6) as usize] = a[(i + 6) as usize] as u16;
9106 u[(i + 7) as usize] = a[(i + 7) as usize] as u16;
9107 i += 8;
9108 }
9109
9110 j += 7;
9111 while i < j {
9112 u[i as usize] = a[i as usize] as u16;
9113 i += 1;
9114 }
9115}
9116
9117#[allow(dead_code)]
9118fn bwt_copy_16u_omp(u: &mut [u16], a: &[SaSint], n: SaSint, threads: SaSint) {
9119 if threads == 1 || n < 65_536 {
9120 bwt_copy_16u(u, a, n);
9121 return;
9122 }
9123
9124 let block_stride = (n / threads) & !15;
9125 for thread in 0..threads {
9126 let block_start = thread * block_stride;
9127 let block_size = if thread < threads - 1 {
9128 block_stride
9129 } else {
9130 n - block_start
9131 };
9132 let start = block_start as usize;
9133 bwt_copy_16u(&mut u[start..], &a[start..], block_size);
9134 }
9135}
9136
/// Widens `s[block_start..block_start + block_size]` into the same index range
/// of `d`.
///
/// # Panics
///
/// Panics if the range exceeds the length of `s` or `d`.
#[allow(dead_code)]
fn convert_32u_to_64u(s: &[u32], d: &mut [u64], block_start: usize, block_size: usize) {
    let block_end = block_start + block_size;
    let narrow = &s[block_start..block_end];
    let wide = &mut d[block_start..block_end];
    for (dst, &src) in wide.iter_mut().zip(narrow) {
        *dst = u64::from(src);
    }
}
9143
/// Expands 32-bit values in place into pairs of 32-bit words.
///
/// For each `i` in `block_start..block_start + block_size`, taken from the
/// highest index down, `v[i]` is moved to `v[2 * i]` and `v[2 * i + 1]` is
/// zeroed. Going backwards keeps values that have not been moved yet intact.
#[allow(dead_code)]
fn convert_inplace_32u_to_64u(v: &mut [u32], block_start: usize, block_size: usize) {
    let mut i = block_start + block_size;
    while i > block_start {
        i -= 1;
        let value = v[i];
        let low = i + i;
        v[low] = value;
        v[low + 1] = 0;
    }
}
9151
/// Compacts word pairs in place: for each `i` in
/// `block_start..block_start + block_size`, in ascending order, `v[i]` receives
/// `v[2 * i]`.
#[allow(dead_code)]
fn convert_inplace_64u_to_32u(v: &mut [u32], block_start: usize, block_size: usize) {
    let block_end = block_start + block_size;
    let mut i = block_start;
    while i < block_end {
        let low_word = v[i + i];
        v[i] = low_word;
        i += 1;
    }
}
9158
9159#[allow(dead_code)]
9160fn convert_inplace_32u_to_64u_omp(v: &mut [u32], n: SaSint, threads: SaSint) {
9161 let mut n = usize::try_from(n).expect("n must be non-negative");
9162 let threads = usize::try_from(threads.max(1)).expect("threads must be non-negative");
9163
9164 while n >= 65_536 {
9165 let block_size = n >> 1;
9166 n -= block_size;
9167
9168 let omp_block_stride = (block_size / threads) & !15usize;
9169 for thread in 0..threads {
9170 let block_start = thread * omp_block_stride;
9171 let size = if thread + 1 < threads {
9172 omp_block_stride
9173 } else {
9174 block_size - block_start
9175 };
9176 convert_inplace_32u_to_64u(v, n + block_start, size);
9177 }
9178 }
9179
9180 convert_inplace_32u_to_64u(v, 0, n);
9181}
9182
9183#[allow(dead_code)]
9184fn final_bwt_ltr_step(t: &[u16], sa: &mut [SaSint], induction_bucket: &mut [SaSint], index: usize) {
9185 let mut p = sa[index];
9186 sa[index] = p & SAINT_MAX;
9187 if p > 0 {
9188 p -= 1;
9189 let c = t[p as usize] as usize;
9190 sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9191 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9192 SAINT_MIN
9193 } else {
9194 0
9195 };
9196 let dst = induction_bucket[c] as usize;
9197 sa[dst] = p | mark;
9198 induction_bucket[c] += 1;
9199 }
9200}
9201
9202#[allow(dead_code)]
9203fn final_bwt_rtl_step(
9204 t: &[u16],
9205 sa: &mut [SaSint],
9206 induction_bucket: &mut [SaSint],
9207 index: usize,
9208 primary_index: &mut SaSint,
9209) {
9210 let mut p = sa[index];
9211 if p == 0 {
9212 *primary_index = index as SaSint;
9213 }
9214 sa[index] = p & SAINT_MAX;
9215 if p > 0 {
9216 p -= 1;
9217 let c0 = t[(p - SaSint::from(p > 0)) as usize];
9218 let c1 = t[p as usize];
9219 sa[index] = c1 as SaSint;
9220 let induced = if c0 <= c1 {
9221 p
9222 } else {
9223 c0 as SaSint | SAINT_MIN
9224 };
9225 induction_bucket[c1 as usize] -= 1;
9226 sa[induction_bucket[c1 as usize] as usize] = induced;
9227 }
9228}
9229
9230#[allow(dead_code)]
9231fn final_bwt_aux_ltr_step(
9232 t: &[u16],
9233 sa: &mut [SaSint],
9234 rm: SaSint,
9235 i_sample: &mut [SaSint],
9236 induction_bucket: &mut [SaSint],
9237 index: usize,
9238) {
9239 let mut p = sa[index];
9240 sa[index] = p & SAINT_MAX;
9241 if p > 0 {
9242 p -= 1;
9243 let c = t[p as usize] as usize;
9244 sa[index] = t[p as usize] as SaSint | SAINT_MIN;
9245 let mark = if t[(p - SaSint::from(p > 0)) as usize] < t[p as usize] {
9246 SAINT_MIN
9247 } else {
9248 0
9249 };
9250 let dst = induction_bucket[c] as usize;
9251 sa[dst] = p | mark;
9252 induction_bucket[c] += 1;
9253 if (p & rm) == 0 {
9254 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c];
9255 }
9256 }
9257}
9258
9259#[allow(dead_code)]
9260fn final_bwt_aux_rtl_step(
9261 t: &[u16],
9262 sa: &mut [SaSint],
9263 rm: SaSint,
9264 i_sample: &mut [SaSint],
9265 induction_bucket: &mut [SaSint],
9266 index: usize,
9267) {
9268 let mut p = sa[index];
9269 sa[index] = p & SAINT_MAX;
9270 if p > 0 {
9271 p -= 1;
9272 let c0 = t[(p - SaSint::from(p > 0)) as usize];
9273 let c1 = t[p as usize];
9274 sa[index] = c1 as SaSint;
9275 let induced = if c0 <= c1 {
9276 p
9277 } else {
9278 c0 as SaSint | SAINT_MIN
9279 };
9280 induction_bucket[c1 as usize] -= 1;
9281 sa[induction_bucket[c1 as usize] as usize] = induced;
9282 if (p & rm) == 0 {
9283 i_sample[(p / (rm + 1)) as usize] = induction_bucket[c1 as usize] + 1;
9284 }
9285 }
9286}
9287
/// Recursive driver for integer-alphabet (`SaSint`) texts.
///
/// Inputs are raw pointers because the text handed to nested calls is carved out of the tail
/// of the suffix-array buffer itself (see the `new_t_start` computations below), so text,
/// suffix array and bucket storage may overlap.
///
/// * `t_ptr` – text of `n` symbols; read and, in several branches, rewritten in place.
/// * `sa_ptr` / `sa_capacity` – suffix-array storage; `n + fs` must not exceed `sa_capacity`
///   (asserted).
/// * `k` – alphabet size; selects how much bucket storage is used (`6k`, `4k`, `2k` or `k`).
/// * `fs` – free space after the first `n` slots, clamped to `SAINT_MAX - n`.
/// * `local_buffer` – scratch used for buckets whenever `LIBSAIS_LOCAL_BUFFER_SIZE > fs`.
///
/// Returns `0` on success and `-2` when a nested recursion reports failure. When the 32-bit
/// fast path is taken, the value returned by `libsais16_int_omp` is passed through unchanged.
///
/// # Panics
///
/// Panics when `n`, `fs` or (in the branches that convert it) `k` is negative, or when
/// `n + fs > sa_capacity`.
///
/// NOTE(review): every `unsafe` block rebuilds slices from the raw pointers; soundness relies
/// on the caller passing pointers valid for `n` (text) and `sa_capacity` (suffix array)
/// elements — confirm against callers.
#[allow(dead_code)]
fn main_32s_recursion(
    t_ptr: *mut SaSint,
    sa_ptr: *mut SaSint,
    sa_capacity: usize,
    n: SaSint,
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
    local_buffer: &mut [SaSint],
) -> SaSint {
    let fs = fs.min(SAINT_MAX - n);
    let local_buffer_size = SaSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("fits");
    let n_usize = usize::try_from(n).expect("n must be non-negative");
    let fs_usize = usize::try_from(fs).expect("fs must be non-negative");
    let total_len = n_usize + fs_usize;
    assert!(total_len <= sa_capacity);

    // 32-bit fast path: when the length fits in `i32`, try to delegate to the 32-bit
    // implementation using freshly allocated `i32` copies of the text and suffix array.
    if n <= i32::MAX as SaSint && k > 0 {
        // Widened to i128 so the doubled sum cannot overflow before the comparison.
        let doubled_space = i128::from(fs) + i128::from(fs) + i128::from(n) + i128::from(n);
        let new_fs = if doubled_space <= i128::from(i32::MAX) {
            fs + fs + n
        } else {
            i32::MAX as SaSint - n
        };

        if (new_fs / k >= 6)
            || (new_fs / k >= 4 && n <= (i32::MAX as SaSint) / 2)
            || (new_fs / k < 4 && new_fs >= fs)
        {
            let t = unsafe { std::slice::from_raw_parts_mut(t_ptr, n_usize) };
            let mut t32 = Vec::with_capacity(n_usize);
            // Stop at the first symbol that does not fit in `i32`; a short `t32` then
            // disables the fast path below.
            for &value in t.iter() {
                let Ok(value) = i32::try_from(value) else {
                    break;
                };
                t32.push(value);
            }

            if t32.len() == n_usize {
                let mut sa32 = vec![0_i32; n_usize + usize::try_from(new_fs).expect("fits")];
                let index = crate::libsais16::libsais16_int_omp(
                    &mut t32,
                    &mut sa32,
                    k as i32,
                    new_fs as i32,
                    threads as i32,
                ) as SaSint;

                // Copy the first `n` results back only when the 32-bit call succeeded.
                if index >= 0 {
                    let sa = unsafe { std::slice::from_raw_parts_mut(sa_ptr, n_usize) };
                    for (dst, src) in sa.iter_mut().zip(sa32.iter()) {
                        *dst = SaSint::from(*src);
                    }
                }

                return index;
            }
        }
    }

    // Branch 1: enough room (in the free space or the local buffer) for 6 * k buckets.
    if k > 0 && ((fs / k) >= 6 || (local_buffer_size / k) >= 6) {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 6 {
            1024usize
        } else {
            16usize
        };
        let need = 6 * k_usize;
        let use_local_buffer = local_buffer_size > fs;
        // Buckets live in the local buffer, or at the (optionally aligned) tail of the
        // suffix-array buffer.
        let buckets_ptr = if use_local_buffer {
            local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 6 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_4k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            // Clear everything in front of the `m` gathered entries.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[..n_usize - m_usize].fill(0);
            }

            let first_lms_suffix = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[n_usize - m_usize]
            };
            let left_suffixes_count = unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
                    std::slice::from_raw_parts(t_ptr, n_usize),
                    k,
                    buckets,
                    first_lms_suffix,
                )
            };

            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                // The induction buckets are the part of the table after the first 4 * k slots.
                let (_, induction_bucket) = buckets.split_at_mut(4 * k_usize);
                radix_sort_lms_suffixes_32s_6k_omp(t, sa, n, m, induction_bucket, threads);
                if (n / 8192) < k {
                    radix_sort_set_markers_32s_6k_omp(sa, k, induction_bucket, threads);
                }
                if threads > 1 && n >= 65_536 {
                    sa[n_usize - m_usize..n_usize].fill(0);
                }
                initialize_buckets_for_partial_sorting_32s_6k(
                    t,
                    k,
                    buckets,
                    first_lms_suffix,
                    left_suffixes_count,
                );
                induce_partial_order_32s_6k_omp(
                    t,
                    sa,
                    n,
                    k,
                    buckets,
                    first_lms_suffix,
                    left_suffixes_count,
                    threads,
                    thread_state,
                );
            }

            // The same `(n / 8192) < k` test that gated marker setting picks the renumbering
            // routine.
            let names = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                if (n / 8192) < k {
                    renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
                        sa,
                        n,
                        m,
                        threads,
                        thread_state,
                    )
                } else {
                    renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state)
                }
            };

            // Fewer names than entries: recurse on the reduced problem.
            if names < m {
                let f = if (n / 8192) < k {
                    unsafe {
                        let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                        let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                        compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                    }
                } else {
                    0
                };

                // The reduced text occupies the last `m - f` slots of the buffer.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }

            // Final placement and induction use the 4k helpers on this path.
            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_4k(k, buckets);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
            }
        } else {
            // At most one gathered entry: move the last slot to the front and finish with
            // the 6k helpers.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }

            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_6k(k, buckets);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                place_lms_suffixes_histogram_32s_6k(sa, n, k, m, buckets);
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                induce_final_order_32s_6k(t, sa, n, k, buckets, threads, thread_state);
            }
        }

        return 0;
    // Branch 2: room for 4 * k buckets (only taken when n <= SAINT_MAX / 2).
    } else if k > 0 && n <= SAINT_MAX / 2 && ((fs / k) >= 4 || (local_buffer_size / k) >= 4) {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 4 {
            1024usize
        } else {
            16usize
        };
        let need = 4 * k_usize;
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 4 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_2k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                initialize_buckets_for_radix_and_partial_sorting_32s_4k(
                    t,
                    k,
                    buckets,
                    sa[n_usize - m_usize],
                );
                // The radix-sort helpers receive the bucket table shifted by one slot.
                let (_, induction_bucket) = buckets.split_at_mut(1);
                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
                radix_sort_set_markers_32s_4k_omp(sa, k, induction_bucket, threads);
                place_lms_suffixes_interval_32s_4k(sa, n, k, m - 1, buckets);
                induce_partial_order_32s_4k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa, n, m, threads, thread_state)
            };
            if names < m {
                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                };

                // The reduced text occupies the last `m - f` slots of the buffer.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }
        }

        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_start_and_end_32s_4k(k, buckets);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
        }

        return 0;
    // Branch 3: room for 2 * k buckets.
    } else if k > 0 && ((fs / k) >= 2 || (local_buffer_size / k) >= 2) {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 2 {
            1024usize
        } else {
            16usize
        };
        let need = 2 * k_usize;
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 2 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_2k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
                    t,
                    k,
                    buckets,
                    sa[n_usize - m_usize],
                );
                // The radix-sort helper receives the bucket table shifted by one slot.
                let (_, induction_bucket) = buckets.split_at_mut(1);
                radix_sort_lms_suffixes_32s_2k_omp(t, sa, n, m, induction_bucket, threads);
                place_lms_suffixes_interval_32s_2k(sa, n, k, m - 1, buckets);
            }

            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_2k(k, buckets);
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                induce_partial_order_32s_2k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
            };
            if names < m {
                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                };

                // The reduced text occupies the last `m - f` slots of the buffer.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }
        }

        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_end_32s_2k(k, buckets);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            place_lms_suffixes_histogram_32s_2k(sa, n, k, m, buckets);
        }

        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_start_and_end_32s_2k(k, buckets);
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            induce_final_order_32s_2k(t, sa, n, k, buckets, threads, thread_state);
        }

        0
    // Branch 4: fallback with a single table of `k` buckets.
    } else {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        // When the free space cannot hold `k` buckets, they are heap-allocated instead.
        let mut heap_buckets = if fs < k { Some(vec![0; k_usize]) } else { None };
        let alignment = if fs >= 1024 && (fs - 1024) >= k {
            1024usize
        } else {
            16usize
        };
        let mut buckets_ptr = if let Some(ref mut heap) = heap_buckets {
            heap.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start = if fs_usize >= k_usize + alignment {
                    let byte_ptr = sa[total_len - k_usize - alignment..].as_mut_ptr() as usize;
                    let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                    (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                } else {
                    total_len - k_usize
                };
                sa[start..].as_mut_ptr()
            }
        };

        if buckets_ptr.is_null() {
            return -2;
        }

        unsafe {
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            sa[..n_usize].fill(0);
        }

        unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            count_suffixes_32s(t, n, k, buckets);
            initialize_buckets_end_32s_1k(k, buckets);
        }

        let m = unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            radix_sort_lms_suffixes_32s_1k(t, sa, n, buckets)
        };
        if m > 1 {
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                induce_partial_order_32s_1k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
            };
            if names < m {
                // Release heap-allocated buckets for the duration of the nested call; the
                // null pointer marks that they must be re-created afterwards.
                if heap_buckets.is_some() {
                    let _ = heap_buckets.take();
                    buckets_ptr = std::ptr::null_mut();
                }

                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads)
                };

                // The reduced text occupies the last `m - f` slots of the buffer.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    reconstruct_compacted_lms_suffixes_32s_1k_omp(t, sa, n, m, fs, f, threads);
                }

                // Re-create the heap buckets that were released before the nested call.
                if buckets_ptr.is_null() {
                    heap_buckets = Some(vec![0; k_usize]);
                    buckets_ptr = heap_buckets.as_mut().unwrap().as_mut_ptr();
                    if buckets_ptr.is_null() {
                        return -2;
                    }
                }
            }

            // Recount, since the bucket table was either consumed or freshly re-created.
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                count_suffixes_32s(t, n, k, buckets);
                initialize_buckets_end_32s_1k(k, buckets);
            }
            unsafe {
                let t = std::slice::from_raw_parts(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                place_lms_suffixes_interval_32s_1k(t, sa, k, m, buckets);
            }
        }

        unsafe {
            let t = std::slice::from_raw_parts(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            induce_final_order_32s_1k(t, sa, n, k, buckets, threads, thread_state);
        }

        0
    }
}
9950
9951#[allow(dead_code)]
9952fn main_32s_entry(
9953 t_ptr: *mut SaSint,
9954 sa: &mut [SaSint],
9955 n: SaSint,
9956 k: SaSint,
9957 fs: SaSint,
9958 threads: SaSint,
9959 thread_state: &mut [ThreadState],
9960) -> SaSint {
9961 let mut local_buffer = [0; 2 * LIBSAIS_LOCAL_BUFFER_SIZE];
9962 main_32s_recursion(
9963 t_ptr,
9964 sa.as_mut_ptr(),
9965 sa.len(),
9966 n,
9967 k,
9968 fs,
9969 threads,
9970 thread_state,
9971 &mut local_buffer[LIBSAIS_LOCAL_BUFFER_SIZE..],
9972 )
9973}
9974
9975#[allow(dead_code)]
9976fn main_16u(
9977 t: &[u16],
9978 sa: &mut [SaSint],
9979 n: SaSint,
9980 buckets: &mut [SaSint],
9981 flags: SaSint,
9982 r: SaSint,
9983 i_out: Option<&mut [SaSint]>,
9984 fs: SaSint,
9985 freq: Option<&mut [SaSint]>,
9986 threads: SaSint,
9987 thread_state: &mut [ThreadState],
9988) -> SaSint {
9989 let fs = fs.min(SAINT_MAX - n);
9990
9991 let m = count_and_gather_lms_suffixes_16u_omp(t, sa, n, buckets, threads, thread_state);
9992 let k = initialize_buckets_start_and_end_16u(buckets, freq);
9993
9994 if (flags & LIBSAIS_FLAGS_GSA) != 0 && (buckets[0] != 0 || buckets[2] != 0 || buckets[3] != 1) {
9995 return -1;
9996 }
9997
9998 if m > 0 {
9999 let first_lms_suffix = sa[(n - m) as usize];
10000 let left_suffixes_count =
10001 initialize_buckets_for_lms_suffixes_radix_sort_16u(t, buckets, first_lms_suffix);
10002
10003 if threads > 1 && n >= 65_536 {
10004 sa[..(n - m) as usize].fill(0);
10005 }
10006 radix_sort_lms_suffixes_16u_omp(t, sa, n, m, flags, buckets, threads, thread_state);
10007 if threads > 1 && n >= 65_536 {
10008 sa[(n - m) as usize..n as usize].fill(0);
10009 }
10010
10011 initialize_buckets_for_partial_sorting_16u(
10012 t,
10013 buckets,
10014 first_lms_suffix,
10015 left_suffixes_count,
10016 );
10017 induce_partial_order_16u_omp(
10018 t,
10019 sa,
10020 n,
10021 k,
10022 flags,
10023 buckets,
10024 first_lms_suffix,
10025 left_suffixes_count,
10026 threads,
10027 );
10028
10029 let names = renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
10030 if names < m {
10031 let recursive_t_start = (n + fs - m) as usize;
10032 let recursive_t_ptr = sa[recursive_t_start..].as_mut_ptr();
10033 if main_32s_entry(
10034 recursive_t_ptr,
10035 sa,
10036 m,
10037 names,
10038 fs + n - 2 * m,
10039 threads,
10040 thread_state,
10041 ) != 0
10042 {
10043 return -2;
10044 }
10045
10046 gather_lms_suffixes_16u_omp(t, sa, n, threads, thread_state);
10047 reconstruct_lms_suffixes_omp(sa, n, m, threads);
10048 }
10049
10050 place_lms_suffixes_interval_16u(sa, n, m, flags, buckets);
10051 } else {
10052 sa[..n as usize].fill(0);
10053 }
10054
10055 induce_final_order_16u_omp(t, sa, n, k, flags, r, i_out, buckets, threads, thread_state)
10056}
10057
10058#[allow(dead_code)]
10059fn main_16u_alloc(
10060 t: &[u16],
10061 sa: &mut [SaSint],
10062 flags: SaSint,
10063 r: SaSint,
10064 i_out: Option<&mut [SaSint]>,
10065 fs: SaSint,
10066 freq: Option<&mut [SaSint]>,
10067 threads: SaSint,
10068) -> SaSint {
10069 if fs < 0
10070 || threads < 0
10071 || sa.len()
10072 < t.len()
10073 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10074 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10075 {
10076 return -1;
10077 }
10078
10079 fill_freq(t, freq);
10080 if t.len() <= 1 {
10081 if t.len() == 1 {
10082 sa[0] = 0;
10083 }
10084 return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
10085 t.len() as SaSint
10086 } else {
10087 0
10088 };
10089 }
10090
10091 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
10092 let threads = normalize_threads(threads);
10093 let mut thread_state = if threads > 1 {
10094 match alloc_thread_state(threads) {
10095 Some(thread_state) => thread_state,
10096 None => return -2,
10097 }
10098 } else {
10099 Vec::new()
10100 };
10101
10102 main_16u(
10103 t,
10104 sa,
10105 t.len() as SaSint,
10106 &mut buckets,
10107 flags,
10108 r,
10109 i_out,
10110 fs,
10111 None,
10112 threads,
10113 &mut thread_state,
10114 )
10115}
10116
10117fn main_16u_ctx(
10118 ctx: &mut Context,
10119 t: &[u16],
10120 sa: &mut [SaSint],
10121 flags: SaSint,
10122 r: SaSint,
10123 i_out: Option<&mut [SaSint]>,
10124 fs: SaSint,
10125 freq: Option<&mut [SaSint]>,
10126) -> SaSint {
10127 if fs < 0
10128 || sa.len()
10129 < t.len()
10130 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10131 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10132 {
10133 return -1;
10134 }
10135
10136 if ctx.threads <= 0 || ctx.buckets.len() < 8 * ALPHABET_SIZE {
10137 return -2;
10138 }
10139
10140 fill_freq(t, freq);
10141 if t.len() <= 1 {
10142 if t.len() == 1 {
10143 sa[0] = 0;
10144 }
10145 return if (flags & LIBSAIS_FLAGS_BWT) != 0 {
10146 t.len() as SaSint
10147 } else {
10148 0
10149 };
10150 }
10151
10152 let mut empty_thread_state = [];
10153 let thread_state = if ctx.threads > 1 {
10154 match ctx.thread_state.as_deref_mut() {
10155 Some(thread_state) if thread_state.len() >= ctx.threads as usize => thread_state,
10156 None => return -2,
10157 Some(_) => return -2,
10158 }
10159 } else {
10160 &mut empty_thread_state
10161 };
10162
10163 main_16u(
10164 t,
10165 sa,
10166 t.len() as SaSint,
10167 &mut ctx.buckets,
10168 flags,
10169 r,
10170 i_out,
10171 fs,
10172 None,
10173 ctx.threads,
10174 thread_state,
10175 )
10176}
10177
10178fn main_long(
10179 t: &mut [SaSint],
10180 sa: &mut [SaSint],
10181 k: SaSint,
10182 fs: SaSint,
10183 threads: SaSint,
10184) -> SaSint {
10185 let threads = normalize_threads(threads);
10186 let mut thread_state = if threads > 1 {
10187 match alloc_thread_state(threads) {
10188 Some(thread_state) => thread_state,
10189 None => return -2,
10190 }
10191 } else {
10192 Vec::new()
10193 };
10194
10195 main_32s_entry(
10196 t.as_mut_ptr(),
10197 sa,
10198 t.len() as SaSint,
10199 k,
10200 fs,
10201 threads,
10202 &mut thread_state,
10203 )
10204}
10205
10206pub fn libsais16x64(
10215 t: &[u16],
10216 sa: &mut [SaSint],
10217 fs: SaSint,
10218 freq: Option<&mut [SaSint]>,
10219) -> SaSint {
10220 main_16u_alloc(t, sa, 0, 0, None, fs, freq, 1)
10221}
10222
10223pub fn libsais16x64_gsa(
10232 t: &[u16],
10233 sa: &mut [SaSint],
10234 fs: SaSint,
10235 freq: Option<&mut [SaSint]>,
10236) -> SaSint {
10237 main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, 1)
10238}
10239
10240pub fn libsais16x64_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
10242 if fs < 0
10243 || sa.len()
10244 < t.len()
10245 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10246 {
10247 return -1;
10248 }
10249
10250 if t.len() <= 1 {
10251 if t.len() == 1 {
10252 sa[0] = 0;
10253 }
10254 return 0;
10255 }
10256
10257 main_long(t, sa, k, fs, 1)
10258}
10259
10260pub fn libsais16x64_long(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
10271 libsais16x64_int(t, sa, k, fs)
10272}
10273
10274pub fn libsais16x64_ctx(
10284 ctx: &mut Context,
10285 t: &[u16],
10286 sa: &mut [SaSint],
10287 fs: SaSint,
10288 freq: Option<&mut [SaSint]>,
10289) -> SaSint {
10290 main_16u_ctx(ctx, t, sa, 0, 0, None, fs, freq)
10291}
10292
10293pub fn libsais16x64_gsa_ctx(
10303 ctx: &mut Context,
10304 t: &[u16],
10305 sa: &mut [SaSint],
10306 fs: SaSint,
10307 freq: Option<&mut [SaSint]>,
10308) -> SaSint {
10309 main_16u_ctx(ctx, t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq)
10310}
10311
10312pub fn libsais16x64_omp(
10322 t: &[u16],
10323 sa: &mut [SaSint],
10324 fs: SaSint,
10325 freq: Option<&mut [SaSint]>,
10326 threads: SaSint,
10327) -> SaSint {
10328 if threads < 0 {
10329 -1
10330 } else {
10331 main_16u_alloc(t, sa, 0, 0, None, fs, freq, threads)
10332 }
10333}
10334
10335pub fn libsais16x64_gsa_omp(
10345 t: &[u16],
10346 sa: &mut [SaSint],
10347 fs: SaSint,
10348 freq: Option<&mut [SaSint]>,
10349 threads: SaSint,
10350) -> SaSint {
10351 if threads < 0 {
10352 -1
10353 } else {
10354 main_16u_alloc(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, threads)
10355 }
10356}
10357
10358pub fn libsais16x64_int_omp(
10360 t: &mut [SaSint],
10361 sa: &mut [SaSint],
10362 k: SaSint,
10363 fs: SaSint,
10364 threads: SaSint,
10365) -> SaSint {
10366 if threads < 0
10367 || fs < 0
10368 || sa.len()
10369 < t.len()
10370 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10371 {
10372 return -1;
10373 }
10374
10375 if t.len() <= 1 {
10376 if t.len() == 1 {
10377 sa[0] = 0;
10378 }
10379 return 0;
10380 }
10381
10382 main_long(t, sa, k, fs, threads)
10383}
10384
10385pub fn libsais16x64_long_omp(
10397 t: &mut [SaSint],
10398 sa: &mut [SaSint],
10399 k: SaSint,
10400 fs: SaSint,
10401 threads: SaSint,
10402) -> SaSint {
10403 libsais16x64_int_omp(t, sa, k, fs, threads)
10404}
10405
10406fn build_bwt(
10407 t: &[u16],
10408 u: &mut [u16],
10409 a: &mut [SaSint],
10410 fs: SaSint,
10411 freq: Option<&mut [SaSint]>,
10412 threads: SaSint,
10413) -> SaSint {
10414 if fs < 0
10415 || threads < 0
10416 || u.len() < t.len()
10417 || a.len()
10418 < t.len()
10419 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10420 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10421 {
10422 return -1;
10423 }
10424 if t.len() <= 1 {
10425 fill_freq(t, freq);
10426 if t.len() == 1 {
10427 u[0] = t[0];
10428 }
10429 return t.len() as SaSint;
10430 }
10431
10432 let n = t.len();
10433 let mut index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, threads);
10434 if index >= 0 {
10435 index += 1;
10436 u[0] = t[n - 1];
10437 bwt_copy_16u(&mut u[1..], a, index - 1);
10438 bwt_copy_16u(
10439 &mut u[index as usize..],
10440 &a[index as usize..],
10441 n as SaSint - index,
10442 );
10443 }
10444 index
10445}
10446
10447pub fn libsais16x64_bwt(
10457 t: &[u16],
10458 u: &mut [u16],
10459 a: &mut [SaSint],
10460 fs: SaSint,
10461 freq: Option<&mut [SaSint]>,
10462) -> SaSint {
10463 build_bwt(t, u, a, fs, freq, 1)
10464}
10465
10466fn build_bwt_aux(
10467 t: &[u16],
10468 u: &mut [u16],
10469 a: &mut [SaSint],
10470 fs: SaSint,
10471 freq: Option<&mut [SaSint]>,
10472 r: SaSint,
10473 i: &mut [SaSint],
10474 threads: SaSint,
10475) -> SaSint {
10476 if threads < 0 || r < 2 || (r & (r - 1)) != 0 {
10477 return -1;
10478 }
10479 let samples = if t.is_empty() {
10480 1
10481 } else {
10482 (t.len() - 1) / r as usize + 1
10483 };
10484 if i.len() < samples {
10485 return -1;
10486 }
10487 let n = t.len();
10488 if n <= 1 {
10489 fill_freq(t, freq);
10490 if n == 1 {
10491 u[0] = t[0];
10492 }
10493 i[0] = n as SaSint;
10494 return 0;
10495 }
10496
10497 let index = main_16u_alloc(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, threads);
10498 if index == 0 {
10499 u[0] = t[n - 1];
10500 bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10501 bwt_copy_16u(
10502 &mut u[i[0] as usize..],
10503 &a[i[0] as usize..],
10504 n as SaSint - i[0],
10505 );
10506 }
10507 index
10508}
10509
10510pub fn libsais16x64_bwt_aux(
10522 t: &[u16],
10523 u: &mut [u16],
10524 a: &mut [SaSint],
10525 fs: SaSint,
10526 freq: Option<&mut [SaSint]>,
10527 r: SaSint,
10528 i: &mut [SaSint],
10529) -> SaSint {
10530 build_bwt_aux(t, u, a, fs, freq, r, i, 1)
10531}
10532
10533pub fn libsais16x64_bwt_ctx(
10544 ctx: &mut Context,
10545 t: &[u16],
10546 u: &mut [u16],
10547 a: &mut [SaSint],
10548 fs: SaSint,
10549 freq: Option<&mut [SaSint]>,
10550) -> SaSint {
10551 if fs < 0
10552 || u.len() < t.len()
10553 || a.len()
10554 < t.len()
10555 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10556 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10557 {
10558 return -1;
10559 }
10560 if t.len() <= 1 {
10561 fill_freq(t, freq);
10562 if t.len() == 1 {
10563 u[0] = t[0];
10564 }
10565 return t.len() as SaSint;
10566 }
10567
10568 let n = t.len();
10569 let mut index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq);
10570 if index >= 0 {
10571 index += 1;
10572 u[0] = t[n - 1];
10573 bwt_copy_16u(&mut u[1..], a, index - 1);
10574 bwt_copy_16u(
10575 &mut u[index as usize..],
10576 &a[index as usize..],
10577 n as SaSint - index,
10578 );
10579 }
10580 index
10581}
10582
10583pub fn libsais16x64_bwt_aux_ctx(
10596 ctx: &mut Context,
10597 t: &[u16],
10598 u: &mut [u16],
10599 a: &mut [SaSint],
10600 fs: SaSint,
10601 freq: Option<&mut [SaSint]>,
10602 r: SaSint,
10603 i: &mut [SaSint],
10604) -> SaSint {
10605 if fs < 0 || r < 2 || (r & (r - 1)) != 0 {
10606 return -1;
10607 }
10608 let samples = if t.is_empty() {
10609 1
10610 } else {
10611 (t.len() - 1) / r as usize + 1
10612 };
10613 if u.len() < t.len()
10614 || a.len()
10615 < t.len()
10616 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
10617 || i.len() < samples
10618 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10619 {
10620 return -1;
10621 }
10622 if t.len() <= 1 {
10623 fill_freq(t, freq);
10624 if t.len() == 1 {
10625 u[0] = t[0];
10626 }
10627 i[0] = t.len() as SaSint;
10628 return 0;
10629 }
10630
10631 let n = t.len();
10632 let index = main_16u_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq);
10633 if index == 0 {
10634 u[0] = t[n - 1];
10635 bwt_copy_16u(&mut u[1..], a, i[0] - 1);
10636 bwt_copy_16u(
10637 &mut u[i[0] as usize..],
10638 &a[i[0] as usize..],
10639 n as SaSint - i[0],
10640 );
10641 }
10642 index
10643}
10644
10645pub fn libsais16x64_bwt_omp(
10656 t: &[u16],
10657 u: &mut [u16],
10658 a: &mut [SaSint],
10659 fs: SaSint,
10660 freq: Option<&mut [SaSint]>,
10661 threads: SaSint,
10662) -> SaSint {
10663 if threads < 0 {
10664 -1
10665 } else {
10666 build_bwt(t, u, a, fs, freq, threads)
10667 }
10668}
10669
10670pub fn libsais16x64_bwt_aux_omp(
10683 t: &[u16],
10684 u: &mut [u16],
10685 a: &mut [SaSint],
10686 fs: SaSint,
10687 freq: Option<&mut [SaSint]>,
10688 r: SaSint,
10689 i: &mut [SaSint],
10690 threads: SaSint,
10691) -> SaSint {
10692 if threads < 0 {
10693 -1
10694 } else {
10695 build_bwt_aux(t, u, a, fs, freq, r, i, threads)
10696 }
10697}
10698
10699fn validate_unbwt_aux(
10700 t: &[u16],
10701 u: &[u16],
10702 a: &[SaSint],
10703 freq: Option<&[SaSint]>,
10704 r: SaSint,
10705 i: &[SaSint],
10706) -> SaSint {
10707 let n = t.len();
10708 if u.len() < n
10709 || a.len() < n
10710 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
10711 || ((r != n as SaSint) && (r < 2 || (r & (r - 1)) != 0))
10712 || i.is_empty()
10713 {
10714 return -1;
10715 }
10716 if n <= 1 {
10717 return if i[0] == n as SaSint { 0 } else { -1 };
10718 }
10719
10720 let samples = (n - 1) / r as usize + 1;
10721 if i.len() < samples {
10722 return -1;
10723 }
10724
10725 for &index in &i[..samples] {
10726 if index <= 0 || index as usize > n {
10727 return -1;
10728 }
10729 }
10730 0
10731}
10732
/// Adds one to `count[s]` for every symbol `s` in `t`.
///
/// `count` is not cleared first. Panics if a symbol indexes past the end of `count`.
fn unbwt_compute_histogram(t: &[u16], count: &mut [usize]) {
    t.iter()
        .for_each(|&symbol| count[usize::from(symbol)] += 1);
}
10738
10739fn unbwt_shift(n: usize) -> usize {
10740 let mut shift = 0usize;
10741 while (n >> shift) > (1usize << UNBWT_FASTBITS) {
10742 shift += 1;
10743 }
10744 shift
10745}
10746
10747fn unbwt_calculate_fastbits(bucket2: &mut [usize], fastbits: &mut [u16], shift: usize) {
10748 let mut v = 0usize;
10749 let mut sum = 1usize;
10750 for (w, bucket) in bucket2.iter_mut().enumerate().take(ALPHABET_SIZE) {
10751 let prev = sum;
10752 sum += *bucket;
10753 *bucket = prev;
10754 if prev != sum {
10755 while v <= ((sum - 1) >> shift) {
10756 fastbits[v] = w as u16;
10757 v += 1;
10758 }
10759 }
10760 }
10761}
10762
/// Fills `p` by scattering row numbers into the per-symbol slots tracked by `bucket2`.
///
/// Rows `0..index` take their symbol from `t[row]`; rows `index + 1..=t.len()` take
/// it from `t[row - 1]`, so row `index` itself is never placed. Each placement
/// writes the row at `p[bucket2[symbol]]` and then advances `bucket2[symbol]`.
fn unbwt_calculate_p(t: &[u16], p: &mut [usize], bucket2: &mut [usize], index: usize) {
    let mut place = |symbol: u16, row: usize| {
        let slot = &mut bucket2[usize::from(symbol)];
        p[*slot] = row;
        *slot += 1;
    };

    for row in 0..index {
        place(t[row], row);
    }
    for row in index + 1..=t.len() {
        place(t[row - 1], row);
    }
}
10776
/// Block-restricted variant of the row scatter: places only the rows that fall
/// inside `block_start..block_end` (upper bound inclusive for rows after `index`).
///
/// Rows `block_start..min(index, block_end)` take their symbol from `t[row]`; rows
/// `max(block_start, index) + 1..=block_end` take it from `t[row - 1]`. Each
/// placement writes the row at `p[bucket2[symbol]]` and then advances
/// `bucket2[symbol]`.
#[allow(dead_code, non_snake_case)]
fn unbwt_calculate_P(
    t: &[u16],
    p: &mut [usize],
    bucket2: &mut [usize],
    index: usize,
    block_start: usize,
    block_end: usize,
) {
    let mut place = |symbol: u16, row: usize| {
        let slot = &mut bucket2[usize::from(symbol)];
        p[*slot] = row;
        *slot += 1;
    };

    let before_end = index.min(block_end);
    for row in block_start..before_end {
        place(t[row], row);
    }

    let after_start = block_start.max(index) + 1;
    for row in after_start..=block_end {
        place(t[row - 1], row);
    }
}
10800
10801fn unbwt_init_single(
10802 t: &[u16],
10803 p: &mut [usize],
10804 freq: Option<&[SaSint]>,
10805 i: &[SaSint],
10806 bucket2: &mut [usize],
10807 fastbits: &mut [u16],
10808) {
10809 let shift = unbwt_shift(t.len());
10810 if let Some(freq) = freq {
10811 for c in 0..ALPHABET_SIZE {
10812 bucket2[c] = freq[c] as usize;
10813 }
10814 } else {
10815 bucket2.fill(0);
10816 unbwt_compute_histogram(t, bucket2);
10817 }
10818
10819 unbwt_calculate_fastbits(bucket2, fastbits, shift);
10820 unbwt_calculate_p(t, p, bucket2, i[0] as usize);
10821}
10822
/// Prepares the inverse-BWT tables by splitting `t` into per-thread blocks.
///
/// `buckets` provides one `ALPHABET_SIZE`-sized scratch histogram per block; the
/// number of blocks is capped by `buckets.len() / ALPHABET_SIZE`. When fewer than two
/// blocks would be used (`threads <= 1`, `t.len() < 65_536`, or not enough scratch
/// space) the call falls back to `unbwt_init_single`.
///
/// Note that the blocks are processed one after another in ordinary loops here, and
/// that `freq` is only consulted on the single-block fallback path.
///
/// # Panics
///
/// On the multi-block path, panics if `i[0]` is negative (via `expect`).
#[allow(dead_code)]
fn unbwt_init_parallel(
    t: &[u16],
    p: &mut [usize],
    freq: Option<&[SaSint]>,
    i: &[SaSint],
    bucket2: &mut [usize],
    fastbits: &mut [u16],
    buckets: &mut [usize],
    threads: SaSint,
) {
    let n = t.len();
    let available_threads = buckets.len() / ALPHABET_SIZE;
    let num_threads = if threads > 1 && n >= 65_536 && available_threads > 1 {
        usize::try_from(threads)
            .expect("threads must be non-negative")
            .min(available_threads)
            .max(1)
    } else {
        1
    };

    if num_threads == 1 {
        unbwt_init_single(t, p, freq, i, bucket2, fastbits);
        return;
    }

    let index = usize::try_from(i[0]).expect("primary index must be non-negative");
    let shift = unbwt_shift(n);
    // Block length rounded down to a multiple of 16; the last block takes the rest.
    let block_stride = (n / num_threads) & !15usize;

    // Phase 1: count the symbols of each block into its own histogram.
    for thread in 0..num_threads {
        let block_start = thread * block_stride;
        let block_size = if thread + 1 < num_threads {
            block_stride
        } else {
            n - block_start
        };
        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
        local.fill(0);
        unbwt_compute_histogram(&t[block_start..block_start + block_size], local);
    }

    // Phase 2: accumulate the totals into `bucket2` while replacing each block's
    // count with the number of occurrences seen in the blocks before it.
    bucket2.fill(0);
    for thread in 0..num_threads {
        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
        for c in 0..ALPHABET_SIZE {
            let a = bucket2[c];
            let b = local[c];
            bucket2[c] = a + b;
            local[c] = a;
        }
    }

    // Turns the totals in `bucket2` into per-symbol start offsets.
    unbwt_calculate_fastbits(bucket2, fastbits, shift);

    // Phase 3: shift each block's per-symbol offsets by the global start offsets and
    // scatter that block's rows into `p`.
    for thread in 0..num_threads {
        let block_start = thread * block_stride;
        let block_size = if thread + 1 < num_threads {
            block_stride
        } else {
            n - block_start
        };
        let local = &mut buckets[thread * ALPHABET_SIZE..(thread + 1) * ALPHABET_SIZE];
        for c in 0..ALPHABET_SIZE {
            local[c] += bucket2[c];
        }
        unbwt_calculate_P(t, p, local, index, block_start, block_start + block_size);
    }

    // The last block's offsets have advanced past every placement, so they become
    // the final contents of `bucket2`.
    let last_local = &buckets[(num_threads - 1) * ALPHABET_SIZE..num_threads * ALPHABET_SIZE];
    bucket2.copy_from_slice(last_local);
}
10896
/// Decodes one symbol at cursor `p0` and returns it together with the next cursor.
///
/// The search starts from the `fastbits[p0 >> shift]` guess and advances while
/// `bucket2[symbol] <= p0`. The next cursor is `p[p0]`.
fn unbwt_decode_symbol(
    p0: usize,
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
) -> (u16, usize) {
    let mut symbol = usize::from(fastbits[p0 >> shift]);
    while bucket2[symbol] <= p0 {
        symbol += 1;
    }
    (symbol as u16, p[p0])
}
10912
10913#[allow(dead_code)]
10914fn unbwt_decode_1(
10915 u: &mut [u16],
10916 p: &[usize],
10917 bucket2: &[usize],
10918 fastbits: &[u16],
10919 shift: usize,
10920 i0: &mut usize,
10921 k: usize,
10922) {
10923 let mut cursors = [*i0];
10924 unbwt_decode_lanes::<1>(u, p, bucket2, fastbits, shift, k, &mut cursors, k);
10925 *i0 = cursors[0];
10926}
10927
10928#[allow(dead_code)]
10929fn unbwt_decode_2(
10930 u: &mut [u16],
10931 p: &[usize],
10932 bucket2: &[usize],
10933 fastbits: &[u16],
10934 shift: usize,
10935 r: usize,
10936 i0: &mut usize,
10937 i1: &mut usize,
10938 k: usize,
10939) {
10940 let mut cursors = [*i0, *i1];
10941 unbwt_decode_lanes::<2>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10942 *i0 = cursors[0];
10943 *i1 = cursors[1];
10944}
10945
10946#[allow(dead_code)]
10947fn unbwt_decode_3(
10948 u: &mut [u16],
10949 p: &[usize],
10950 bucket2: &[usize],
10951 fastbits: &[u16],
10952 shift: usize,
10953 r: usize,
10954 i0: &mut usize,
10955 i1: &mut usize,
10956 i2: &mut usize,
10957 k: usize,
10958) {
10959 let mut cursors = [*i0, *i1, *i2];
10960 unbwt_decode_lanes::<3>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10961 *i0 = cursors[0];
10962 *i1 = cursors[1];
10963 *i2 = cursors[2];
10964}
10965
10966#[allow(dead_code)]
10967fn unbwt_decode_4(
10968 u: &mut [u16],
10969 p: &[usize],
10970 bucket2: &[usize],
10971 fastbits: &[u16],
10972 shift: usize,
10973 r: usize,
10974 i0: &mut usize,
10975 i1: &mut usize,
10976 i2: &mut usize,
10977 i3: &mut usize,
10978 k: usize,
10979) {
10980 let mut cursors = [*i0, *i1, *i2, *i3];
10981 unbwt_decode_lanes::<4>(u, p, bucket2, fastbits, shift, r, &mut cursors, k);
10982 *i0 = cursors[0];
10983 *i1 = cursors[1];
10984 *i2 = cursors[2];
10985 *i3 = cursors[3];
10986}
10987
/// Five-lane decoder: forwards all arguments to `unbwt_decode_lanes::<5>`, which
/// advances `cursors` in place.
#[allow(dead_code)]
fn unbwt_decode_5(
    u: &mut [u16],
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
    r: usize,
    cursors: &mut [usize; 5],
    k: usize,
) {
    unbwt_decode_lanes::<5>(u, p, bucket2, fastbits, shift, r, cursors, k);
}
11001
/// Six-lane decoder: forwards all arguments to `unbwt_decode_lanes::<6>`, which
/// advances `cursors` in place.
#[allow(dead_code)]
fn unbwt_decode_6(
    u: &mut [u16],
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
    r: usize,
    cursors: &mut [usize; 6],
    k: usize,
) {
    unbwt_decode_lanes::<6>(u, p, bucket2, fastbits, shift, r, cursors, k);
}
11015
/// Seven-lane decoder: forwards all arguments to `unbwt_decode_lanes::<7>`, which
/// advances `cursors` in place.
#[allow(dead_code)]
fn unbwt_decode_7(
    u: &mut [u16],
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
    r: usize,
    cursors: &mut [usize; 7],
    k: usize,
) {
    unbwt_decode_lanes::<7>(u, p, bucket2, fastbits, shift, r, cursors, k);
}
11029
/// Eight-lane decoder: forwards all arguments to `unbwt_decode_lanes::<8>`, which
/// advances `cursors` in place.
#[allow(dead_code)]
fn unbwt_decode_8(
    u: &mut [u16],
    p: &[usize],
    bucket2: &[usize],
    fastbits: &[u16],
    shift: usize,
    r: usize,
    cursors: &mut [usize; 8],
    k: usize,
) {
    unbwt_decode_lanes::<8>(u, p, bucket2, fastbits, shift, r, cursors, k);
}
11043
11044fn unbwt_decode(
11045 u: &mut [u16],
11046 p: &[usize],
11047 n: usize,
11048 r: usize,
11049 i: &[SaSint],
11050 bucket2: &[usize],
11051 fastbits: &[u16],
11052) {
11053 let shift = unbwt_shift(n);
11054 let blocks = 1 + (n - 1) / r;
11055 let remainder = n - r * (blocks - 1);
11056 unbwt_decode_blocks(u, p, r, i, bucket2, fastbits, shift, blocks, remainder);
11057}
11058
11059fn unbwt_decode_blocks(
11060 u: &mut [u16],
11061 p: &[usize],
11062 r: usize,
11063 i: &[SaSint],
11064 bucket2: &[usize],
11065 fastbits: &[u16],
11066 shift: usize,
11067 blocks: usize,
11068 remainder: usize,
11069) {
11070 let mut blocks_left = blocks;
11071 let mut i_offset = 0usize;
11072 let mut u_offset = 0usize;
11073
11074 while blocks_left > 8 {
11075 let mut cursors = [
11076 i[i_offset] as usize,
11077 i[i_offset + 1] as usize,
11078 i[i_offset + 2] as usize,
11079 i[i_offset + 3] as usize,
11080 i[i_offset + 4] as usize,
11081 i[i_offset + 5] as usize,
11082 i[i_offset + 6] as usize,
11083 i[i_offset + 7] as usize,
11084 ];
11085 unbwt_decode_lanes::<8>(
11086 &mut u[u_offset..],
11087 p,
11088 bucket2,
11089 fastbits,
11090 shift,
11091 r,
11092 &mut cursors,
11093 r,
11094 );
11095 i_offset += 8;
11096 blocks_left -= 8;
11097 u_offset += 8 * r;
11098 }
11099
11100 match blocks_left {
11101 1 => {
11102 let mut cursors = [i[i_offset] as usize];
11103 unbwt_decode_lanes::<1>(
11104 &mut u[u_offset..],
11105 p,
11106 bucket2,
11107 fastbits,
11108 shift,
11109 r,
11110 &mut cursors,
11111 remainder,
11112 );
11113 }
11114 2 => {
11115 let mut cursors = [i[i_offset] as usize, i[i_offset + 1] as usize];
11116 unbwt_decode_lanes::<2>(
11117 &mut u[u_offset..],
11118 p,
11119 bucket2,
11120 fastbits,
11121 shift,
11122 r,
11123 &mut cursors,
11124 remainder,
11125 );
11126 let mut first = [cursors[0]];
11127 unbwt_decode_lanes::<1>(
11128 &mut u[u_offset + remainder..],
11129 p,
11130 bucket2,
11131 fastbits,
11132 shift,
11133 r,
11134 &mut first,
11135 r - remainder,
11136 );
11137 }
11138 3 => {
11139 let mut cursors = [
11140 i[i_offset] as usize,
11141 i[i_offset + 1] as usize,
11142 i[i_offset + 2] as usize,
11143 ];
11144 unbwt_decode_lanes::<3>(
11145 &mut u[u_offset..],
11146 p,
11147 bucket2,
11148 fastbits,
11149 shift,
11150 r,
11151 &mut cursors,
11152 remainder,
11153 );
11154 let mut first = [cursors[0], cursors[1]];
11155 unbwt_decode_lanes::<2>(
11156 &mut u[u_offset + remainder..],
11157 p,
11158 bucket2,
11159 fastbits,
11160 shift,
11161 r,
11162 &mut first,
11163 r - remainder,
11164 );
11165 }
11166 4 => {
11167 let mut cursors = [
11168 i[i_offset] as usize,
11169 i[i_offset + 1] as usize,
11170 i[i_offset + 2] as usize,
11171 i[i_offset + 3] as usize,
11172 ];
11173 unbwt_decode_lanes::<4>(
11174 &mut u[u_offset..],
11175 p,
11176 bucket2,
11177 fastbits,
11178 shift,
11179 r,
11180 &mut cursors,
11181 remainder,
11182 );
11183 let mut first = [cursors[0], cursors[1], cursors[2]];
11184 unbwt_decode_lanes::<3>(
11185 &mut u[u_offset + remainder..],
11186 p,
11187 bucket2,
11188 fastbits,
11189 shift,
11190 r,
11191 &mut first,
11192 r - remainder,
11193 );
11194 }
11195 5 => {
11196 let mut cursors = [
11197 i[i_offset] as usize,
11198 i[i_offset + 1] as usize,
11199 i[i_offset + 2] as usize,
11200 i[i_offset + 3] as usize,
11201 i[i_offset + 4] as usize,
11202 ];
11203 unbwt_decode_lanes::<5>(
11204 &mut u[u_offset..],
11205 p,
11206 bucket2,
11207 fastbits,
11208 shift,
11209 r,
11210 &mut cursors,
11211 remainder,
11212 );
11213 let mut first = [cursors[0], cursors[1], cursors[2], cursors[3]];
11214 unbwt_decode_lanes::<4>(
11215 &mut u[u_offset + remainder..],
11216 p,
11217 bucket2,
11218 fastbits,
11219 shift,
11220 r,
11221 &mut first,
11222 r - remainder,
11223 );
11224 }
11225 6 => {
11226 let mut cursors = [
11227 i[i_offset] as usize,
11228 i[i_offset + 1] as usize,
11229 i[i_offset + 2] as usize,
11230 i[i_offset + 3] as usize,
11231 i[i_offset + 4] as usize,
11232 i[i_offset + 5] as usize,
11233 ];
11234 unbwt_decode_lanes::<6>(
11235 &mut u[u_offset..],
11236 p,
11237 bucket2,
11238 fastbits,
11239 shift,
11240 r,
11241 &mut cursors,
11242 remainder,
11243 );
11244 let mut first = [cursors[0], cursors[1], cursors[2], cursors[3], cursors[4]];
11245 unbwt_decode_lanes::<5>(
11246 &mut u[u_offset + remainder..],
11247 p,
11248 bucket2,
11249 fastbits,
11250 shift,
11251 r,
11252 &mut first,
11253 r - remainder,
11254 );
11255 }
11256 7 => {
11257 let mut cursors = [
11258 i[i_offset] as usize,
11259 i[i_offset + 1] as usize,
11260 i[i_offset + 2] as usize,
11261 i[i_offset + 3] as usize,
11262 i[i_offset + 4] as usize,
11263 i[i_offset + 5] as usize,
11264 i[i_offset + 6] as usize,
11265 ];
11266 unbwt_decode_lanes::<7>(
11267 &mut u[u_offset..],
11268 p,
11269 bucket2,
11270 fastbits,
11271 shift,
11272 r,
11273 &mut cursors,
11274 remainder,
11275 );
11276 let mut first = [
11277 cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5],
11278 ];
11279 unbwt_decode_lanes::<6>(
11280 &mut u[u_offset + remainder..],
11281 p,
11282 bucket2,
11283 fastbits,
11284 shift,
11285 r,
11286 &mut first,
11287 r - remainder,
11288 );
11289 }
11290 _ => {
11291 let mut cursors = [
11292 i[i_offset] as usize,
11293 i[i_offset + 1] as usize,
11294 i[i_offset + 2] as usize,
11295 i[i_offset + 3] as usize,
11296 i[i_offset + 4] as usize,
11297 i[i_offset + 5] as usize,
11298 i[i_offset + 6] as usize,
11299 i[i_offset + 7] as usize,
11300 ];
11301 unbwt_decode_lanes::<8>(
11302 &mut u[u_offset..],
11303 p,
11304 bucket2,
11305 fastbits,
11306 shift,
11307 r,
11308 &mut cursors,
11309 remainder,
11310 );
11311 let mut first = [
11312 cursors[0], cursors[1], cursors[2], cursors[3], cursors[4], cursors[5], cursors[6],
11313 ];
11314 unbwt_decode_lanes::<7>(
11315 &mut u[u_offset + remainder..],
11316 p,
11317 bucket2,
11318 fastbits,
11319 shift,
11320 r,
11321 &mut first,
11322 r - remainder,
11323 );
11324 }
11325 }
11326}
11327
11328#[allow(dead_code)]
11329fn unbwt_decode_omp(
11330 u: &mut [u16],
11331 p: &[usize],
11332 n: usize,
11333 r: usize,
11334 i: &[SaSint],
11335 bucket2: &[usize],
11336 fastbits: &[u16],
11337 threads: SaSint,
11338) {
11339 let blocks = 1 + (n - 1) / r;
11340 let remainder = n - r * (blocks - 1);
11341 let num_threads = if threads > 1 && n >= 65_536 {
11342 usize::try_from(threads)
11343 .expect("threads must be non-negative")
11344 .min(blocks)
11345 .max(1)
11346 } else {
11347 1
11348 };
11349
11350 if num_threads == 1 {
11351 unbwt_decode(u, p, n, r, i, bucket2, fastbits);
11352 return;
11353 }
11354
11355 let shift = unbwt_shift(n);
11356 let block_stride = blocks / num_threads;
11357 let block_remainder = blocks % num_threads;
11358 for thread in 0..num_threads {
11359 let block_count = block_stride + usize::from(thread < block_remainder);
11360 let block_start = block_stride * thread + thread.min(block_remainder);
11361 let tail = if thread + 1 < num_threads {
11362 r
11363 } else {
11364 remainder
11365 };
11366 unbwt_decode_blocks(
11367 &mut u[r * block_start..],
11368 p,
11369 r,
11370 &i[block_start..],
11371 bucket2,
11372 fastbits,
11373 shift,
11374 block_count,
11375 tail,
11376 );
11377 }
11378}
11379
11380fn unbwt_decode_lanes<const LANES: usize>(
11381 u: &mut [u16],
11382 p: &[usize],
11383 bucket2: &[usize],
11384 fastbits: &[u16],
11385 shift: usize,
11386 r: usize,
11387 cursors: &mut [usize; LANES],
11388 k: usize,
11389) {
11390 for pos in 0..k {
11391 for lane in 0..LANES {
11392 let (symbol, next) = unbwt_decode_symbol(cursors[lane], p, bucket2, fastbits, shift);
11393 cursors[lane] = next;
11394 u[lane * r + pos] = symbol;
11395 }
11396 }
11397}
11398
11399fn unbwt_core(
11400 t: &[u16],
11401 u: &mut [u16],
11402 a: &mut [SaSint],
11403 freq: Option<&[SaSint]>,
11404 r: SaSint,
11405 i: &[SaSint],
11406) -> SaSint {
11407 let n = t.len();
11408 let shift = unbwt_shift(n);
11409 let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11410 let mut fastbits = vec![0u16; 1 + (n >> shift)];
11411
11412 unbwt_core_with_buffers(t, u, a, freq, r, i, &mut bucket2, &mut fastbits, 1)
11413}
11414
11415fn unbwt_core_with_buffers(
11416 t: &[u16],
11417 u: &mut [u16],
11418 a: &mut [SaSint],
11419 freq: Option<&[SaSint]>,
11420 r: SaSint,
11421 i: &[SaSint],
11422 bucket2: &mut [usize],
11423 fastbits: &mut [u16],
11424 threads: SaSint,
11425) -> SaSint {
11426 let n = t.len();
11427 let shift = unbwt_shift(n);
11428 if bucket2.len() < ALPHABET_SIZE || fastbits.len() < 1 + (n >> shift) {
11429 return -2;
11430 }
11431
11432 let mut p = vec![0usize; n + 1];
11433 unbwt_init_single(
11434 t,
11435 &mut p,
11436 freq,
11437 i,
11438 &mut bucket2[..ALPHABET_SIZE],
11439 &mut fastbits[..1 + (n >> shift)],
11440 );
11441 unbwt_decode_omp(
11442 u,
11443 &p,
11444 n,
11445 r as usize,
11446 i,
11447 &bucket2[..ALPHABET_SIZE],
11448 &fastbits[..1 + (n >> shift)],
11449 threads,
11450 );
11451
11452 for (dst, &src) in a.iter_mut().zip(p.iter().skip(1)) {
11453 *dst = src as SaSint;
11454 }
11455 0
11456}
11457
11458fn inverse_bwt(
11459 t: &[u16],
11460 u: &mut [u16],
11461 a: &mut [SaSint],
11462 freq: Option<&[SaSint]>,
11463 primary: SaSint,
11464) -> SaSint {
11465 let n = t.len();
11466 let i = [primary];
11467 let rc = validate_unbwt_aux(t, u, a, freq, n as SaSint, &i);
11468 if rc != 0 {
11469 return rc;
11470 }
11471 if n <= 1 {
11472 if n == 1 {
11473 u[0] = t[0];
11474 }
11475 return 0;
11476 }
11477 unbwt_core(t, u, a, freq, n as SaSint, &i)
11478}
11479
/// Inverts the BWT `t` into `u` given its primary index `i`.
///
/// Thin public wrapper: forwards all arguments to `inverse_bwt` and returns its
/// status code unchanged.
pub fn libsais16x64_unbwt(
    t: &[u16],
    u: &mut [u16],
    a: &mut [SaSint],
    freq: Option<&[SaSint]>,
    i: SaSint,
) -> SaSint {
    inverse_bwt(t, u, a, freq, i)
}
11498
/// Inverts the BWT `t` into `u` given its primary index `i`, reusing the buffers
/// held by `ctx`.
///
/// Implemented as a call to `libsais16x64_unbwt_aux_ctx` with the sampling rate set
/// to `t.len()` and `i` as the only sampled index.
pub fn libsais16x64_unbwt_ctx(
    ctx: &mut UnbwtContext,
    t: &[u16],
    u: &mut [u16],
    a: &mut [SaSint],
    freq: Option<&[SaSint]>,
    i: SaSint,
) -> SaSint {
    libsais16x64_unbwt_aux_ctx(ctx, t, u, a, freq, t.len() as SaSint, &[i])
}
11519
11520pub fn libsais16x64_unbwt_aux(
11531 t: &[u16],
11532 u: &mut [u16],
11533 a: &mut [SaSint],
11534 freq: Option<&[SaSint]>,
11535 r: SaSint,
11536 i: &[SaSint],
11537) -> SaSint {
11538 let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11539 if rc != 0 {
11540 return rc;
11541 }
11542 if t.len() <= 1 {
11543 if t.len() == 1 {
11544 u[0] = t[0];
11545 }
11546 return 0;
11547 }
11548 unbwt_core(t, u, a, freq, r, i)
11549}
11550
11551pub fn libsais16x64_unbwt_aux_ctx(
11563 ctx: &mut UnbwtContext,
11564 t: &[u16],
11565 u: &mut [u16],
11566 a: &mut [SaSint],
11567 freq: Option<&[SaSint]>,
11568 r: SaSint,
11569 i: &[SaSint],
11570) -> SaSint {
11571 let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11572 if rc != 0 {
11573 return rc;
11574 }
11575 if t.len() <= 1 {
11576 if t.len() == 1 {
11577 u[0] = t[0];
11578 }
11579 return 0;
11580 }
11581 unbwt_core_with_buffers(
11582 t,
11583 u,
11584 a,
11585 freq,
11586 r,
11587 i,
11588 &mut ctx.bucket2,
11589 &mut ctx.fastbits,
11590 ctx.threads,
11591 )
11592}
11593
11594pub fn libsais16x64_unbwt_omp(
11605 t: &[u16],
11606 u: &mut [u16],
11607 a: &mut [SaSint],
11608 freq: Option<&[SaSint]>,
11609 i: SaSint,
11610 threads: SaSint,
11611) -> SaSint {
11612 if threads < 0 {
11613 -1
11614 } else {
11615 let primary = [i];
11616 libsais16x64_unbwt_aux_omp(t, u, a, freq, t.len() as SaSint, &primary, threads)
11617 }
11618}
11619
11620pub fn libsais16x64_unbwt_aux_omp(
11632 t: &[u16],
11633 u: &mut [u16],
11634 a: &mut [SaSint],
11635 freq: Option<&[SaSint]>,
11636 r: SaSint,
11637 i: &[SaSint],
11638 threads: SaSint,
11639) -> SaSint {
11640 if threads < 0 {
11641 -1
11642 } else {
11643 let rc = validate_unbwt_aux(t, u, a, freq, r, i);
11644 if rc != 0 {
11645 return rc;
11646 }
11647 if t.len() <= 1 {
11648 if t.len() == 1 {
11649 u[0] = t[0];
11650 }
11651 return 0;
11652 }
11653 let n = t.len();
11654 let shift = unbwt_shift(n);
11655 let mut bucket2 = vec![0usize; ALPHABET_SIZE];
11656 let mut fastbits = vec![0u16; 1 + (n >> shift)];
11657 unbwt_core_with_buffers(
11658 t,
11659 u,
11660 a,
11661 freq,
11662 r,
11663 i,
11664 &mut bucket2,
11665 &mut fastbits,
11666 normalize_threads(threads),
11667 )
11668 }
11669}
11670
/// Fills `plcp` from the text `t` and its suffix array `sa`.
///
/// Thin public wrapper: calls `compute_plcp` with the `gsa` flag cleared and returns
/// its status code unchanged.
pub fn libsais16x64_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
    compute_plcp(t, sa, plcp, false)
}
11681
11682pub fn libsais16x64_plcp_gsa(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11690 if t.last().copied().unwrap_or(0) != 0 {
11691 -1
11692 } else {
11693 compute_plcp(t, sa, plcp, true)
11694 }
11695}
11696
11697fn compute_plcp(t: &[u16], sa: &[SaSint], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11698 if sa.len() != t.len() || plcp.len() != t.len() {
11699 return -1;
11700 }
11701 if t.len() <= 1 {
11702 if t.len() == 1 {
11703 plcp[0] = 0;
11704 }
11705 return 0;
11706 }
11707
11708 if compute_phi(sa, plcp) != 0 {
11709 return -1;
11710 }
11711
11712 compute_plcp_from_phi(t, plcp, gsa)
11713}
11714
11715fn compute_phi(sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
11716 let n = sa.len();
11717 let mut previous = n as SaSint;
11718 for &suffix_value in sa {
11719 let Some(suffix) = suffix_index(suffix_value, n) else {
11720 return -1;
11721 };
11722 plcp[suffix] = previous;
11723 previous = suffix_value;
11724 }
11725 0
11726}
11727
11728fn compute_plcp_from_phi(t: &[u16], plcp: &mut [SaSint], gsa: bool) -> SaSint {
11729 let n = t.len();
11730 let mut l = 0usize;
11731 for i in 0..t.len() {
11732 let previous = plcp[i];
11733 if previous == n as SaSint {
11734 plcp[i] = 0;
11735 l = 0;
11736 continue;
11737 }
11738
11739 let Some(prev) = suffix_index(previous, n) else {
11740 return -1;
11741 };
11742
11743 while i + l < t.len()
11744 && prev + l < t.len()
11745 && t[i + l] == t[prev + l]
11746 && (!gsa || t[i + l] != 0)
11747 {
11748 l += 1;
11749 }
11750 plcp[i] = l as SaSint;
11751 l = l.saturating_sub(1);
11752 }
11753 0
11754}
11755
11756#[allow(dead_code)]
11757fn compute_phi_omp(sa: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11758 let n_usize = n as usize;
11759 if threads == 1 || n < 65_536 {
11760 return compute_phi(&sa[..n_usize], &mut plcp[..n_usize]);
11761 }
11762
11763 let block_stride = (n / threads) & !15;
11764 for thread in 0..threads {
11765 let block_start = thread * block_stride;
11766 let block_size = if thread < threads - 1 {
11767 block_stride
11768 } else {
11769 n - block_start
11770 };
11771 let start = block_start as usize;
11772 let end = (block_start + block_size) as usize;
11773 let mut previous = if start > 0 { sa[start - 1] } else { n };
11774 for &suffix_value in &sa[start..end] {
11775 let Some(suffix) = suffix_index(suffix_value, n_usize) else {
11776 return -1;
11777 };
11778 plcp[suffix] = previous;
11779 previous = suffix_value;
11780 }
11781 }
11782 0
11783}
11784
11785#[allow(dead_code)]
11786fn compute_plcp_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11787 if threads == 1 || n < 65_536 {
11788 let n = n as usize;
11789 return compute_plcp_from_phi(&t[..n], &mut plcp[..n], false);
11790 }
11791
11792 let block_stride = (n / threads) & !15;
11793 for thread in 0..threads {
11794 let block_start = thread * block_stride;
11795 let block_size = if thread < threads - 1 {
11796 block_stride
11797 } else {
11798 n - block_start
11799 };
11800 let rc = compute_plcp_range(
11801 t,
11802 plcp,
11803 n as usize,
11804 block_start as isize,
11805 block_size as isize,
11806 false,
11807 );
11808 if rc != 0 {
11809 return rc;
11810 }
11811 }
11812 0
11813}
11814
11815fn compute_plcp_range(
11816 t: &[u16],
11817 plcp: &mut [SaSint],
11818 n: usize,
11819 omp_block_start: isize,
11820 omp_block_size: isize,
11821 gsa: bool,
11822) -> SaSint {
11823 let mut l = 0usize;
11824 let end = (omp_block_start + omp_block_size) as usize;
11825 for i in omp_block_start as usize..end {
11826 let previous = plcp[i];
11827 if previous == n as SaSint {
11828 plcp[i] = 0;
11829 l = 0;
11830 continue;
11831 }
11832
11833 let Some(prev) = suffix_index(previous, n) else {
11834 return -1;
11835 };
11836
11837 while i + l < t.len()
11838 && prev + l < t.len()
11839 && t[i + l] == t[prev + l]
11840 && (!gsa || t[i + l] != 0)
11841 {
11842 l += 1;
11843 }
11844 plcp[i] = l as SaSint;
11845 l = l.saturating_sub(1);
11846 }
11847 0
11848}
11849
11850#[allow(dead_code)]
11851fn compute_plcp_gsa(
11852 t: &[u16],
11853 plcp: &mut [SaSint],
11854 omp_block_start: isize,
11855 omp_block_size: isize,
11856) -> SaSint {
11857 let n = t.len();
11858 let mut l = 0usize;
11859 let end = (omp_block_start + omp_block_size) as usize;
11860 for i in omp_block_start as usize..end {
11861 let previous = plcp[i];
11862 if previous == n as SaSint {
11863 plcp[i] = 0;
11864 l = 0;
11865 continue;
11866 }
11867
11868 let Some(prev) = suffix_index(previous, n) else {
11869 return -1;
11870 };
11871
11872 while i + l < t.len() && prev + l < t.len() && t[i + l] == t[prev + l] && t[i + l] != 0 {
11873 l += 1;
11874 }
11875 plcp[i] = l as SaSint;
11876 l = l.saturating_sub(1);
11877 }
11878 0
11879}
11880
11881#[allow(dead_code)]
11882fn compute_plcp_gsa_omp(t: &[u16], plcp: &mut [SaSint], n: SaSint, threads: SaSint) -> SaSint {
11883 if threads == 1 || n < 65_536 {
11884 return compute_plcp_gsa(t, plcp, 0, n as isize);
11885 }
11886
11887 let block_stride = (n / threads) & !15;
11888 for thread in 0..threads {
11889 let block_start = thread * block_stride;
11890 let block_size = if thread < threads - 1 {
11891 block_stride
11892 } else {
11893 n - block_start
11894 };
11895 let rc = compute_plcp_gsa(t, plcp, block_start as isize, block_size as isize);
11896 if rc != 0 {
11897 return rc;
11898 }
11899 }
11900 0
11901}
11902
11903#[allow(dead_code)]
11904fn compute_lcp(
11905 plcp: &[SaSint],
11906 sa: &[SaSint],
11907 lcp: &mut [SaSint],
11908 omp_block_start: isize,
11909 omp_block_size: isize,
11910) -> SaSint {
11911 let end = (omp_block_start + omp_block_size) as usize;
11912 for row in omp_block_start as usize..end {
11913 let Some(suffix) = suffix_index(sa[row], plcp.len()) else {
11914 return -1;
11915 };
11916 lcp[row] = plcp[suffix];
11917 }
11918 0
11919}
11920
11921#[allow(dead_code)]
11922fn compute_lcp_omp(
11923 plcp: &[SaSint],
11924 sa: &[SaSint],
11925 lcp: &mut [SaSint],
11926 n: SaSint,
11927 threads: SaSint,
11928) -> SaSint {
11929 if threads == 1 || n < 65_536 {
11930 return compute_lcp(plcp, sa, lcp, 0, n as isize);
11931 }
11932
11933 let block_stride = (n / threads) & !15;
11934 for thread in 0..threads {
11935 let block_start = thread * block_stride;
11936 let block_size = if thread < threads - 1 {
11937 block_stride
11938 } else {
11939 n - block_start
11940 };
11941 let rc = compute_lcp(plcp, sa, lcp, block_start as isize, block_size as isize);
11942 if rc != 0 {
11943 return rc;
11944 }
11945 }
11946 0
11947}
11948
11949pub fn libsais16x64_lcp(plcp: &[SaSint], sa: &[SaSint], lcp: &mut [SaSint]) -> SaSint {
11957 if plcp.len() != sa.len() || lcp.len() != sa.len() {
11958 return -1;
11959 }
11960 for (row, &suffix) in sa.iter().enumerate() {
11961 let Some(suffix) = suffix_index(suffix, plcp.len()) else {
11962 return -1;
11963 };
11964 lcp[row] = plcp[suffix];
11965 }
11966 0
11967}
11968
11969fn suffix_index(value: SaSint, len: usize) -> Option<usize> {
11970 usize::try_from(value).ok().filter(|&index| index < len)
11971}
11972
11973pub fn libsais16x64_plcp_omp(
11982 t: &[u16],
11983 sa: &[SaSint],
11984 plcp: &mut [SaSint],
11985 threads: SaSint,
11986) -> SaSint {
11987 if threads < 0 {
11988 return -1;
11989 }
11990 if sa.len() != t.len() || plcp.len() != t.len() {
11991 return -1;
11992 }
11993 if t.len() <= 1 {
11994 if t.len() == 1 {
11995 plcp[0] = 0;
11996 }
11997 return 0;
11998 }
11999
12000 let n = t.len() as SaSint;
12001 let threads = normalize_threads(threads);
12002 if compute_phi_omp(sa, plcp, n, threads) != 0 {
12003 return -1;
12004 }
12005 compute_plcp_omp(t, plcp, n, threads)
12006}
12007
12008pub fn libsais16x64_plcp_gsa_omp(
12017 t: &[u16],
12018 sa: &[SaSint],
12019 plcp: &mut [SaSint],
12020 threads: SaSint,
12021) -> SaSint {
12022 if threads < 0 {
12023 return -1;
12024 }
12025 if t.last().copied().unwrap_or(0) != 0 {
12026 return -1;
12027 }
12028 if sa.len() != t.len() || plcp.len() != t.len() {
12029 return -1;
12030 }
12031 if t.len() <= 1 {
12032 if t.len() == 1 {
12033 plcp[0] = 0;
12034 }
12035 return 0;
12036 }
12037
12038 let n = t.len() as SaSint;
12039 let threads = normalize_threads(threads);
12040 if compute_phi_omp(sa, plcp, n, threads) != 0 {
12041 return -1;
12042 }
12043 compute_plcp_gsa_omp(t, plcp, n, threads)
12044}
12045
12046pub fn libsais16x64_lcp_omp(
12055 plcp: &[SaSint],
12056 sa: &[SaSint],
12057 lcp: &mut [SaSint],
12058 threads: SaSint,
12059) -> SaSint {
12060 if threads < 0 {
12061 return -1;
12062 }
12063 if plcp.len() != sa.len() || lcp.len() != sa.len() {
12064 return -1;
12065 }
12066
12067 compute_lcp_omp(
12068 plcp,
12069 sa,
12070 lcp,
12071 sa.len() as SaSint,
12072 normalize_threads(threads),
12073 )
12074}
12075
12076#[cfg(all(test, feature = "upstream-c"))]
12077mod tests {
12078 use super::*;
12079
12080 unsafe extern "C" {
12081 fn probe_public_libsais16x64(
12082 t: *const u16,
12083 sa: *mut SaSint,
12084 n: SaSint,
12085 fs: SaSint,
12086 ) -> SaSint;
12087 fn probe_public_libsais16x64_freq(
12088 t: *const u16,
12089 sa: *mut SaSint,
12090 n: SaSint,
12091 fs: SaSint,
12092 freq: *mut SaSint,
12093 ) -> SaSint;
12094 fn probe_public_libsais16x64_gsa(
12095 t: *const u16,
12096 sa: *mut SaSint,
12097 n: SaSint,
12098 fs: SaSint,
12099 ) -> SaSint;
12100 fn probe_public_libsais16x64_gsa_freq(
12101 t: *const u16,
12102 sa: *mut SaSint,
12103 n: SaSint,
12104 fs: SaSint,
12105 freq: *mut SaSint,
12106 ) -> SaSint;
12107 fn probe_public_libsais16x64_long(
12108 t: *mut SaSint,
12109 sa: *mut SaSint,
12110 n: SaSint,
12111 k: SaSint,
12112 fs: SaSint,
12113 ) -> SaSint;
12114 fn probe_libsais16x64_main_32s_entry(
12115 t: *mut SaSint,
12116 sa: *mut SaSint,
12117 n: SaSint,
12118 k: SaSint,
12119 fs: SaSint,
12120 threads: SaSint,
12121 ) -> SaSint;
12122 fn probe_libsais16x64_final_sorting_scan_left_to_right_32s(
12123 t: *const SaSint,
12124 sa: *mut SaSint,
12125 induction_bucket: *mut SaSint,
12126 omp_block_start: SaSint,
12127 omp_block_size: SaSint,
12128 );
12129 fn probe_libsais16x64_final_sorting_scan_right_to_left_32s(
12130 t: *const SaSint,
12131 sa: *mut SaSint,
12132 induction_bucket: *mut SaSint,
12133 omp_block_start: SaSint,
12134 omp_block_size: SaSint,
12135 );
12136 fn probe_libsais16x64_clear_lms_suffixes_omp(
12137 sa: *mut SaSint,
12138 n: SaSint,
12139 k: SaSint,
12140 bucket_start: *mut SaSint,
12141 bucket_end: *mut SaSint,
12142 threads: SaSint,
12143 );
12144 fn probe_libsais16x64_flip_suffix_markers_omp(sa: *mut SaSint, l: SaSint, threads: SaSint);
12145 fn probe_libsais16x64_induce_final_order_32s_6k(
12146 t: *const SaSint,
12147 sa: *mut SaSint,
12148 n: SaSint,
12149 k: SaSint,
12150 buckets: *mut SaSint,
12151 threads: SaSint,
12152 );
12153 fn probe_libsais16x64_induce_final_order_32s_4k(
12154 t: *const SaSint,
12155 sa: *mut SaSint,
12156 n: SaSint,
12157 k: SaSint,
12158 buckets: *mut SaSint,
12159 threads: SaSint,
12160 );
12161 fn probe_libsais16x64_induce_final_order_32s_2k(
12162 t: *const SaSint,
12163 sa: *mut SaSint,
12164 n: SaSint,
12165 k: SaSint,
12166 buckets: *mut SaSint,
12167 threads: SaSint,
12168 );
12169 fn probe_libsais16x64_induce_final_order_32s_1k(
12170 t: *const SaSint,
12171 sa: *mut SaSint,
12172 n: SaSint,
12173 k: SaSint,
12174 buckets: *mut SaSint,
12175 threads: SaSint,
12176 );
12177 fn probe_libsais16x64_induce_partial_order_32s_6k_omp(
12178 t: *const SaSint,
12179 sa: *mut SaSint,
12180 n: SaSint,
12181 k: SaSint,
12182 buckets: *mut SaSint,
12183 first_lms_suffix: SaSint,
12184 left_suffixes_count: SaSint,
12185 threads: SaSint,
12186 );
12187 fn probe_libsais16x64_induce_partial_order_32s_4k_omp(
12188 t: *const SaSint,
12189 sa: *mut SaSint,
12190 n: SaSint,
12191 k: SaSint,
12192 buckets: *mut SaSint,
12193 threads: SaSint,
12194 );
12195 fn probe_libsais16x64_induce_partial_order_32s_2k_omp(
12196 t: *const SaSint,
12197 sa: *mut SaSint,
12198 n: SaSint,
12199 k: SaSint,
12200 buckets: *mut SaSint,
12201 threads: SaSint,
12202 );
12203 fn probe_libsais16x64_induce_partial_order_32s_1k_omp(
12204 t: *const SaSint,
12205 sa: *mut SaSint,
12206 n: SaSint,
12207 k: SaSint,
12208 buckets: *mut SaSint,
12209 threads: SaSint,
12210 );
12211 fn probe_libsais16x64_induce_partial_order_16u_omp(
12212 t: *const u16,
12213 sa: *mut SaSint,
12214 n: SaSint,
12215 k: SaSint,
12216 flags: SaSint,
12217 buckets: *mut SaSint,
12218 first_lms_suffix: SaSint,
12219 left_suffixes_count: SaSint,
12220 threads: SaSint,
12221 );
12222 fn probe_libsais16x64_induce_final_order_16u_omp(
12223 t: *const u16,
12224 sa: *mut SaSint,
12225 n: SaSint,
12226 k: SaSint,
12227 flags: SaSint,
12228 r: SaSint,
12229 i: *mut SaSint,
12230 buckets: *mut SaSint,
12231 threads: SaSint,
12232 ) -> SaSint;
12233 fn probe_public_libsais16x64_bwt(
12234 t: *const u16,
12235 u: *mut u16,
12236 a: *mut SaSint,
12237 n: SaSint,
12238 fs: SaSint,
12239 ) -> SaSint;
12240 fn probe_public_libsais16x64_bwt_freq(
12241 t: *const u16,
12242 u: *mut u16,
12243 a: *mut SaSint,
12244 n: SaSint,
12245 fs: SaSint,
12246 freq: *mut SaSint,
12247 ) -> SaSint;
12248 fn probe_public_libsais16x64_bwt_aux(
12249 t: *const u16,
12250 u: *mut u16,
12251 a: *mut SaSint,
12252 n: SaSint,
12253 fs: SaSint,
12254 r: SaSint,
12255 i: *mut SaSint,
12256 ) -> SaSint;
12257 fn probe_public_libsais16x64_bwt_aux_freq(
12258 t: *const u16,
12259 u: *mut u16,
12260 a: *mut SaSint,
12261 n: SaSint,
12262 fs: SaSint,
12263 freq: *mut SaSint,
12264 r: SaSint,
12265 i: *mut SaSint,
12266 ) -> SaSint;
12267 fn probe_public_libsais16x64_unbwt(
12268 t: *const u16,
12269 u: *mut u16,
12270 a: *mut SaSint,
12271 n: SaSint,
12272 i: SaSint,
12273 ) -> SaSint;
12274 fn probe_public_libsais16x64_unbwt_freq(
12275 t: *const u16,
12276 u: *mut u16,
12277 a: *mut SaSint,
12278 n: SaSint,
12279 freq: *const SaSint,
12280 i: SaSint,
12281 ) -> SaSint;
12282 fn probe_public_libsais16x64_unbwt_aux(
12283 t: *const u16,
12284 u: *mut u16,
12285 a: *mut SaSint,
12286 n: SaSint,
12287 r: SaSint,
12288 i: *const SaSint,
12289 ) -> SaSint;
12290 fn probe_public_libsais16x64_unbwt_aux_freq(
12291 t: *const u16,
12292 u: *mut u16,
12293 a: *mut SaSint,
12294 n: SaSint,
12295 freq: *const SaSint,
12296 r: SaSint,
12297 i: *const SaSint,
12298 ) -> SaSint;
12299 fn probe_public_libsais16x64_plcp(
12300 t: *const u16,
12301 sa: *const SaSint,
12302 plcp: *mut SaSint,
12303 n: SaSint,
12304 ) -> SaSint;
12305 fn probe_public_libsais16x64_plcp_gsa(
12306 t: *const u16,
12307 sa: *const SaSint,
12308 plcp: *mut SaSint,
12309 n: SaSint,
12310 ) -> SaSint;
12311 fn probe_public_libsais16x64_lcp(
12312 plcp: *const SaSint,
12313 sa: *const SaSint,
12314 lcp: *mut SaSint,
12315 n: SaSint,
12316 ) -> SaSint;
12317 fn probe_libsais16x64_gather_lms_suffixes_16u(
12318 t: *const u16,
12319 sa: *mut SaSint,
12320 n: SaSint,
12321 m: SaSint,
12322 omp_block_start: SaSint,
12323 omp_block_size: SaSint,
12324 );
12325 fn probe_libsais16x64_count_and_gather_lms_suffixes_16u(
12326 t: *const u16,
12327 sa: *mut SaSint,
12328 n: SaSint,
12329 buckets: *mut SaSint,
12330 omp_block_start: SaSint,
12331 omp_block_size: SaSint,
12332 ) -> SaSint;
12333 fn probe_libsais16x64_initialize_buckets_start_and_end_16u(
12334 buckets: *mut SaSint,
12335 freq: *mut SaSint,
12336 ) -> SaSint;
12337 fn probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_16u(
12338 t: *const u16,
12339 buckets: *mut SaSint,
12340 first_lms_suffix: SaSint,
12341 ) -> SaSint;
12342 fn probe_libsais16x64_radix_sort_lms_suffixes_16u(
12343 t: *const u16,
12344 sa: *mut SaSint,
12345 induction_bucket: *mut SaSint,
12346 omp_block_start: SaSint,
12347 omp_block_size: SaSint,
12348 );
12349 fn probe_libsais16x64_initialize_buckets_for_partial_sorting_16u(
12350 t: *const u16,
12351 buckets: *mut SaSint,
12352 first_lms_suffix: SaSint,
12353 left_suffixes_count: SaSint,
12354 );
12355 fn probe_libsais16x64_partial_sorting_scan_left_to_right_16u(
12356 t: *const u16,
12357 sa: *mut SaSint,
12358 buckets: *mut SaSint,
12359 d: SaSint,
12360 omp_block_start: SaSint,
12361 omp_block_size: SaSint,
12362 ) -> SaSint;
12363 fn probe_libsais16x64_partial_sorting_scan_right_to_left_16u(
12364 t: *const u16,
12365 sa: *mut SaSint,
12366 buckets: *mut SaSint,
12367 d: SaSint,
12368 omp_block_start: SaSint,
12369 omp_block_size: SaSint,
12370 ) -> SaSint;
12371 fn probe_libsais16x64_partial_gsa_scan_right_to_left_16u(
12372 t: *const u16,
12373 sa: *mut SaSint,
12374 buckets: *mut SaSint,
12375 d: SaSint,
12376 omp_block_start: SaSint,
12377 omp_block_size: SaSint,
12378 ) -> SaSint;
12379 fn probe_libsais16x64_partial_sorting_shift_markers_16u_omp(
12380 sa: *mut SaSint,
12381 n: SaSint,
12382 buckets: *const SaSint,
12383 threads: SaSint,
12384 );
12385 fn probe_libsais16x64_final_sorting_scan_left_to_right_16u(
12386 t: *const u16,
12387 sa: *mut SaSint,
12388 induction_bucket: *mut SaSint,
12389 omp_block_start: SaSint,
12390 omp_block_size: SaSint,
12391 );
12392 fn probe_libsais16x64_final_sorting_scan_right_to_left_16u(
12393 t: *const u16,
12394 sa: *mut SaSint,
12395 induction_bucket: *mut SaSint,
12396 omp_block_start: SaSint,
12397 omp_block_size: SaSint,
12398 );
12399 fn probe_libsais16x64_final_gsa_scan_right_to_left_16u(
12400 t: *const u16,
12401 sa: *mut SaSint,
12402 induction_bucket: *mut SaSint,
12403 omp_block_start: SaSint,
12404 omp_block_size: SaSint,
12405 );
12406 fn probe_libsais16x64_final_bwt_scan_left_to_right_16u(
12407 t: *const u16,
12408 sa: *mut SaSint,
12409 induction_bucket: *mut SaSint,
12410 omp_block_start: SaSint,
12411 omp_block_size: SaSint,
12412 );
12413 fn probe_libsais16x64_final_bwt_scan_right_to_left_16u(
12414 t: *const u16,
12415 sa: *mut SaSint,
12416 induction_bucket: *mut SaSint,
12417 omp_block_start: SaSint,
12418 omp_block_size: SaSint,
12419 ) -> SaSint;
12420 fn probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u(
12421 t: *const u16,
12422 sa: *mut SaSint,
12423 rm: SaSint,
12424 i_sample: *mut SaSint,
12425 induction_bucket: *mut SaSint,
12426 omp_block_start: SaSint,
12427 omp_block_size: SaSint,
12428 );
12429 fn probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u(
12430 t: *const u16,
12431 sa: *mut SaSint,
12432 rm: SaSint,
12433 i_sample: *mut SaSint,
12434 induction_bucket: *mut SaSint,
12435 omp_block_start: SaSint,
12436 omp_block_size: SaSint,
12437 );
12438 fn probe_libsais16x64_renumber_lms_suffixes_16u(
12439 sa: *mut SaSint,
12440 m: SaSint,
12441 name: SaSint,
12442 omp_block_start: SaSint,
12443 omp_block_size: SaSint,
12444 ) -> SaSint;
12445 fn probe_libsais16x64_place_lms_suffixes_interval_16u(
12446 sa: *mut SaSint,
12447 n: SaSint,
12448 m: SaSint,
12449 flags: SaSint,
12450 buckets: *mut SaSint,
12451 );
12452 fn probe_libsais16x64_bwt_copy_16u(u: *mut u16, a: *mut SaSint, n: SaSint);
12453 fn probe_libsais16x64_gather_lms_suffixes_16u_omp(
12454 t: *const u16,
12455 sa: *mut SaSint,
12456 n: SaSint,
12457 threads: SaSint,
12458 );
12459 fn probe_libsais16x64_count_and_gather_lms_suffixes_16u_omp(
12460 t: *const u16,
12461 sa: *mut SaSint,
12462 n: SaSint,
12463 buckets: *mut SaSint,
12464 threads: SaSint,
12465 ) -> SaSint;
12466 fn probe_libsais16x64_radix_sort_lms_suffixes_16u_omp(
12467 t: *const u16,
12468 sa: *mut SaSint,
12469 n: SaSint,
12470 m: SaSint,
12471 flags: SaSint,
12472 buckets: *mut SaSint,
12473 threads: SaSint,
12474 );
12475 fn probe_libsais16x64_partial_sorting_scan_left_to_right_16u_omp(
12476 t: *const u16,
12477 sa: *mut SaSint,
12478 n: SaSint,
12479 k: SaSint,
12480 buckets: *mut SaSint,
12481 left_suffixes_count: SaSint,
12482 d: SaSint,
12483 threads: SaSint,
12484 ) -> SaSint;
12485 fn probe_libsais16x64_partial_sorting_scan_right_to_left_16u_omp(
12486 t: *const u16,
12487 sa: *mut SaSint,
12488 n: SaSint,
12489 k: SaSint,
12490 buckets: *mut SaSint,
12491 first_lms_suffix: SaSint,
12492 left_suffixes_count: SaSint,
12493 d: SaSint,
12494 threads: SaSint,
12495 );
12496 fn probe_libsais16x64_partial_gsa_scan_right_to_left_16u_omp(
12497 t: *const u16,
12498 sa: *mut SaSint,
12499 n: SaSint,
12500 k: SaSint,
12501 buckets: *mut SaSint,
12502 first_lms_suffix: SaSint,
12503 left_suffixes_count: SaSint,
12504 d: SaSint,
12505 threads: SaSint,
12506 );
12507 fn probe_libsais16x64_renumber_lms_suffixes_16u_omp(
12508 sa: *mut SaSint,
12509 m: SaSint,
12510 threads: SaSint,
12511 ) -> SaSint;
12512 fn probe_libsais16x64_final_bwt_scan_left_to_right_16u_omp(
12513 t: *const u16,
12514 sa: *mut SaSint,
12515 n: SaSint,
12516 k: SaSint,
12517 induction_bucket: *mut SaSint,
12518 threads: SaSint,
12519 );
12520 fn probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u_omp(
12521 t: *const u16,
12522 sa: *mut SaSint,
12523 n: SaSint,
12524 k: SaSint,
12525 rm: SaSint,
12526 i_sample: *mut SaSint,
12527 induction_bucket: *mut SaSint,
12528 threads: SaSint,
12529 );
12530 fn probe_libsais16x64_final_sorting_scan_left_to_right_16u_omp(
12531 t: *const u16,
12532 sa: *mut SaSint,
12533 n: SaSint,
12534 k: SaSint,
12535 induction_bucket: *mut SaSint,
12536 threads: SaSint,
12537 );
12538 fn probe_libsais16x64_final_bwt_scan_right_to_left_16u_omp(
12539 t: *const u16,
12540 sa: *mut SaSint,
12541 n: SaSint,
12542 k: SaSint,
12543 induction_bucket: *mut SaSint,
12544 threads: SaSint,
12545 ) -> SaSint;
12546 fn probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u_omp(
12547 t: *const u16,
12548 sa: *mut SaSint,
12549 n: SaSint,
12550 k: SaSint,
12551 rm: SaSint,
12552 i_sample: *mut SaSint,
12553 induction_bucket: *mut SaSint,
12554 threads: SaSint,
12555 );
12556 fn probe_libsais16x64_final_sorting_scan_right_to_left_16u_omp(
12557 t: *const u16,
12558 sa: *mut SaSint,
12559 omp_block_start: SaSint,
12560 omp_block_size: SaSint,
12561 k: SaSint,
12562 induction_bucket: *mut SaSint,
12563 threads: SaSint,
12564 );
12565 fn probe_libsais16x64_final_gsa_scan_right_to_left_16u_omp(
12566 t: *const u16,
12567 sa: *mut SaSint,
12568 omp_block_start: SaSint,
12569 omp_block_size: SaSint,
12570 k: SaSint,
12571 induction_bucket: *mut SaSint,
12572 threads: SaSint,
12573 );
12574 fn probe_libsais16x64_bwt_copy_16u_omp(
12575 u: *mut u16,
12576 a: *mut SaSint,
12577 n: SaSint,
12578 threads: SaSint,
12579 );
12580 fn probe_libsais16x64_gather_marked_lms_suffixes(
12581 sa: *mut SaSint,
12582 m: SaSint,
12583 l: SaSint,
12584 omp_block_start: SaSint,
12585 omp_block_size: SaSint,
12586 ) -> SaSint;
12587 fn probe_libsais16x64_gather_marked_lms_suffixes_omp(
12588 sa: *mut SaSint,
12589 n: SaSint,
12590 m: SaSint,
12591 fs: SaSint,
12592 threads: SaSint,
12593 );
12594 fn probe_libsais16x64_renumber_and_gather_lms_suffixes_omp(
12595 sa: *mut SaSint,
12596 n: SaSint,
12597 m: SaSint,
12598 fs: SaSint,
12599 threads: SaSint,
12600 ) -> SaSint;
12601 fn probe_libsais16x64_reconstruct_lms_suffixes(
12602 sa: *mut SaSint,
12603 n: SaSint,
12604 m: SaSint,
12605 omp_block_start: SaSint,
12606 omp_block_size: SaSint,
12607 );
12608 fn probe_libsais16x64_reconstruct_lms_suffixes_omp(
12609 sa: *mut SaSint,
12610 n: SaSint,
12611 m: SaSint,
12612 threads: SaSint,
12613 );
12614 fn probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k(
12615 sa: *mut SaSint,
12616 m: SaSint,
12617 name: SaSint,
12618 omp_block_start: SaSint,
12619 omp_block_size: SaSint,
12620 ) -> SaSint;
12621 fn probe_libsais16x64_mark_distinct_lms_suffixes_32s(
12622 sa: *mut SaSint,
12623 m: SaSint,
12624 omp_block_start: SaSint,
12625 omp_block_size: SaSint,
12626 );
12627 fn probe_libsais16x64_clamp_lms_suffixes_length_32s(
12628 sa: *mut SaSint,
12629 m: SaSint,
12630 omp_block_start: SaSint,
12631 omp_block_size: SaSint,
12632 );
12633 fn probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k_omp(
12634 sa: *mut SaSint,
12635 m: SaSint,
12636 threads: SaSint,
12637 ) -> SaSint;
12638 fn probe_libsais16x64_mark_distinct_lms_suffixes_32s_omp(
12639 sa: *mut SaSint,
12640 n: SaSint,
12641 m: SaSint,
12642 threads: SaSint,
12643 );
12644 fn probe_libsais16x64_clamp_lms_suffixes_length_32s_omp(
12645 sa: *mut SaSint,
12646 n: SaSint,
12647 m: SaSint,
12648 threads: SaSint,
12649 );
12650 fn probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
12651 sa: *mut SaSint,
12652 n: SaSint,
12653 m: SaSint,
12654 threads: SaSint,
12655 ) -> SaSint;
12656 fn probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s(
12657 t: *mut SaSint,
12658 sa: *mut SaSint,
12659 m: SaSint,
12660 f: SaSint,
12661 omp_block_start: SaSint,
12662 omp_block_size: SaSint,
12663 ) -> SaSint;
12664 fn probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s(
12665 sa: *mut SaSint,
12666 m: SaSint,
12667 pl: *mut SaSint,
12668 pr: *mut SaSint,
12669 omp_block_start: SaSint,
12670 omp_block_size: SaSint,
12671 );
12672 fn probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
12673 t: *mut SaSint,
12674 sa: *mut SaSint,
12675 m: SaSint,
12676 threads: SaSint,
12677 ) -> SaSint;
12678 fn probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s_omp(
12679 sa: *mut SaSint,
12680 n: SaSint,
12681 m: SaSint,
12682 fs: SaSint,
12683 f: SaSint,
12684 threads: SaSint,
12685 );
12686 fn probe_libsais16x64_compact_lms_suffixes_32s_omp(
12687 t: *mut SaSint,
12688 sa: *mut SaSint,
12689 n: SaSint,
12690 m: SaSint,
12691 fs: SaSint,
12692 threads: SaSint,
12693 ) -> SaSint;
12694 fn probe_libsais16x64_merge_unique_lms_suffixes_32s(
12695 t: *mut SaSint,
12696 sa: *mut SaSint,
12697 n: SaSint,
12698 m: SaSint,
12699 l: SaSint,
12700 omp_block_start: SaSint,
12701 omp_block_size: SaSint,
12702 );
12703 fn probe_libsais16x64_merge_nonunique_lms_suffixes_32s(
12704 sa: *mut SaSint,
12705 n: SaSint,
12706 m: SaSint,
12707 l: SaSint,
12708 omp_block_start: SaSint,
12709 omp_block_size: SaSint,
12710 );
12711 fn probe_libsais16x64_merge_unique_lms_suffixes_32s_omp(
12712 t: *mut SaSint,
12713 sa: *mut SaSint,
12714 n: SaSint,
12715 m: SaSint,
12716 threads: SaSint,
12717 );
12718 fn probe_libsais16x64_merge_nonunique_lms_suffixes_32s_omp(
12719 sa: *mut SaSint,
12720 n: SaSint,
12721 m: SaSint,
12722 f: SaSint,
12723 threads: SaSint,
12724 );
12725 fn probe_libsais16x64_merge_compacted_lms_suffixes_32s_omp(
12726 t: *mut SaSint,
12727 sa: *mut SaSint,
12728 n: SaSint,
12729 m: SaSint,
12730 f: SaSint,
12731 threads: SaSint,
12732 );
12733 fn probe_libsais16x64_radix_sort_lms_suffixes_32s_6k(
12734 t: *const SaSint,
12735 sa: *mut SaSint,
12736 induction_bucket: *mut SaSint,
12737 omp_block_start: SaSint,
12738 omp_block_size: SaSint,
12739 );
12740 fn probe_libsais16x64_radix_sort_lms_suffixes_32s_2k(
12741 t: *const SaSint,
12742 sa: *mut SaSint,
12743 induction_bucket: *mut SaSint,
12744 omp_block_start: SaSint,
12745 omp_block_size: SaSint,
12746 );
12747 fn probe_libsais16x64_radix_sort_lms_suffixes_32s_6k_omp(
12748 t: *const SaSint,
12749 sa: *mut SaSint,
12750 n: SaSint,
12751 m: SaSint,
12752 induction_bucket: *mut SaSint,
12753 threads: SaSint,
12754 );
12755 fn probe_libsais16x64_radix_sort_lms_suffixes_32s_2k_omp(
12756 t: *const SaSint,
12757 sa: *mut SaSint,
12758 n: SaSint,
12759 m: SaSint,
12760 induction_bucket: *mut SaSint,
12761 threads: SaSint,
12762 );
12763 fn probe_libsais16x64_radix_sort_lms_suffixes_32s_1k(
12764 t: *const SaSint,
12765 sa: *mut SaSint,
12766 n: SaSint,
12767 buckets: *mut SaSint,
12768 ) -> SaSint;
12769 fn probe_libsais16x64_radix_sort_set_markers_32s_6k(
12770 sa: *mut SaSint,
12771 induction_bucket: *mut SaSint,
12772 omp_block_start: SaSint,
12773 omp_block_size: SaSint,
12774 );
12775 fn probe_libsais16x64_radix_sort_set_markers_32s_4k(
12776 sa: *mut SaSint,
12777 induction_bucket: *mut SaSint,
12778 omp_block_start: SaSint,
12779 omp_block_size: SaSint,
12780 );
12781 fn probe_libsais16x64_radix_sort_set_markers_32s_6k_omp(
12782 sa: *mut SaSint,
12783 k: SaSint,
12784 induction_bucket: *mut SaSint,
12785 threads: SaSint,
12786 );
12787 fn probe_libsais16x64_radix_sort_set_markers_32s_4k_omp(
12788 sa: *mut SaSint,
12789 k: SaSint,
12790 induction_bucket: *mut SaSint,
12791 threads: SaSint,
12792 );
12793 fn probe_libsais16x64_place_lms_suffixes_histogram_32s_6k(
12794 sa: *mut SaSint,
12795 n: SaSint,
12796 k: SaSint,
12797 m: SaSint,
12798 buckets: *const SaSint,
12799 );
12800 fn probe_libsais16x64_place_lms_suffixes_histogram_32s_4k(
12801 sa: *mut SaSint,
12802 n: SaSint,
12803 k: SaSint,
12804 m: SaSint,
12805 buckets: *const SaSint,
12806 );
12807 fn probe_libsais16x64_place_lms_suffixes_histogram_32s_2k(
12808 sa: *mut SaSint,
12809 n: SaSint,
12810 k: SaSint,
12811 m: SaSint,
12812 buckets: *const SaSint,
12813 );
12814 fn probe_libsais16x64_gather_lms_suffixes_32s(
12815 t: *const SaSint,
12816 sa: *mut SaSint,
12817 n: SaSint,
12818 ) -> SaSint;
12819 fn probe_libsais16x64_gather_compacted_lms_suffixes_32s(
12820 t: *const SaSint,
12821 sa: *mut SaSint,
12822 n: SaSint,
12823 ) -> SaSint;
12824 fn probe_libsais16x64_count_lms_suffixes_32s_2k(
12825 t: *const SaSint,
12826 n: SaSint,
12827 k: SaSint,
12828 buckets: *mut SaSint,
12829 );
12830 fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k(
12831 t: *const SaSint,
12832 sa: *mut SaSint,
12833 n: SaSint,
12834 k: SaSint,
12835 buckets: *mut SaSint,
12836 omp_block_start: SaSint,
12837 omp_block_size: SaSint,
12838 ) -> SaSint;
12839 fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k_omp(
12840 t: *const SaSint,
12841 sa: *mut SaSint,
12842 n: SaSint,
12843 k: SaSint,
12844 buckets: *mut SaSint,
12845 local_buckets: SaSint,
12846 threads: SaSint,
12847 ) -> SaSint;
12848 fn probe_libsais16x64_count_suffixes_32s(
12849 t: *const SaSint,
12850 n: SaSint,
12851 k: SaSint,
12852 buckets: *mut SaSint,
12853 );
12854 fn probe_libsais16x64_initialize_buckets_start_and_end_32s_6k(
12855 k: SaSint,
12856 buckets: *mut SaSint,
12857 );
12858 fn probe_libsais16x64_initialize_buckets_start_and_end_32s_4k(
12859 k: SaSint,
12860 buckets: *mut SaSint,
12861 );
12862 fn probe_libsais16x64_initialize_buckets_end_32s_2k(k: SaSint, buckets: *mut SaSint);
12863 fn probe_libsais16x64_initialize_buckets_start_and_end_32s_2k(
12864 k: SaSint,
12865 buckets: *mut SaSint,
12866 );
12867 fn probe_libsais16x64_initialize_buckets_start_32s_1k(k: SaSint, buckets: *mut SaSint);
12868 fn probe_libsais16x64_initialize_buckets_end_32s_1k(k: SaSint, buckets: *mut SaSint);
12869 fn probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
12870 t: *const SaSint,
12871 k: SaSint,
12872 buckets: *mut SaSint,
12873 first_lms_suffix: SaSint,
12874 );
12875 fn probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
12876 t: *const SaSint,
12877 k: SaSint,
12878 buckets: *mut SaSint,
12879 first_lms_suffix: SaSint,
12880 ) -> SaSint;
12881 fn probe_libsais16x64_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
12882 t: *const SaSint,
12883 k: SaSint,
12884 buckets: *mut SaSint,
12885 first_lms_suffix: SaSint,
12886 );
12887 fn probe_libsais16x64_place_lms_suffixes_interval_32s_4k(
12888 sa: *mut SaSint,
12889 n: SaSint,
12890 k: SaSint,
12891 m: SaSint,
12892 buckets: *const SaSint,
12893 );
12894 fn probe_libsais16x64_place_lms_suffixes_interval_32s_2k(
12895 sa: *mut SaSint,
12896 n: SaSint,
12897 k: SaSint,
12898 m: SaSint,
12899 buckets: *const SaSint,
12900 );
12901 fn probe_libsais16x64_place_lms_suffixes_interval_32s_1k(
12902 t: *const SaSint,
12903 sa: *mut SaSint,
12904 k: SaSint,
12905 m: SaSint,
12906 buckets: *mut SaSint,
12907 );
12908 fn probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
12909 t: *mut SaSint,
12910 sa: *mut SaSint,
12911 n: SaSint,
12912 m: SaSint,
12913 threads: SaSint,
12914 ) -> SaSint;
12915 fn probe_libsais16x64_partial_sorting_shift_markers_32s_6k_omp(
12916 sa: *mut SaSint,
12917 k: SaSint,
12918 buckets: *const SaSint,
12919 threads: SaSint,
12920 );
12921 fn probe_libsais16x64_partial_sorting_shift_markers_32s_4k(sa: *mut SaSint, n: SaSint);
12922 fn probe_libsais16x64_partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: *mut SaSint);
12923 fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k(
12924 t: *const SaSint,
12925 sa: *mut SaSint,
12926 buckets: *mut SaSint,
12927 d: SaSint,
12928 omp_block_start: SaSint,
12929 omp_block_size: SaSint,
12930 ) -> SaSint;
12931 fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k(
12932 t: *const SaSint,
12933 sa: *mut SaSint,
12934 k: SaSint,
12935 buckets: *mut SaSint,
12936 d: SaSint,
12937 omp_block_start: SaSint,
12938 omp_block_size: SaSint,
12939 ) -> SaSint;
12940 fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k(
12941 t: *const SaSint,
12942 sa: *mut SaSint,
12943 buckets: *mut SaSint,
12944 omp_block_start: SaSint,
12945 omp_block_size: SaSint,
12946 );
12947 fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k_omp(
12948 t: *const SaSint,
12949 sa: *mut SaSint,
12950 n: SaSint,
12951 buckets: *mut SaSint,
12952 left_suffixes_count: SaSint,
12953 d: SaSint,
12954 threads: SaSint,
12955 ) -> SaSint;
12956 fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k_omp(
12957 t: *const SaSint,
12958 sa: *mut SaSint,
12959 n: SaSint,
12960 k: SaSint,
12961 buckets: *mut SaSint,
12962 d: SaSint,
12963 threads: SaSint,
12964 ) -> SaSint;
12965 fn probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k_omp(
12966 t: *const SaSint,
12967 sa: *mut SaSint,
12968 n: SaSint,
12969 buckets: *mut SaSint,
12970 threads: SaSint,
12971 );
12972 fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k(
12973 t: *const SaSint,
12974 sa: *mut SaSint,
12975 buckets: *mut SaSint,
12976 d: SaSint,
12977 omp_block_start: SaSint,
12978 omp_block_size: SaSint,
12979 ) -> SaSint;
12980 fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k(
12981 t: *const SaSint,
12982 sa: *mut SaSint,
12983 k: SaSint,
12984 buckets: *mut SaSint,
12985 d: SaSint,
12986 omp_block_start: SaSint,
12987 omp_block_size: SaSint,
12988 ) -> SaSint;
12989 fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k(
12990 t: *const SaSint,
12991 sa: *mut SaSint,
12992 buckets: *mut SaSint,
12993 omp_block_start: SaSint,
12994 omp_block_size: SaSint,
12995 );
12996 fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k_omp(
12997 t: *const SaSint,
12998 sa: *mut SaSint,
12999 n: SaSint,
13000 buckets: *mut SaSint,
13001 first_lms_suffix: SaSint,
13002 left_suffixes_count: SaSint,
13003 d: SaSint,
13004 threads: SaSint,
13005 ) -> SaSint;
13006 fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k_omp(
13007 t: *const SaSint,
13008 sa: *mut SaSint,
13009 n: SaSint,
13010 k: SaSint,
13011 buckets: *mut SaSint,
13012 d: SaSint,
13013 threads: SaSint,
13014 ) -> SaSint;
13015 fn probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k_omp(
13016 t: *const SaSint,
13017 sa: *mut SaSint,
13018 n: SaSint,
13019 buckets: *mut SaSint,
13020 threads: SaSint,
13021 );
13022 fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k(
13023 sa: *mut SaSint,
13024 omp_block_start: SaSint,
13025 omp_block_size: SaSint,
13026 ) -> SaSint;
13027 fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k(
13028 sa: *mut SaSint,
13029 omp_block_start: SaSint,
13030 omp_block_size: SaSint,
13031 ) -> SaSint;
13032 fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k_omp(
13033 sa: *mut SaSint,
13034 n: SaSint,
13035 threads: SaSint,
13036 );
13037 fn probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k_omp(
13038 sa: *mut SaSint,
13039 n: SaSint,
13040 threads: SaSint,
13041 );
13042 fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k(
13043 t: *const SaSint,
13044 sa: *mut SaSint,
13045 n: SaSint,
13046 k: SaSint,
13047 buckets: *mut SaSint,
13048 omp_block_start: SaSint,
13049 omp_block_size: SaSint,
13050 ) -> SaSint;
13051 fn probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k(
13052 t: *const SaSint,
13053 sa: *mut SaSint,
13054 n: SaSint,
13055 k: SaSint,
13056 buckets: *mut SaSint,
13057 omp_block_start: SaSint,
13058 omp_block_size: SaSint,
13059 ) -> SaSint;
13060 fn probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k_omp(
13061 t: *const SaSint,
13062 sa: *mut SaSint,
13063 n: SaSint,
13064 k: SaSint,
13065 buckets: *mut SaSint,
13066 local_buckets: SaSint,
13067 threads: SaSint,
13068 ) -> SaSint;
13069 fn probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
13070 t: *const SaSint,
13071 sa: *mut SaSint,
13072 n: SaSint,
13073 k: SaSint,
13074 buckets: *mut SaSint,
13075 local_buckets: SaSint,
13076 threads: SaSint,
13077 );
13078 fn probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_2k_omp(
13079 t: *mut SaSint,
13080 sa: *mut SaSint,
13081 n: SaSint,
13082 k: SaSint,
13083 m: SaSint,
13084 fs: SaSint,
13085 f: SaSint,
13086 buckets: *mut SaSint,
13087 local_buckets: SaSint,
13088 threads: SaSint,
13089 );
13090 fn probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_1k_omp(
13091 t: *mut SaSint,
13092 sa: *mut SaSint,
13093 n: SaSint,
13094 m: SaSint,
13095 fs: SaSint,
13096 f: SaSint,
13097 threads: SaSint,
13098 );
13099 }
13100
13101 fn brute_sa(t: &[u16]) -> Vec<SaSint> {
13102 let mut sa: Vec<_> = (0..t.len() as SaSint).collect();
13103 sa.sort_by(|&a, &b| t[a as usize..].cmp(&t[b as usize..]));
13104 sa
13105 }
13106
/// Checks that `gather_lms_suffixes_16u` leaves the suffix-array buffer in the
/// same state as the C probe for a handful of small texts.
#[test]
fn libsais16x64_gather_lms_suffixes_16u_matches_c() {
    let inputs: [&[u16]; 4] = [
        &[2, 1, 3, 1, 2, 0],
        &[7, 7, 7, 7, 0],
        &[3, 1, 2, 1, 0, 4, 1, 0],
        &[9, 1, 9, 1, 9, 0, 2, 2, 0],
    ];

    for input in inputs {
        let len = input.len() as SaSint;
        let m = len - 1;
        // Both sides start from the same sentinel-filled buffer.
        let mut rust_sa: Vec<SaSint> = vec![-99; input.len()];
        let mut c_sa = rust_sa.clone();

        gather_lms_suffixes_16u(input, &mut rust_sa, len, m, 0, len);
        // SAFETY: `input` and `c_sa` are live buffers of `len` elements each;
        // assumes the probe stays within those bounds — TODO confirm against
        // the C shim.
        unsafe {
            probe_libsais16x64_gather_lms_suffixes_16u(
                input.as_ptr(),
                c_sa.as_mut_ptr(),
                len,
                m,
                0,
                len,
            );
        }

        assert_eq!(rust_sa, c_sa);
    }
}
13136
/// Checks that `count_and_gather_lms_suffixes_16u` agrees with the C probe on
/// its return value, the suffix-array buffer and the bucket table.
#[test]
fn libsais16x64_count_and_gather_lms_suffixes_16u_matches_c() {
    let inputs: [&[u16]; 4] = [
        &[2, 1, 3, 1, 2, 0],
        &[7, 7, 7, 7, 0],
        &[3, 1, 2, 1, 0, 4, 1, 0],
        &[9, 1, 9, 1, 9, 0, 2, 2, 0],
    ];

    for input in inputs {
        let len = input.len() as SaSint;
        // Sentinel-filled buffers so untouched slots are compared as well.
        let mut rust_sa: Vec<SaSint> = vec![-99; input.len()];
        let mut rust_buckets: Vec<SaSint> = vec![-1; 4 * ALPHABET_SIZE];
        let mut c_sa = rust_sa.clone();
        let mut c_buckets = rust_buckets.clone();

        let rust_m = count_and_gather_lms_suffixes_16u(
            input,
            &mut rust_sa,
            len,
            &mut rust_buckets,
            0,
            len,
        );
        // SAFETY: `input` and `c_sa` hold `len` elements and `c_buckets` holds
        // `4 * ALPHABET_SIZE` elements, the same sizes given to the Rust call;
        // assumes the probe stays within them — TODO confirm against the C shim.
        let c_m = unsafe {
            probe_libsais16x64_count_and_gather_lms_suffixes_16u(
                input.as_ptr(),
                c_sa.as_mut_ptr(),
                len,
                c_buckets.as_mut_ptr(),
                0,
                len,
            )
        };

        assert_eq!(rust_m, c_m);
        assert_eq!(rust_sa, c_sa);
        assert_eq!(rust_buckets, c_buckets);
    }
}
13171
/// Checks `initialize_buckets_start_and_end_16u` against the C probe, once
/// with a frequency table and once without.
#[test]
fn libsais16x64_initialize_buckets_start_and_end_16u_matches_c() {
    // (symbol, counts for states 0..4) pairs written through `buckets_index4`.
    let seeds: [(usize, [SaSint; 4]); 4] = [
        (0, [1, 0, 0, 2]),
        (1, [0, 3, 1, 0]),
        (7, [2, 1, 0, 1]),
        (1024, [0, 0, 5, 0]),
    ];

    let mut rust_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for (symbol, counts) in seeds {
        for (state, count) in counts.into_iter().enumerate() {
            rust_buckets[buckets_index4(symbol, state)] = count;
        }
    }
    let mut c_buckets = rust_buckets.clone();
    let mut rust_freq: Vec<SaSint> = vec![-1; ALPHABET_SIZE];
    let mut c_freq = rust_freq.clone();

    let rust_k = initialize_buckets_start_and_end_16u(&mut rust_buckets, Some(&mut rust_freq));
    // SAFETY: `c_buckets` holds `8 * ALPHABET_SIZE` elements and `c_freq`
    // holds `ALPHABET_SIZE`, matching the Rust-side buffers; assumes the probe
    // stays within them — TODO confirm against the C shim.
    let c_k = unsafe {
        probe_libsais16x64_initialize_buckets_start_and_end_16u(
            c_buckets.as_mut_ptr(),
            c_freq.as_mut_ptr(),
        )
    };

    assert_eq!(rust_k, c_k);
    assert_eq!(rust_buckets, c_buckets);
    assert_eq!(rust_freq, c_freq);

    // Second pass without a frequency table: reuse the first
    // `4 * ALPHABET_SIZE` slots of the Rust table from the first pass and
    // zero the remainder.
    let head = 4 * ALPHABET_SIZE;
    let mut rust_buckets_no_freq = rust_buckets[..head].to_vec();
    rust_buckets_no_freq.resize(8 * ALPHABET_SIZE, 0);
    let mut c_buckets_no_freq = rust_buckets_no_freq.clone();

    let rust_k_no_freq = initialize_buckets_start_and_end_16u(&mut rust_buckets_no_freq, None);
    // SAFETY: `c_buckets_no_freq` holds `8 * ALPHABET_SIZE` elements; the null
    // `freq` pointer mirrors the `None` given to the Rust call and is assumed
    // to be accepted by the probe — TODO confirm against the C shim.
    let c_k_no_freq = unsafe {
        probe_libsais16x64_initialize_buckets_start_and_end_16u(
            c_buckets_no_freq.as_mut_ptr(),
            std::ptr::null_mut(),
        )
    };

    assert_eq!(rust_k_no_freq, c_k_no_freq);
    assert_eq!(rust_buckets_no_freq, c_buckets_no_freq);
}
13217
/// Checks `initialize_buckets_for_lms_suffixes_radix_sort_16u` against the C
/// probe, starting from a bucket table prepared by the Rust routines.
#[test]
fn libsais16x64_lms_radix_bucket_initialization_matches_c() {
    let text: [u16; 8] = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let mut rust_sa: Vec<SaSint> = vec![-99; text.len()];
    let mut rust_buckets: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];

    // Counting only sees the first `4 * ALPHABET_SIZE` slots of the table.
    let m = {
        let (count_part, _) = rust_buckets.split_at_mut(4 * ALPHABET_SIZE);
        count_and_gather_lms_suffixes_16u(&text, &mut rust_sa, n, count_part, 0, n)
    };
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = rust_sa[(n - m) as usize];

    // Snapshot the prepared table so both implementations start identically.
    let mut c_buckets = rust_buckets.clone();

    let rust_count = initialize_buckets_for_lms_suffixes_radix_sort_16u(
        &text,
        &mut rust_buckets,
        first_lms_suffix,
    );
    // SAFETY: `text` is a live array and `c_buckets` holds
    // `8 * ALPHABET_SIZE` elements, like the Rust-side table; assumes the
    // probe stays within them — TODO confirm against the C shim.
    let c_count = unsafe {
        probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_16u(
            text.as_ptr(),
            c_buckets.as_mut_ptr(),
            first_lms_suffix,
        )
    };

    assert_eq!(rust_count, c_count);
    assert_eq!(rust_buckets, c_buckets);
}
13252
/// Runs `radix_sort_lms_suffixes_16u` and its C probe on identically prepared inputs and expects
/// the suffix array and the whole bucket table to end up equal.
#[test]
fn libsais16x64_radix_sort_lms_suffixes_16u_matches_c() {
    let text = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let mut sa_rs = vec![-99; text.len()];
    let mut buckets_rs = vec![0; 8 * ALPHABET_SIZE];

    // Prepare the LMS suffixes and the radix-sort buckets with the Rust helpers.
    let m = count_and_gather_lms_suffixes_16u(
        &text,
        &mut sa_rs,
        n,
        &mut buckets_rs[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut buckets_rs, None);
    let first_lms = sa_rs[(n - m) as usize];
    initialize_buckets_for_lms_suffixes_radix_sort_16u(&text, &mut buckets_rs, first_lms);

    // Snapshot the prepared state for the C side before either sort runs.
    let mut sa_c = sa_rs.clone();
    let mut buckets_c = buckets_rs.clone();
    let (sort_start, sort_len) = (n - m + 1, m - 1);

    // Both implementations receive the part of the table starting at `4 * ALPHABET_SIZE`.
    radix_sort_lms_suffixes_16u(
        &text,
        &mut sa_rs,
        &mut buckets_rs[4 * ALPHABET_SIZE..],
        sort_start,
        sort_len,
    );
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    unsafe {
        probe_libsais16x64_radix_sort_lms_suffixes_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            buckets_c[4 * ALPHABET_SIZE..].as_mut_ptr(),
            sort_start,
            sort_len,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
13294
/// Checks that `initialize_buckets_for_partial_sorting_16u` leaves the bucket table in the same
/// state as its C probe when both start from the same radix-sort-initialised table.
#[test]
fn libsais16x64_initialize_buckets_for_partial_sorting_16u_matches_c() {
    let text = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;
    let mut sa = vec![-99; text.len()];
    let mut buckets_rs = vec![0; 8 * ALPHABET_SIZE];

    // Shared preparation up to (and including) the radix-sort bucket initialisation.
    let m = count_and_gather_lms_suffixes_16u(
        &text,
        &mut sa,
        n,
        &mut buckets_rs[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut buckets_rs, None);
    let first_lms = sa[(n - m) as usize];
    let left_count =
        initialize_buckets_for_lms_suffixes_radix_sort_16u(&text, &mut buckets_rs, first_lms);

    let mut buckets_c = buckets_rs.clone();

    initialize_buckets_for_partial_sorting_16u(&text, &mut buckets_rs, first_lms, left_count);
    // SAFETY: both pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    unsafe {
        probe_libsais16x64_initialize_buckets_for_partial_sorting_16u(
            text.as_ptr(),
            buckets_c.as_mut_ptr(),
            first_lms,
            left_count,
        );
    }

    assert_eq!(buckets_rs, buckets_c);
}
13335
13336 fn partial_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13337 let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13338 let mut sa = vec![0; 128];
13339 sa[..5].copy_from_slice(&[3, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13340
13341 let mut buckets = vec![0; 6 * ALPHABET_SIZE];
13342 for v in 0..32 {
13343 buckets[v] = 80 + (v as SaSint) * 4;
13344 buckets[2 * ALPHABET_SIZE + v] = if v % 3 == 0 { 2 } else { 0 };
13345 buckets[4 * ALPHABET_SIZE + v] = 20 + (v as SaSint) * 4;
13346 }
13347
13348 (text, sa, buckets)
13349 }
13350
/// Runs `partial_sorting_scan_left_to_right_16u` and its C probe on the shared partial-scan
/// fixture (trailing arguments `3, 0, 5`) and expects equal return values, suffix arrays and
/// bucket tables.
#[test]
fn libsais16x64_partial_sorting_scan_left_to_right_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = partial_scan_fixture();
    let mut sa_rs = sa_seed.clone();
    let mut buckets_rs = bucket_seed.clone();
    let mut sa_c = sa_seed;
    let mut buckets_c = bucket_seed;

    let d_rs =
        partial_sorting_scan_left_to_right_16u(&text, &mut sa_rs, &mut buckets_rs, 3, 0, 5);
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    let d_c = unsafe {
        probe_libsais16x64_partial_sorting_scan_left_to_right_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            buckets_c.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(d_rs, d_c);
    assert_eq!(sa_rs, sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
13374
/// Runs `partial_sorting_scan_right_to_left_16u` and its C probe on the shared partial-scan
/// fixture (trailing arguments `3, 0, 5`) and expects equal return values, suffix arrays and
/// bucket tables.
#[test]
fn libsais16x64_partial_sorting_scan_right_to_left_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = partial_scan_fixture();
    let mut sa_rs = sa_seed.clone();
    let mut buckets_rs = bucket_seed.clone();
    let mut sa_c = sa_seed;
    let mut buckets_c = bucket_seed;

    let d_rs =
        partial_sorting_scan_right_to_left_16u(&text, &mut sa_rs, &mut buckets_rs, 3, 0, 5);
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    let d_c = unsafe {
        probe_libsais16x64_partial_sorting_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            buckets_c.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(d_rs, d_c);
    assert_eq!(sa_rs, sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
13398
/// Runs `partial_gsa_scan_right_to_left_16u` and its C probe on the shared partial-scan fixture
/// (trailing arguments `3, 0, 5`) and expects equal return values, suffix arrays and bucket
/// tables.
#[test]
fn libsais16x64_partial_gsa_scan_right_to_left_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = partial_scan_fixture();
    let mut sa_rs = sa_seed.clone();
    let mut buckets_rs = bucket_seed.clone();
    let mut sa_c = sa_seed;
    let mut buckets_c = bucket_seed;

    let d_rs = partial_gsa_scan_right_to_left_16u(&text, &mut sa_rs, &mut buckets_rs, 3, 0, 5);
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    let d_c = unsafe {
        probe_libsais16x64_partial_gsa_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            buckets_c.as_mut_ptr(),
            3,
            0,
            5,
        )
    };

    assert_eq!(d_rs, d_c);
    assert_eq!(sa_rs, sa_c);
    assert_eq!(buckets_rs, buckets_c);
}
13422
/// Applies `partial_sorting_shift_markers_16u_omp` and its C probe (one thread) to a 16-entry
/// suffix array with `SAINT_MIN`-tagged entries and a sparse bucket table, then compares the
/// resulting suffix arrays.
#[test]
fn libsais16x64_partial_sorting_shift_markers_16u_matches_c() {
    // Slots 2..6 and 8..12 hold the seeded values; every other slot is zero.
    let mut sa_rs = vec![
        0,
        0,
        1,
        2 | SAINT_MIN,
        3 | SAINT_MIN,
        4,
        0,
        0,
        5 | SAINT_MIN,
        6,
        7 | SAINT_MIN,
        8,
        0,
        0,
        0,
        0,
    ];
    let mut sa_c = sa_rs.clone();

    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for (index, value) in [
        (0, 2),
        (2, 8),
        (4 * ALPHABET_SIZE + 2, 6),
        (4 * ALPHABET_SIZE + 4, 12),
    ] {
        buckets[index] = value;
    }

    let n = sa_rs.len() as SaSint;
    partial_sorting_shift_markers_16u_omp(&mut sa_rs, n, &buckets, 1);
    // SAFETY: both pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    unsafe {
        probe_libsais16x64_partial_sorting_shift_markers_16u_omp(
            sa_c.as_mut_ptr(),
            sa_c.len() as SaSint,
            buckets.as_ptr(),
            1,
        );
    }

    assert_eq!(sa_rs, sa_c);
}
13449
/// Feeds one 65,536-entry block through `partial_sorting_scan_left_to_right_16u` and through the
/// four-thread `_block_omp` variant, expecting the same return value, suffix array and buckets.
#[test]
fn libsais16x64_partial_left_to_right_16u_block_omp_uses_cache_pipeline() {
    let block_size = 65_536usize;
    let k = 512usize;
    let block_len = block_size as SaSint;

    // Symbols stay in 1..k.
    let symbol_at = |i: usize| 1 + ((i * 17 + i / 7) % (k - 1)) as u16;
    let text: Vec<u16> = (0..block_size + 2).map(symbol_at).collect();

    // The block holds 2, 3, 4, ...; the tail after it starts zeroed.
    let mut sa_seed: Vec<SaSint> = vec![0; block_size + 2 * k * 100];
    for (i, slot) in sa_seed[..block_size].iter_mut().enumerate() {
        *slot = (i + 2) as SaSint;
    }

    // Give each of the first 2*k entries of this sub-table its own 100-slot stride past the block.
    let mut bucket_seed: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for (v, slot) in bucket_seed[4 * ALPHABET_SIZE..][..2 * k].iter_mut().enumerate() {
        *slot = (block_size + v * 100) as SaSint;
    }

    let mut sa_plain = sa_seed.clone();
    let mut sa_omp = sa_seed;
    let mut buckets_plain = bucket_seed.clone();
    let mut buckets_omp = bucket_seed;
    let mut thread_state = alloc_thread_state(4).unwrap();

    let d_plain = partial_sorting_scan_left_to_right_16u(
        &text,
        &mut sa_plain,
        &mut buckets_plain,
        0,
        0,
        block_len,
    );
    let d_omp = partial_sorting_scan_left_to_right_16u_block_omp(
        &text,
        &mut sa_omp,
        k as SaSint,
        &mut buckets_omp,
        0,
        0,
        block_len,
        4,
        &mut thread_state,
    );

    assert_eq!(d_omp, d_plain);
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(buckets_omp, buckets_plain);
}
13496
/// Calls `partial_sorting_scan_left_to_right_16u_omp` once with one thread and once with four on
/// the same seeded input, and expects the same return value, suffix array and buckets.
#[test]
fn libsais16x64_partial_left_to_right_16u_omp_uses_block_pipeline() {
    let block_size = 65_536usize;
    let k = 512usize;

    // Symbols stay in 1..k.
    let symbol_at = |i: usize| 1 + ((i * 17 + i / 7) % (k - 1)) as u16;
    let text: Vec<u16> = (0..block_size + 2).map(symbol_at).collect();
    let n = text.len() as SaSint;

    // The block holds 2, 3, 4, ...; every 17th entry additionally carries the SAINT_MIN bit.
    let mut sa_seed: Vec<SaSint> = vec![0; block_size + 2 * k * 100];
    for i in 0..block_size {
        sa_seed[i] = (i + 2) as SaSint;
        if i % 17 == 0 {
            sa_seed[i] |= SAINT_MIN;
        }
    }

    // The table starts zeroed, so only the non-zero entries need writing.
    let mut bucket_seed: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for v in 0..2 * k {
        if v % 5 == 0 {
            bucket_seed[2 * ALPHABET_SIZE + v] = 3;
        }
        bucket_seed[4 * ALPHABET_SIZE + v] = (block_size + v * 100) as SaSint;
    }

    let mut sa_single = sa_seed.clone();
    let mut sa_multi = sa_seed;
    let mut buckets_single = bucket_seed.clone();
    let mut buckets_multi = bucket_seed;

    let d_single = partial_sorting_scan_left_to_right_16u_omp(
        &text,
        &mut sa_single,
        n,
        k as SaSint,
        &mut buckets_single,
        block_size as SaSint,
        7,
        1,
    );
    let d_multi = partial_sorting_scan_left_to_right_16u_omp(
        &text,
        &mut sa_multi,
        n,
        k as SaSint,
        &mut buckets_multi,
        block_size as SaSint,
        7,
        4,
    );

    assert_eq!(d_multi, d_single);
    assert_eq!(sa_multi, sa_single);
    assert_eq!(buckets_multi, buckets_single);
}
13549
/// Compares the plain right-to-left partial scans with their four-thread `_block_omp` variants on
/// one 65,536-entry block: first the sorting scan, then the GSA scan. Each pair must agree on
/// return value, suffix array and buckets. The same thread state is reused for both pairs.
#[test]
fn libsais16x64_partial_right_to_left_16u_block_omp_uses_cache_pipeline() {
    let block_size = 65_536usize;
    let k = 512usize;
    let width = 2 * k;
    let block_start = width * 200 + 1024;
    let start = block_start as SaSint;
    let len = block_size as SaSint;

    // Symbols stay in 1..k.
    let symbol_at = |i: usize| 1 + ((i * 17 + i / 7) % (k - 1)) as u16;
    let text: Vec<u16> = (0..block_size + 2).map(symbol_at).collect();

    // The block sits at `block_start`; every 17th entry carries the SAINT_MIN bit.
    let mut sa_seed: Vec<SaSint> = vec![0; block_start + block_size + 1];
    for (i, slot) in sa_seed[block_start..block_start + block_size]
        .iter_mut()
        .enumerate()
    {
        let value = (i + 2) as SaSint;
        *slot = if i % 17 != 0 { value } else { value | SAINT_MIN };
    }

    // The table starts zeroed, so only the non-zero entries need writing.
    let mut bucket_seed: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for v in 0..width {
        bucket_seed[v] = ((v + 1) * 200) as SaSint;
        if v % 5 == 0 {
            bucket_seed[2 * ALPHABET_SIZE + v] = 3;
        }
    }

    let mut thread_state = alloc_thread_state(4).unwrap();

    // Sorting scan: plain versus block pipeline.
    let mut sa_plain = sa_seed.clone();
    let mut sa_omp = sa_seed.clone();
    let mut buckets_plain = bucket_seed.clone();
    let mut buckets_omp = bucket_seed.clone();
    let d_plain = partial_sorting_scan_right_to_left_16u(
        &text,
        &mut sa_plain,
        &mut buckets_plain,
        7,
        start,
        len,
    );
    let d_omp = partial_sorting_scan_right_to_left_16u_block_omp(
        &text,
        &mut sa_omp,
        k as SaSint,
        &mut buckets_omp,
        7,
        start,
        len,
        4,
        &mut thread_state,
    );
    assert_eq!(d_omp, d_plain);
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(buckets_omp, buckets_plain);

    // GSA scan: plain versus block pipeline, starting again from the seeds.
    let mut sa_omp = sa_seed.clone();
    let mut sa_plain = sa_seed;
    let mut buckets_plain = bucket_seed.clone();
    let mut buckets_omp = bucket_seed;
    let d_plain = partial_gsa_scan_right_to_left_16u(
        &text,
        &mut sa_plain,
        &mut buckets_plain,
        7,
        start,
        len,
    );
    let d_omp = partial_gsa_scan_right_to_left_16u_block_omp(
        &text,
        &mut sa_omp,
        k as SaSint,
        &mut buckets_omp,
        7,
        start,
        len,
        4,
        &mut thread_state,
    );
    assert_eq!(d_omp, d_plain);
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(buckets_omp, buckets_plain);
}
13630
/// Calls the `_omp` right-to-left partial scans with one thread and with four threads on the same
/// seeded input: first the sorting scan, then the GSA scan. Each pair must leave identical suffix
/// arrays and buckets.
#[test]
fn libsais16x64_partial_right_to_left_16u_omp_uses_block_pipeline() {
    let block_size = 65_536usize;
    let k = 512usize;
    let width = 2 * k;
    let block_start = width * 200 + 1024;

    // Symbols stay in 1..k.
    let symbol_at = |i: usize| 1 + ((i * 17 + i / 7) % (k - 1)) as u16;
    let text: Vec<u16> = (0..block_size + 2).map(symbol_at).collect();

    let sa_len = block_start + block_size + 1;
    let n = sa_len as SaSint;
    let first_lms_suffix = n - (block_start + block_size) as SaSint;
    let left_suffixes_count = block_start as SaSint - 1;

    // The block sits at `block_start`; every 17th entry carries the SAINT_MIN bit.
    let mut sa_seed: Vec<SaSint> = vec![0; sa_len];
    for (i, slot) in sa_seed[block_start..block_start + block_size]
        .iter_mut()
        .enumerate()
    {
        let value = (i + 2) as SaSint;
        *slot = if i % 17 != 0 { value } else { value | SAINT_MIN };
    }

    // The table starts zeroed, so only the non-zero entries need writing.
    let mut bucket_seed: Vec<SaSint> = vec![0; 8 * ALPHABET_SIZE];
    for v in 0..width {
        bucket_seed[v] = ((v + 1) * 200) as SaSint;
        if v % 5 == 0 {
            bucket_seed[2 * ALPHABET_SIZE + v] = 3;
        }
    }

    // Sorting scan: one thread versus four.
    let mut sa_single = sa_seed.clone();
    let mut sa_multi = sa_seed.clone();
    let mut buckets_single = bucket_seed.clone();
    let mut buckets_multi = bucket_seed.clone();
    partial_sorting_scan_right_to_left_16u_omp(
        &text,
        &mut sa_single,
        n,
        k as SaSint,
        &mut buckets_single,
        first_lms_suffix,
        left_suffixes_count,
        7,
        1,
    );
    partial_sorting_scan_right_to_left_16u_omp(
        &text,
        &mut sa_multi,
        n,
        k as SaSint,
        &mut buckets_multi,
        first_lms_suffix,
        left_suffixes_count,
        7,
        4,
    );
    assert_eq!(sa_multi, sa_single);
    assert_eq!(buckets_multi, buckets_single);

    // GSA scan: one thread versus four, starting again from the seeds.
    let mut sa_multi = sa_seed.clone();
    let mut sa_single = sa_seed;
    let mut buckets_single = bucket_seed.clone();
    let mut buckets_multi = bucket_seed;
    partial_gsa_scan_right_to_left_16u_omp(
        &text,
        &mut sa_single,
        n,
        k as SaSint,
        &mut buckets_single,
        first_lms_suffix,
        left_suffixes_count,
        7,
        1,
    );
    partial_gsa_scan_right_to_left_16u_omp(
        &text,
        &mut sa_multi,
        n,
        k as SaSint,
        &mut buckets_multi,
        first_lms_suffix,
        left_suffixes_count,
        7,
        4,
    );
    assert_eq!(sa_multi, sa_single);
    assert_eq!(buckets_multi, buckets_single);
}
13717
13718 fn final_scan_fixture() -> ([u16; 10], Vec<SaSint>, Vec<SaSint>) {
13719 let text = [1, 0, 2, 1, 3, 0, 2, 4, 1, 0];
13720 let mut sa = vec![0; 96];
13721 sa[..6].copy_from_slice(&[3, 0, 5 | SAINT_MIN, 7, 2, 9 | SAINT_MIN]);
13722
13723 let mut induction_bucket = vec![0; ALPHABET_SIZE];
13724 for c in 0..8 {
13725 induction_bucket[c] = 24 + (c as SaSint) * 6;
13726 }
13727
13728 (text, sa, induction_bucket)
13729 }
13730
13731 fn final_order_buckets(induction_bucket: &[SaSint]) -> Vec<SaSint> {
13732 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
13733 buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13734 buckets[7 * ALPHABET_SIZE..8 * ALPHABET_SIZE].copy_from_slice(induction_bucket);
13735 buckets
13736 }
13737
/// Runs `final_sorting_scan_left_to_right_16u` and its C probe on the shared final-scan fixture
/// (trailing arguments `0, 6`) and expects equal suffix arrays and induction buckets.
#[test]
fn libsais16x64_final_sorting_scan_left_to_right_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = final_scan_fixture();
    let mut sa_rs = sa_seed.clone();
    let mut bucket_rs = bucket_seed.clone();
    let mut sa_c = sa_seed;
    let mut bucket_c = bucket_seed;

    final_sorting_scan_left_to_right_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    unsafe {
        probe_libsais16x64_final_sorting_scan_left_to_right_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
13758
/// Runs `final_sorting_scan_right_to_left_16u` and its C probe on the shared final-scan fixture
/// (trailing arguments `0, 6`) and expects equal suffix arrays and induction buckets.
#[test]
fn libsais16x64_final_sorting_scan_right_to_left_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = final_scan_fixture();
    let mut sa_rs = sa_seed.clone();
    let mut bucket_rs = bucket_seed.clone();
    let mut sa_c = sa_seed;
    let mut bucket_c = bucket_seed;

    final_sorting_scan_right_to_left_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    unsafe {
        probe_libsais16x64_final_sorting_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
13779
/// Runs `final_gsa_scan_right_to_left_16u` and its C probe on the shared final-scan fixture
/// (trailing arguments `0, 6`) and expects equal suffix arrays and induction buckets.
#[test]
fn libsais16x64_final_gsa_scan_right_to_left_16u_matches_c() {
    let (text, sa_seed, bucket_seed) = final_scan_fixture();
    let mut sa_rs = sa_seed.clone();
    let mut bucket_rs = bucket_seed.clone();
    let mut sa_c = sa_seed;
    let mut bucket_c = bucket_seed;

    final_gsa_scan_right_to_left_16u(&text, &mut sa_rs, &mut bucket_rs, 0, 6);
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    unsafe {
        probe_libsais16x64_final_gsa_scan_right_to_left_16u(
            text.as_ptr(),
            sa_c.as_mut_ptr(),
            bucket_c.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(sa_rs, sa_c);
    assert_eq!(bucket_rs, bucket_c);
}
13800
/// Exercises the 32s final-sorting helpers on a nine-symbol text.
///
/// The two plain scans are compared against their C probes. The two `_block_omp` variants have no
/// probe here and are checked against fixed expected values instead.
#[test]
fn libsais16x64_final_sorting_32s_helpers_behave_like_upstream_shapes() {
    let t = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];

    // Left-to-right scan versus the C probe.
    {
        let mut sa_rs = vec![1, 0, 0];
        let mut bucket_rs = vec![0, 1, 3];
        let mut sa_c = sa_rs.clone();
        let mut bucket_c = bucket_rs.clone();

        final_sorting_scan_left_to_right_32s(&t, &mut sa_rs, &mut bucket_rs, 0, 1);
        // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
        // presumed to stay inside them (its definition is not visible in this part of the file).
        unsafe {
            probe_libsais16x64_final_sorting_scan_left_to_right_32s(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                0,
                1,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(bucket_rs, bucket_c);
    }

    // Right-to-left scan versus the C probe.
    {
        let mut sa_rs = vec![0, 2, 0];
        let mut bucket_rs = vec![1, 2, 3];
        let mut sa_c = sa_rs.clone();
        let mut bucket_c = bucket_rs.clone();

        final_sorting_scan_right_to_left_32s(&t, &mut sa_rs, &mut bucket_rs, 0, 2);
        // SAFETY: same reasoning as for the left-to-right probe call in this test.
        unsafe {
            probe_libsais16x64_final_sorting_scan_right_to_left_32s(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                0,
                2,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(bucket_rs, bucket_c);
    }

    // Left-to-right block variant, checked against fixed values (marker bit masked off).
    {
        let mut sa = vec![1, 2, 0, 0];
        let mut induction_bucket = vec![0, 1, 3];
        let mut cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];
        final_sorting_scan_left_to_right_32s_block_omp(
            &t,
            &mut sa,
            &mut induction_bucket,
            &mut cache,
            0,
            2,
            2,
        );
        assert_eq!(sa[0] & SAINT_MAX, 0);
        assert_eq!(sa[1] & SAINT_MAX, 1);
        assert_eq!(induction_bucket[0], 1);
        assert_eq!(induction_bucket[1], 2);
    }

    // Right-to-left block variant, run with a fresh cache.
    {
        let mut sa = vec![0, 2, 0, 0];
        let mut induction_bucket = vec![1, 2, 3];
        let mut cache = vec![ThreadCache::default(); PER_THREAD_CACHE_SIZE];
        final_sorting_scan_right_to_left_32s_block_omp(
            &t,
            &mut sa,
            &mut induction_bucket,
            &mut cache,
            0,
            2,
            2,
        );
        assert_eq!(sa[1] & SAINT_MAX, 1);
        assert_eq!(induction_bucket[1], 1);
    }
}
13871
/// Compares three left-to-right final scans (BWT, BWT with aux indexes, plain sorting) with their
/// four-thread `_block_omp` variants on one 65,536-entry block. Each pair must leave identical
/// suffix arrays and buckets (and aux index arrays where present). One thread state is reused
/// across all three pairs.
#[test]
fn libsais16x64_final_left_to_right_16u_block_omp_uses_cache_pipeline() {
    let block_size = 65_536usize;
    let k = 512usize;
    let block_len = block_size as SaSint;
    let alphabet = k as SaSint;

    // Symbols stay in 1..k.
    let text: Vec<u16> = (0..=block_size).map(|i| 1 + (i % (k - 1)) as u16).collect();

    // The block holds 1, 2, 3, ...; the tail after it starts zeroed.
    let mut sa_seed: Vec<SaSint> = vec![0; block_size + k * 200];
    for (i, slot) in sa_seed[..block_size].iter_mut().enumerate() {
        *slot = (i + 1) as SaSint;
    }
    // Each symbol gets its own 200-slot stride past the block.
    let bucket_seed: Vec<SaSint> = (0..k)
        .map(|c| (block_size + c * 200) as SaSint)
        .collect();

    let mut thread_state = alloc_thread_state(4).unwrap();

    // BWT scan.
    let mut sa_plain = sa_seed.clone();
    let mut sa_omp = sa_seed.clone();
    let mut bucket_plain = bucket_seed.clone();
    let mut bucket_omp = bucket_seed.clone();
    final_bwt_scan_left_to_right_16u(&text, &mut sa_plain, &mut bucket_plain, 0, block_len);
    final_bwt_scan_left_to_right_16u_block_omp(
        &text,
        &mut sa_omp,
        alphabet,
        &mut bucket_omp,
        0,
        block_len,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(bucket_omp, bucket_plain);

    // BWT scan with aux indexes; both index arrays start filled with -1.
    let rm = 3;
    let mut sa_plain = sa_seed.clone();
    let mut sa_omp = sa_seed.clone();
    let mut bucket_plain = bucket_seed.clone();
    let mut bucket_omp = bucket_seed.clone();
    let mut aux_plain = vec![-1; (block_size / (rm as usize + 1)) + 2];
    let mut aux_omp = aux_plain.clone();
    final_bwt_aux_scan_left_to_right_16u(
        &text,
        &mut sa_plain,
        rm,
        &mut aux_plain,
        &mut bucket_plain,
        0,
        block_len,
    );
    final_bwt_aux_scan_left_to_right_16u_block_omp(
        &text,
        &mut sa_omp,
        alphabet,
        rm,
        &mut aux_omp,
        &mut bucket_omp,
        0,
        block_len,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(aux_omp, aux_plain);
    assert_eq!(bucket_omp, bucket_plain);

    // Plain sorting scan; the seeds are consumed here.
    let mut sa_omp = sa_seed.clone();
    let mut sa_plain = sa_seed;
    let mut bucket_plain = bucket_seed.clone();
    let mut bucket_omp = bucket_seed;
    final_sorting_scan_left_to_right_16u(&text, &mut sa_plain, &mut bucket_plain, 0, block_len);
    final_sorting_scan_left_to_right_16u_block_omp(
        &text,
        &mut sa_omp,
        alphabet,
        &mut bucket_omp,
        0,
        block_len,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(bucket_omp, bucket_plain);
}
13968
/// Compares four right-to-left final scans (BWT, BWT with aux indexes, plain sorting, GSA) with
/// their four-thread `_block_omp` variants on one 65,536-entry block. Each pair must leave
/// identical suffix arrays and buckets (and aux index arrays where present). One thread state is
/// reused across all four pairs.
#[test]
fn libsais16x64_final_right_to_left_16u_block_omp_uses_cache_pipeline() {
    let block_size = 65_536usize;
    let k = 512usize;
    let block_start = k * 200 + 1024;
    let start = block_start as SaSint;
    let len = block_size as SaSint;
    let alphabet = k as SaSint;

    // Symbols stay in 1..k.
    let text: Vec<u16> = (0..=block_size + 1)
        .map(|i| 1 + (i % (k - 1)) as u16)
        .collect();

    // The block sits at `block_start` and holds 1, 2, 3, ...; the rest starts zeroed.
    let mut sa_seed: Vec<SaSint> = vec![0; block_start + block_size + 1];
    for (i, slot) in sa_seed[block_start..block_start + block_size]
        .iter_mut()
        .enumerate()
    {
        *slot = (i + 1) as SaSint;
    }
    // Bucket `c` starts at (c + 1) * 200.
    let bucket_seed: Vec<SaSint> = (1..=k).map(|c| (c * 200) as SaSint).collect();

    let mut thread_state = alloc_thread_state(4).unwrap();

    // BWT scan.
    let mut sa_plain = sa_seed.clone();
    let mut sa_omp = sa_seed.clone();
    let mut bucket_plain = bucket_seed.clone();
    let mut bucket_omp = bucket_seed.clone();
    final_bwt_scan_right_to_left_16u(&text, &mut sa_plain, &mut bucket_plain, start, len);
    final_bwt_scan_right_to_left_16u_block_omp(
        &text,
        &mut sa_omp,
        alphabet,
        &mut bucket_omp,
        start,
        len,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(bucket_omp, bucket_plain);

    // BWT scan with aux indexes; both index arrays start filled with -1.
    let rm = 3;
    let mut sa_plain = sa_seed.clone();
    let mut sa_omp = sa_seed.clone();
    let mut bucket_plain = bucket_seed.clone();
    let mut bucket_omp = bucket_seed.clone();
    let mut aux_plain = vec![-1; (block_size / (rm as usize + 1)) + 2];
    let mut aux_omp = aux_plain.clone();
    final_bwt_aux_scan_right_to_left_16u(
        &text,
        &mut sa_plain,
        rm,
        &mut aux_plain,
        &mut bucket_plain,
        start,
        len,
    );
    final_bwt_aux_scan_right_to_left_16u_block_omp(
        &text,
        &mut sa_omp,
        alphabet,
        rm,
        &mut aux_omp,
        &mut bucket_omp,
        start,
        len,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(aux_omp, aux_plain);
    assert_eq!(bucket_omp, bucket_plain);

    // Plain sorting scan.
    let mut sa_plain = sa_seed.clone();
    let mut sa_omp = sa_seed.clone();
    let mut bucket_plain = bucket_seed.clone();
    let mut bucket_omp = bucket_seed.clone();
    final_sorting_scan_right_to_left_16u(&text, &mut sa_plain, &mut bucket_plain, start, len);
    final_sorting_scan_right_to_left_16u_block_omp(
        &text,
        &mut sa_omp,
        alphabet,
        &mut bucket_omp,
        start,
        len,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(bucket_omp, bucket_plain);

    // GSA scan; the seeds are consumed here.
    let mut sa_omp = sa_seed.clone();
    let mut sa_plain = sa_seed;
    let mut bucket_plain = bucket_seed.clone();
    let mut bucket_omp = bucket_seed;
    final_gsa_scan_right_to_left_16u(&text, &mut sa_plain, &mut bucket_plain, start, len);
    final_gsa_scan_right_to_left_16u_block_omp(
        &text,
        &mut sa_omp,
        alphabet,
        &mut bucket_omp,
        start,
        len,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_omp, sa_plain);
    assert_eq!(bucket_omp, bucket_plain);
}
14092
/// Runs `clear_lms_suffixes_omp` and its C probe (`k = 3`, two threads) on a six-entry suffix
/// array with three start/end bucket pairs, then compares the resulting suffix arrays.
#[test]
fn libsais16x64_clear_lms_suffixes_omp_zeroes_requested_bucket_ranges() {
    let mut sa_rs = vec![5, 4, 3, 2, 1, 9];
    let n = sa_rs.len() as SaSint;
    let mut sa_c = sa_rs.clone();

    // Kept mutable only because the C probe is handed mutable pointers.
    let mut starts = vec![1, 4, 5];
    let mut ends = vec![3, 5, 5];

    clear_lms_suffixes_omp(&mut sa_rs, n, 3, &starts, &ends, 2);
    // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
    // presumed to stay inside them (its definition is not visible in this part of the file).
    unsafe {
        probe_libsais16x64_clear_lms_suffixes_omp(
            sa_c.as_mut_ptr(),
            n,
            3,
            starts.as_mut_ptr(),
            ends.as_mut_ptr(),
            2,
        );
    }

    assert_eq!(sa_rs, sa_c);
}
14115
/// Two checks: `flip_suffix_markers_omp` must match its C probe, and
/// `induce_partial_order_32s_1k_omp` must produce the same suffix array and buckets as the
/// hand-written sequence of helper calls spelled out in the second half of this test.
#[test]
fn libsais16x64_partial_order_wrapper_helpers_match_manual_sequence() {
    // Part 1: marker flipping versus the C probe.
    {
        let mut sa_rs = vec![1, 2, 3, 4];
        let mut sa_c = sa_rs.clone();
        flip_suffix_markers_omp(&mut sa_rs, 3, 2);
        // SAFETY: the pointer comes from a live local buffer that outlives the call; the probe
        // is presumed to stay inside it (its definition is not visible in this part of the file).
        unsafe {
            probe_libsais16x64_flip_suffix_markers_omp(sa_c.as_mut_ptr(), 3, 2);
        }
        assert_eq!(sa_rs, sa_c);
    }

    // Part 2: the wrapper versus the same steps performed by hand, each on fresh zeroed state.
    let t = vec![0, 1, 2, 1, 0, 1, 2, 1, 0];
    let n = t.len() as SaSint;
    let k = 3;

    let mut sa_wrapped = vec![0; t.len()];
    let mut buckets_wrapped = vec![0; k as usize];
    let mut state_wrapped = alloc_thread_state(1).unwrap();
    induce_partial_order_32s_1k_omp(
        &t,
        &mut sa_wrapped,
        n,
        k,
        &mut buckets_wrapped,
        1,
        &mut state_wrapped,
    );

    let mut sa_manual = vec![0; t.len()];
    let mut buckets_manual = vec![0; k as usize];
    let mut state_manual = alloc_thread_state(1).unwrap();

    // Left-to-right pass on bucket starts.
    count_suffixes_32s(&t, n, k, &mut buckets_manual);
    initialize_buckets_start_32s_1k(k, &mut buckets_manual);
    partial_sorting_scan_left_to_right_32s_1k_omp(
        &t,
        &mut sa_manual,
        n,
        &mut buckets_manual,
        1,
        &mut state_manual,
    );

    // Right-to-left pass on bucket ends, after recounting.
    count_suffixes_32s(&t, n, k, &mut buckets_manual);
    initialize_buckets_end_32s_1k(k, &mut buckets_manual);
    partial_sorting_scan_right_to_left_32s_1k_omp(
        &t,
        &mut sa_manual,
        n,
        &mut buckets_manual,
        1,
        &mut state_manual,
    );

    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut sa_manual, n, 1, &mut state_manual);

    assert_eq!(sa_wrapped, sa_manual);
    assert_eq!(buckets_wrapped, buckets_manual);
}
14170
/// Compares the four `induce_partial_order_32s_{6k,4k,2k,1k}_omp` wrappers with their C probes.
///
/// Each variant prepares its own suffix array and bucket table with the Rust helpers, snapshots
/// that state for the C side, runs both implementations with one thread, and expects identical
/// suffix arrays and buckets.
#[test]
fn libsais16x64_induce_partial_order_32s_wrappers_match_c() {
    let t = make_main_32s_stress_text(128, 24);
    let n = t.len() as SaSint;
    let k = 24;
    let threads = 1;

    // 6k variant.
    {
        let mut sa_rs = vec![0; t.len()];
        let mut buckets_rs = vec![0; 6 * k as usize];
        let mut state = alloc_thread_state(threads).unwrap();
        let m = count_and_gather_lms_suffixes_32s_4k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            1,
            threads,
            &mut state,
        );
        assert!(m > 1);
        // Zero everything before the gathered suffixes.
        sa_rs[..(n - m) as usize].fill(0);
        let first_lms = sa_rs[(n - m) as usize];
        let left_count = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
            &t,
            k,
            &mut buckets_rs,
            first_lms,
        );
        // The radix-sort steps only see the table from offset `4 * k` onwards.
        let induction_bucket = buckets_rs.split_at_mut(4 * k as usize).1;
        radix_sort_lms_suffixes_32s_6k_omp(&t, &mut sa_rs, n, m, induction_bucket, threads);
        radix_sort_set_markers_32s_6k_omp(&mut sa_rs, k, induction_bucket, threads);
        initialize_buckets_for_partial_sorting_32s_6k(
            &t,
            k,
            &mut buckets_rs,
            first_lms,
            left_count,
        );

        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        induce_partial_order_32s_6k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            first_lms,
            left_count,
            threads,
            &mut state,
        );
        // SAFETY: all pointers come from live local buffers that outlive the call; the probe is
        // presumed to stay inside them (its definition is not visible in this part of the file).
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_6k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                first_lms,
                left_count,
                threads,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 4k variant.
    {
        let mut sa_rs = vec![0; t.len()];
        let mut buckets_rs = vec![0; 4 * k as usize];
        let mut state = alloc_thread_state(threads).unwrap();
        let m = count_and_gather_lms_suffixes_32s_2k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            1,
            threads,
            &mut state,
        );
        assert!(m > 1);
        let first_lms = sa_rs[(n - m) as usize];
        initialize_buckets_for_radix_and_partial_sorting_32s_4k(
            &t,
            k,
            &mut buckets_rs,
            first_lms,
        );
        // The radix-sort steps see the table with its first entry skipped.
        let induction_bucket = buckets_rs.split_at_mut(1).1;
        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut sa_rs, n, m, induction_bucket, threads);
        radix_sort_set_markers_32s_4k_omp(&mut sa_rs, k, induction_bucket, threads);
        place_lms_suffixes_interval_32s_4k(&mut sa_rs, n, k, m - 1, &buckets_rs);

        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        induce_partial_order_32s_4k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            threads,
            &mut state,
        );
        // SAFETY: same reasoning as for the 6k probe call in this test.
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_4k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                threads,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 2k variant.
    {
        let mut sa_rs = vec![0; t.len()];
        let mut buckets_rs = vec![0; 2 * k as usize];
        let mut state = alloc_thread_state(threads).unwrap();
        let m = count_and_gather_lms_suffixes_32s_2k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            1,
            threads,
            &mut state,
        );
        assert!(m > 1);
        let first_lms = sa_rs[(n - m) as usize];
        initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
            &t,
            k,
            &mut buckets_rs,
            first_lms,
        );
        // The radix sort sees the table with its first entry skipped.
        let induction_bucket = buckets_rs.split_at_mut(1).1;
        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut sa_rs, n, m, induction_bucket, threads);
        place_lms_suffixes_interval_32s_2k(&mut sa_rs, n, k, m - 1, &buckets_rs);
        initialize_buckets_start_and_end_32s_2k(k, &mut buckets_rs);

        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        induce_partial_order_32s_2k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            threads,
            &mut state,
        );
        // SAFETY: same reasoning as for the 6k probe call in this test.
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_2k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                threads,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }

    // 1k variant.
    {
        let mut sa_rs = vec![0; t.len()];
        let mut buckets_rs = vec![0; k as usize];
        let mut state = alloc_thread_state(threads).unwrap();
        count_suffixes_32s(&t, n, k, &mut buckets_rs);
        initialize_buckets_end_32s_1k(k, &mut buckets_rs);
        let m = radix_sort_lms_suffixes_32s_1k(&t, &mut sa_rs, n, &mut buckets_rs);
        assert!(m > 1);

        let mut sa_c = sa_rs.clone();
        let mut buckets_c = buckets_rs.clone();
        induce_partial_order_32s_1k_omp(
            &t,
            &mut sa_rs,
            n,
            k,
            &mut buckets_rs,
            threads,
            &mut state,
        );
        // SAFETY: same reasoning as for the 6k probe call in this test.
        unsafe {
            probe_libsais16x64_induce_partial_order_32s_1k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                threads,
            );
        }
        assert_eq!(sa_rs, sa_c);
        assert_eq!(buckets_rs, buckets_c);
    }
}
14367
/// Parity check for `induce_partial_order_16u_omp`.
///
/// The Rust preparation pipeline (count/gather, bucket initialisation, LMS
/// radix sort, partial-sorting bucket setup) is run once; its output is then
/// cloned so that the Rust wrapper and the
/// `probe_libsais16x64_induce_partial_order_16u_omp` call start from identical
/// suffix-array and bucket contents.
#[test]
fn libsais16x64_induce_partial_order_16u_omp_matches_c() {
    let text = [3, 1, 2, 1, 0, 4, 1, 0];
    let len = text.len();
    let n = len as SaSint;
    let (flags, threads) = (0, 1);
    let mut ported_sa = vec![0; len];
    let mut ported_buckets = vec![0; 8 * ALPHABET_SIZE];

    // Only the first 4 * ALPHABET_SIZE entries are handed to the counting pass.
    let m = count_and_gather_lms_suffixes_16u_omp(
        &text,
        &mut ported_sa,
        n,
        &mut ported_buckets[..4 * ALPHABET_SIZE],
        threads,
        &mut [],
    );
    let k = initialize_buckets_start_and_end_16u(&mut ported_buckets, None);
    // Guards the `n - m` index computed on the next line.
    assert!(m > 0);
    let first_lms = ported_sa[(n - m) as usize];
    let left_count =
        initialize_buckets_for_lms_suffixes_radix_sort_16u(&text, &mut ported_buckets, first_lms);
    radix_sort_lms_suffixes_16u_omp(
        &text,
        &mut ported_sa,
        n,
        m,
        flags,
        &mut ported_buckets,
        threads,
        &mut [],
    );
    initialize_buckets_for_partial_sorting_16u(&text, &mut ported_buckets, first_lms, left_count);

    // Snapshot the prepared state for the probe side before the Rust call mutates it.
    let (mut reference_sa, mut reference_buckets) = (ported_sa.clone(), ported_buckets.clone());

    induce_partial_order_16u_omp(
        &text,
        &mut ported_sa,
        n,
        k,
        flags,
        &mut ported_buckets,
        first_lms,
        left_count,
        threads,
    );
    // SAFETY: every pointer comes from a live local buffer that outlives the call.
    // NOTE(review): assumes the probe stays within those buffers — confirm.
    unsafe {
        probe_libsais16x64_induce_partial_order_16u_omp(
            text.as_ptr(),
            reference_sa.as_mut_ptr(),
            n,
            k,
            flags,
            reference_buckets.as_mut_ptr(),
            first_lms,
            left_count,
            threads,
        );
    }

    assert_eq!(ported_sa, reference_sa);
    assert_eq!(ported_buckets, reference_buckets);
}
14440
14441 fn final_order_32s_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14442 (
14443 vec![0, 1, 2, 1, 0, 1, 2, 1, 0],
14444 vec![1, 0, 2, 0, 0, 0, 0, 0, 0],
14445 )
14446 }
14447
14448 fn seed_final_order_bucket_sections(buckets: &mut [SaSint], k: usize, branch_k: usize) {
14449 let left = [0, 1, 3];
14450 let right = [1, 2, 3];
14451 let left_section = match branch_k {
14452 6 => 4 * k,
14453 4 => 2 * k,
14454 2 => k,
14455 _ => 0,
14456 };
14457 let right_section = match branch_k {
14458 6 => 5 * k,
14459 4 => 3 * k,
14460 2 => 0,
14461 _ => 0,
14462 };
14463 buckets[left_section..left_section + k].copy_from_slice(&left);
14464 buckets[right_section..right_section + k].copy_from_slice(&right);
14465 }
14466
// Parity test for the four `induce_final_order_32s_*` wrappers (6k, 4k, 2k, 1k).
//
// Each section starts from the `final_order_32s_fixture` data, clones the
// suffix array and bucket table for the probe side *before* the Rust call
// mutates them, runs both implementations, and requires identical results.
// NOTE(review): the `probe_*` functions are presumably FFI shims over the C
// reference implementation (per the `_match_c` test name) — confirm.
#[test]
fn libsais16x64_induce_final_order_32s_wrappers_match_c() {
    let (t, sa) = final_order_32s_fixture();
    let n = t.len() as SaSint;
    let k = 3;
    let threads = 1;

    // 6k variant: 6 * k buckets, seeded at sections 4k and 5k.
    let mut rust_sa = sa.clone();
    let mut rust_buckets = vec![0; 6 * k as usize];
    seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 6);
    let mut c_sa = rust_sa.clone();
    let mut c_buckets = rust_buckets.clone();
    let mut rust_state = alloc_thread_state(threads).unwrap();
    induce_final_order_32s_6k(
        &t,
        &mut rust_sa,
        n,
        k,
        &mut rust_buckets,
        threads,
        &mut rust_state,
    );
    unsafe {
        probe_libsais16x64_induce_final_order_32s_6k(
            t.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            k,
            c_buckets.as_mut_ptr(),
            threads,
        );
    }
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);

    // 4k variant: 4 * k buckets, seeded at sections 2k and 3k.
    let mut rust_sa = sa.clone();
    let mut rust_buckets = vec![0; 4 * k as usize];
    seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 4);
    let mut c_sa = rust_sa.clone();
    let mut c_buckets = rust_buckets.clone();
    let mut rust_state = alloc_thread_state(threads).unwrap();
    induce_final_order_32s_4k(
        &t,
        &mut rust_sa,
        n,
        k,
        &mut rust_buckets,
        threads,
        &mut rust_state,
    );
    unsafe {
        probe_libsais16x64_induce_final_order_32s_4k(
            t.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            k,
            c_buckets.as_mut_ptr(),
            threads,
        );
    }
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);

    // 2k variant: 2 * k buckets, seeded at sections k and 0.
    let mut rust_sa = sa.clone();
    let mut rust_buckets = vec![0; 2 * k as usize];
    seed_final_order_bucket_sections(&mut rust_buckets, k as usize, 2);
    let mut c_sa = rust_sa.clone();
    let mut c_buckets = rust_buckets.clone();
    let mut rust_state = alloc_thread_state(threads).unwrap();
    induce_final_order_32s_2k(
        &t,
        &mut rust_sa,
        n,
        k,
        &mut rust_buckets,
        threads,
        &mut rust_state,
    );
    unsafe {
        probe_libsais16x64_induce_final_order_32s_2k(
            t.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            k,
            c_buckets.as_mut_ptr(),
            threads,
        );
    }
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);

    // 1k variant: k zeroed buckets, no seeding; this last section consumes `sa`.
    let mut rust_sa = sa;
    let mut rust_buckets = vec![0; k as usize];
    let mut c_sa = rust_sa.clone();
    let mut c_buckets = rust_buckets.clone();
    let mut rust_state = alloc_thread_state(threads).unwrap();
    induce_final_order_32s_1k(
        &t,
        &mut rust_sa,
        n,
        k,
        &mut rust_buckets,
        threads,
        &mut rust_state,
    );
    unsafe {
        probe_libsais16x64_induce_final_order_32s_1k(
            t.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            k,
            c_buckets.as_mut_ptr(),
            threads,
        );
    }
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
14585
// Three-way check of `induce_final_order_16u_omp`.
//
// For each of three configurations the wrapper result is compared against
//   (a) the `probe_libsais16x64_induce_final_order_16u_omp` call, and
//   (b) a "manual" run that calls the left-to-right and right-to-left scan
//       helpers directly on bucket sections 6 and 7 of the bucket table.
// Every run starts from a fresh `final_scan_fixture()` /
// `final_order_buckets()` pair so that no state leaks between runs.
// NOTE(review): the `probe_*` function is presumably an FFI shim over the C
// reference implementation — confirm.
#[test]
fn libsais16x64_induce_final_order_16u_omp_matches_manual_sequence() {
    // --- Configuration 1: flags = 0, no auxiliary index array. ---
    let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
    let mut wrapped_buckets = final_order_buckets(&induction_bucket);
    let mut c_sa = wrapped_sa.clone();
    let mut c_buckets = wrapped_buckets.clone();
    let mut wrapped_state = alloc_thread_state(1).unwrap();
    let wrapped_index = induce_final_order_16u_omp(
        &text,
        &mut wrapped_sa,
        text.len() as SaSint,
        8,
        0,
        0,
        None,
        &mut wrapped_buckets,
        1,
        &mut wrapped_state,
    );
    let c_index = unsafe {
        probe_libsais16x64_induce_final_order_16u_omp(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            text.len() as SaSint,
            8,
            0,
            0,
            std::ptr::null_mut(),
            c_buckets.as_mut_ptr(),
            1,
        )
    };

    let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
    let mut manual_buckets = final_order_buckets(&induction_bucket);
    {
        // Split so that section 6 (left scan) and section 7 (right scan) can be
        // borrowed mutably one after the other from the same table.
        let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
        final_sorting_scan_left_to_right_16u_omp(
            &text,
            &mut manual_sa,
            text.len() as SaSint,
            8,
            &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
            1,
        );
        final_sorting_scan_right_to_left_16u_omp(
            &text,
            &mut manual_sa,
            0,
            text.len() as SaSint,
            8,
            &mut right_tail[..ALPHABET_SIZE],
            1,
        );
    }

    assert_eq!(wrapped_index, 0);
    assert_eq!(wrapped_index, c_index);
    assert_eq!(wrapped_sa, manual_sa);
    assert_eq!(wrapped_sa, c_sa);
    assert_eq!(wrapped_buckets, manual_buckets);
    assert_eq!(wrapped_buckets, c_buckets);

    // --- Configuration 2: LIBSAIS_FLAGS_BWT, no auxiliary index array. ---
    let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
    let mut wrapped_buckets = final_order_buckets(&induction_bucket);
    let mut c_sa = wrapped_sa.clone();
    let mut c_buckets = wrapped_buckets.clone();
    let mut wrapped_state = alloc_thread_state(1).unwrap();
    let wrapped_index = induce_final_order_16u_omp(
        &text,
        &mut wrapped_sa,
        text.len() as SaSint,
        8,
        LIBSAIS_FLAGS_BWT,
        0,
        None,
        &mut wrapped_buckets,
        1,
        &mut wrapped_state,
    );
    let c_index = unsafe {
        probe_libsais16x64_induce_final_order_16u_omp(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            text.len() as SaSint,
            8,
            LIBSAIS_FLAGS_BWT,
            0,
            std::ptr::null_mut(),
            c_buckets.as_mut_ptr(),
            1,
        )
    };

    let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
    let mut manual_buckets = final_order_buckets(&induction_bucket);
    // The right-to-left BWT scan is the block's tail expression, so its return
    // value becomes the manual index.
    let manual_index = {
        let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
        final_bwt_scan_left_to_right_16u_omp(
            &text,
            &mut manual_sa,
            text.len() as SaSint,
            8,
            &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
            1,
        );
        final_bwt_scan_right_to_left_16u_omp(
            &text,
            &mut manual_sa,
            text.len() as SaSint,
            8,
            &mut right_tail[..ALPHABET_SIZE],
            1,
        )
    };

    assert_eq!(wrapped_index, manual_index);
    assert_eq!(wrapped_index, c_index);
    assert_eq!(wrapped_sa, manual_sa);
    assert_eq!(wrapped_sa, c_sa);
    assert_eq!(wrapped_buckets, manual_buckets);
    assert_eq!(wrapped_buckets, c_buckets);

    // --- Configuration 3: LIBSAIS_FLAGS_BWT with an auxiliary index array. ---
    // The wrapper and the probe receive `2` where the manual aux scans below
    // receive `1`.
    // NOTE(review): presumably the wrapper forwards `r - 1` to the aux scans —
    // confirm against `induce_final_order_16u_omp`.
    let (text, mut wrapped_sa, induction_bucket) = final_scan_fixture();
    let mut wrapped_buckets = final_order_buckets(&induction_bucket);
    let mut c_sa = wrapped_sa.clone();
    let mut c_buckets = wrapped_buckets.clone();
    let mut wrapped_state = alloc_thread_state(1).unwrap();
    let mut wrapped_i = vec![-1; 8];
    let mut c_i = wrapped_i.clone();
    let wrapped_index = induce_final_order_16u_omp(
        &text,
        &mut wrapped_sa,
        text.len() as SaSint,
        8,
        LIBSAIS_FLAGS_BWT,
        2,
        Some(&mut wrapped_i),
        &mut wrapped_buckets,
        1,
        &mut wrapped_state,
    );
    let c_index = unsafe {
        probe_libsais16x64_induce_final_order_16u_omp(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            text.len() as SaSint,
            8,
            LIBSAIS_FLAGS_BWT,
            2,
            c_i.as_mut_ptr(),
            c_buckets.as_mut_ptr(),
            1,
        )
    };

    let (text, mut manual_sa, induction_bucket) = final_scan_fixture();
    let mut manual_buckets = final_order_buckets(&induction_bucket);
    let mut manual_i = vec![-1; 8];
    {
        let (left_buckets, right_tail) = manual_buckets.split_at_mut(7 * ALPHABET_SIZE);
        final_bwt_aux_scan_left_to_right_16u_omp(
            &text,
            &mut manual_sa,
            text.len() as SaSint,
            8,
            1,
            &mut manual_i,
            &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE],
            1,
        );
        final_bwt_aux_scan_right_to_left_16u_omp(
            &text,
            &mut manual_sa,
            text.len() as SaSint,
            8,
            1,
            &mut manual_i,
            &mut right_tail[..ALPHABET_SIZE],
            1,
        );
    }

    assert_eq!(wrapped_index, 0);
    assert_eq!(wrapped_index, c_index);
    assert_eq!(wrapped_sa, manual_sa);
    assert_eq!(wrapped_sa, c_sa);
    assert_eq!(wrapped_buckets, manual_buckets);
    assert_eq!(wrapped_buckets, c_buckets);
    assert_eq!(wrapped_i, manual_i);
    assert_eq!(wrapped_i, c_i);
}
14778
// End-to-end check of `main_16u` against the `probe_public_libsais16x64_*`
// entry points, once with flags = 0 and once with `LIBSAIS_FLAGS_GSA`.
//
// Both sides get a suffix-array buffer of `text.len() + fs` entries; only the
// first `text.len()` entries are compared, together with the returned index
// and the symbol frequency table.
// NOTE(review): the `probe_public_*` functions are presumably FFI shims over
// the public C API — confirm.
#[test]
fn libsais16x64_main_16u_matches_public_c_suffix_array_paths() {
    // Plain suffix-array path (flags = 0).
    let text = [3, 1, 4, 1, 5, 9, 0, 2];
    let n = text.len() as SaSint;
    let fs = 32;
    let mut rust_sa = vec![0; text.len() + fs as usize];
    let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
    let mut rust_freq = vec![0; ALPHABET_SIZE];
    let mut rust_state = alloc_thread_state(1).unwrap();
    let rust_index = main_16u(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets,
        0,
        0,
        None,
        fs,
        Some(&mut rust_freq),
        1,
        &mut rust_state,
    );

    let mut c_sa = vec![0; text.len() + fs as usize];
    let mut c_freq = vec![0; ALPHABET_SIZE];
    let c_index = unsafe {
        probe_public_libsais16x64_freq(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            fs,
            c_freq.as_mut_ptr(),
        )
    };

    assert_eq!(rust_index, c_index);
    // The trailing `fs` entries are deliberately left out of the comparison.
    assert_eq!(&rust_sa[..text.len()], &c_sa[..text.len()]);
    assert_eq!(rust_freq, c_freq);

    // Same flow with LIBSAIS_FLAGS_GSA and a different text / `fs`.
    let text = [2, 1, 0, 2, 0];
    let n = text.len() as SaSint;
    let fs = 24;
    let mut rust_sa = vec![0; text.len() + fs as usize];
    let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
    let mut rust_freq = vec![0; ALPHABET_SIZE];
    let mut rust_state = alloc_thread_state(1).unwrap();
    let rust_index = main_16u(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets,
        LIBSAIS_FLAGS_GSA,
        0,
        None,
        fs,
        Some(&mut rust_freq),
        1,
        &mut rust_state,
    );

    let mut c_sa = vec![0; text.len() + fs as usize];
    let mut c_freq = vec![0; ALPHABET_SIZE];
    let c_index = unsafe {
        probe_public_libsais16x64_gsa_freq(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            fs,
            c_freq.as_mut_ptr(),
        )
    };

    assert_eq!(rust_index, c_index);
    assert_eq!(&rust_sa[..text.len()], &c_sa[..text.len()]);
    assert_eq!(rust_freq, c_freq);
}
14855
/// Runs `final_bwt_scan_left_to_right_16u` and its `probe_*` counterpart over
/// identical copies of the `final_scan_fixture` data and requires both the
/// suffix array and the bucket table to agree afterwards.
#[test]
fn libsais16x64_final_bwt_scan_left_to_right_16u_matches_c() {
    let (symbols, mut ported_sa, mut ported_bucket) = final_scan_fixture();
    // Untouched copies for the probe side.
    let (mut reference_sa, mut reference_bucket) = (ported_sa.clone(), ported_bucket.clone());

    final_bwt_scan_left_to_right_16u(&symbols, &mut ported_sa, &mut ported_bucket, 0, 6);
    // SAFETY: all pointers are derived from live local buffers.
    // NOTE(review): assumes the probe only touches the `0..6` block — confirm.
    unsafe {
        probe_libsais16x64_final_bwt_scan_left_to_right_16u(
            symbols.as_ptr(),
            reference_sa.as_mut_ptr(),
            reference_bucket.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(ported_sa, reference_sa);
    assert_eq!(ported_bucket, reference_bucket);
}
14876
/// Runs `final_bwt_scan_right_to_left_16u` and its `probe_*` counterpart over
/// identical copies of the `final_scan_fixture` data; the returned index, the
/// suffix array and the bucket table must all agree.
#[test]
fn libsais16x64_final_bwt_scan_right_to_left_16u_matches_c() {
    let (symbols, mut ported_sa, mut ported_bucket) = final_scan_fixture();
    // Untouched copies for the probe side.
    let (mut reference_sa, mut reference_bucket) = (ported_sa.clone(), ported_bucket.clone());

    let ported_index =
        final_bwt_scan_right_to_left_16u(&symbols, &mut ported_sa, &mut ported_bucket, 0, 6);
    // SAFETY: all pointers are derived from live local buffers.
    // NOTE(review): assumes the probe only touches the `0..6` block — confirm.
    let reference_index = unsafe {
        probe_libsais16x64_final_bwt_scan_right_to_left_16u(
            symbols.as_ptr(),
            reference_sa.as_mut_ptr(),
            reference_bucket.as_mut_ptr(),
            0,
            6,
        )
    };

    assert_eq!(ported_index, reference_index);
    assert_eq!(ported_sa, reference_sa);
    assert_eq!(ported_bucket, reference_bucket);
}
14899
/// Runs `final_bwt_aux_scan_left_to_right_16u` and its `probe_*` counterpart
/// over identical copies of the `final_scan_fixture` data plus an eight-entry
/// auxiliary index array pre-filled with `-1`; suffix array, bucket table and
/// index array must all agree afterwards.
#[test]
fn libsais16x64_final_bwt_aux_scan_left_to_right_16u_matches_c() {
    let (symbols, mut ported_sa, mut ported_bucket) = final_scan_fixture();
    let (mut reference_sa, mut reference_bucket) = (ported_sa.clone(), ported_bucket.clone());
    let mut ported_aux = vec![-1; 8];
    let mut reference_aux = ported_aux.clone();

    final_bwt_aux_scan_left_to_right_16u(
        &symbols,
        &mut ported_sa,
        1,
        &mut ported_aux,
        &mut ported_bucket,
        0,
        6,
    );
    // SAFETY: all pointers are derived from live local buffers.
    // NOTE(review): assumes the probe stays within those buffers — confirm.
    unsafe {
        probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u(
            symbols.as_ptr(),
            reference_sa.as_mut_ptr(),
            1,
            reference_aux.as_mut_ptr(),
            reference_bucket.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(ported_sa, reference_sa);
    assert_eq!(ported_bucket, reference_bucket);
    assert_eq!(ported_aux, reference_aux);
}
14933
/// Runs `final_bwt_aux_scan_right_to_left_16u` and its `probe_*` counterpart
/// over identical copies of the `final_scan_fixture` data plus an eight-entry
/// auxiliary index array pre-filled with `-1`; suffix array, bucket table and
/// index array must all agree afterwards.
#[test]
fn libsais16x64_final_bwt_aux_scan_right_to_left_16u_matches_c() {
    let (symbols, mut ported_sa, mut ported_bucket) = final_scan_fixture();
    let (mut reference_sa, mut reference_bucket) = (ported_sa.clone(), ported_bucket.clone());
    let mut ported_aux = vec![-1; 8];
    let mut reference_aux = ported_aux.clone();

    final_bwt_aux_scan_right_to_left_16u(
        &symbols,
        &mut ported_sa,
        1,
        &mut ported_aux,
        &mut ported_bucket,
        0,
        6,
    );
    // SAFETY: all pointers are derived from live local buffers.
    // NOTE(review): assumes the probe stays within those buffers — confirm.
    unsafe {
        probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u(
            symbols.as_ptr(),
            reference_sa.as_mut_ptr(),
            1,
            reference_aux.as_mut_ptr(),
            reference_bucket.as_mut_ptr(),
            0,
            6,
        );
    }

    assert_eq!(ported_sa, reference_sa);
    assert_eq!(ported_bucket, reference_bucket);
    assert_eq!(ported_aux, reference_aux);
}
14967
/// Seeds the first six slots of a 20-entry suffix array with values whose
/// `SAINT_MIN` bit alternates, then checks that `renumber_lms_suffixes_16u`
/// and its `probe_*` counterpart return the same name and leave the same
/// array contents.
#[test]
fn libsais16x64_renumber_lms_suffixes_16u_matches_c() {
    let seeded = [2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN];
    let m = seeded.len();

    let mut ported_sa = vec![0; 20];
    ported_sa[..m].copy_from_slice(&seeded);
    let mut reference_sa = ported_sa.clone();

    let ported_name = renumber_lms_suffixes_16u(&mut ported_sa, m as SaSint, 5, 0, m as SaSint);
    // SAFETY: the pointer comes from a live 20-entry buffer.
    // NOTE(review): assumes the probe stays within it — confirm.
    let reference_name = unsafe {
        probe_libsais16x64_renumber_lms_suffixes_16u(
            reference_sa.as_mut_ptr(),
            m as SaSint,
            5,
            0,
            m as SaSint,
        )
    };

    assert_eq!(ported_name, reference_name);
    assert_eq!(ported_sa, reference_sa);
}
14989
14990 fn lms_interval_fixture() -> (Vec<SaSint>, Vec<SaSint>) {
14991 let mut sa = vec![-7; 16];
14992 sa[4..8].copy_from_slice(&[41, 42, 61, 62]);
14993
14994 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
14995 buckets[buckets_index2(2, 1)] = 0;
14996 buckets[buckets_index2(3, 1)] = 2;
14997 buckets[buckets_index2(4, 1)] = 2;
14998 buckets[buckets_index2(5, 1)] = 2;
14999 buckets[buckets_index2(6, 1)] = 4;
15000 buckets[buckets_index2(7, 1)] = 4;
15001 buckets[7 * ALPHABET_SIZE + 2] = 6;
15002 buckets[7 * ALPHABET_SIZE + 5] = 12;
15003
15004 (sa, buckets)
15005 }
15006
/// For flags `0` and `LIBSAIS_FLAGS_GSA`, runs
/// `place_lms_suffixes_interval_16u` and its `probe_*` counterpart on fresh
/// copies of `lms_interval_fixture()` and requires identical suffix-array and
/// bucket contents.
#[test]
fn libsais16x64_place_lms_suffixes_interval_16u_matches_c() {
    let flag_variants = [0, LIBSAIS_FLAGS_GSA];
    for flags in flag_variants {
        // A fresh fixture per variant keeps the runs independent.
        let (mut ported_sa, mut ported_buckets) = lms_interval_fixture();
        let (mut reference_sa, mut reference_buckets) =
            (ported_sa.clone(), ported_buckets.clone());

        place_lms_suffixes_interval_16u(&mut ported_sa, 16, 8, flags, &mut ported_buckets);
        // SAFETY: both pointers come from live local buffers.
        // NOTE(review): assumes the probe stays within those buffers — confirm.
        unsafe {
            probe_libsais16x64_place_lms_suffixes_interval_16u(
                reference_sa.as_mut_ptr(),
                16,
                8,
                flags,
                reference_buckets.as_mut_ptr(),
            );
        }

        assert_eq!(ported_sa, reference_sa);
        assert_eq!(ported_buckets, reference_buckets);
    }
}
15029
/// Feeds `bwt_copy_16u` and its `probe_*` counterpart the same ten-entry
/// source (including negative values and values above 65535) and requires the
/// destination buffers, both pre-filled with `999`, to match afterwards.
#[test]
fn libsais16x64_bwt_copy_16u_matches_c() {
    let mut source = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
    let len = source.len();
    let mut ported_out = vec![999; len];
    let mut reference_out = ported_out.clone();

    bwt_copy_16u(&mut ported_out, &source, len as SaSint);
    // SAFETY: both pointers come from live buffers of `len` entries.
    // NOTE(review): assumes the probe handles exactly `len` entries — confirm.
    unsafe {
        probe_libsais16x64_bwt_copy_16u(
            reference_out.as_mut_ptr(),
            source.as_mut_ptr(),
            len as SaSint,
        );
    }

    assert_eq!(ported_out, reference_out);
}
15043
// Parity test for three early-stage `_omp` wrappers, each run with one thread
// and an empty thread-state slice (`&mut []`):
//   1. `gather_lms_suffixes_16u_omp`
//   2. `count_and_gather_lms_suffixes_16u_omp`
//   3. `radix_sort_lms_suffixes_16u_omp`
// NOTE(review): the `probe_*` functions are presumably FFI shims over the C
// reference implementation (per the `_match_c` test name) — confirm.
#[test]
fn libsais16x64_early_omp_wrappers_match_c() {
    let text = [3, 1, 2, 1, 0, 4, 1, 0];
    let n = text.len() as SaSint;

    // 1. gather: both sides start from a buffer pre-filled with -99.
    let mut rust_sa = vec![-99; text.len()];
    let mut c_sa = rust_sa.clone();
    gather_lms_suffixes_16u_omp(&text, &mut rust_sa, n, 1, &mut []);
    unsafe {
        probe_libsais16x64_gather_lms_suffixes_16u_omp(text.as_ptr(), c_sa.as_mut_ptr(), n, 1);
    }
    assert_eq!(rust_sa, c_sa);

    // 2. count-and-gather: buckets start at -1 on both sides.
    let mut rust_sa = vec![-99; text.len()];
    let mut c_sa = rust_sa.clone();
    let mut rust_buckets = vec![-1; 4 * ALPHABET_SIZE];
    let mut c_buckets = rust_buckets.clone();
    let rust_m = count_and_gather_lms_suffixes_16u_omp(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets,
        1,
        &mut [],
    );
    let c_m = unsafe {
        probe_libsais16x64_count_and_gather_lms_suffixes_16u_omp(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            c_buckets.as_mut_ptr(),
            1,
        )
    };
    assert_eq!(rust_m, c_m);
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);

    // 3. radix sort: the Rust helpers prepare the input state (reusing
    // `rust_sa` from step 2), which is then cloned for the probe side.
    let mut rust_buckets = vec![0; 8 * ALPHABET_SIZE];
    let m = count_and_gather_lms_suffixes_16u(
        &text,
        &mut rust_sa,
        n,
        &mut rust_buckets[..4 * ALPHABET_SIZE],
        0,
        n,
    );
    initialize_buckets_start_and_end_16u(&mut rust_buckets, None);
    let first_lms_suffix = rust_sa[(n - m) as usize];
    initialize_buckets_for_lms_suffixes_radix_sort_16u(
        &text,
        &mut rust_buckets,
        first_lms_suffix,
    );
    let mut c_sa = rust_sa.clone();
    let mut c_buckets = rust_buckets.clone();
    radix_sort_lms_suffixes_16u_omp(
        &text,
        &mut rust_sa,
        n,
        m,
        0,
        &mut rust_buckets,
        1,
        &mut [],
    );
    unsafe {
        probe_libsais16x64_radix_sort_lms_suffixes_16u_omp(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            n,
            m,
            0,
            c_buckets.as_mut_ptr(),
            1,
        );
    }
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_buckets, c_buckets);
}
15124
// Rust-only consistency test: on a 65,600-symbol text, the `_omp` wrappers
// called with 4 threads must produce the same results as the scalar helpers
// run over the whole range `0..n`.
// NOTE(review): per the test name this input size is presumably large enough
// to trigger the block-partitioned code path — confirm against the wrappers.
#[test]
fn libsais16x64_early_omp_wrappers_use_block_partition_for_large_inputs() {
    let n = 65_600usize;
    // Deterministic pseudo-random text with symbols in 1..=509.
    let text: Vec<u16> = (0..n)
        .map(|i| 1 + ((i * 37 + i / 17) % 509) as u16)
        .collect();

    let mut gathered_threaded = vec![-99; n];
    let mut gathered_scalar = vec![-99; n];
    let mut thread_state = alloc_thread_state(4).unwrap();
    let mut count_sa = vec![-99; n];
    let mut count_buckets = vec![0; 4 * ALPHABET_SIZE];
    // This call's outputs are not asserted on.
    // NOTE(review): presumably it populates `thread_state` for the gather call
    // that follows — confirm.
    count_and_gather_lms_suffixes_16u_omp(
        &text,
        &mut count_sa,
        n as SaSint,
        &mut count_buckets,
        4,
        &mut thread_state,
    );
    gather_lms_suffixes_16u_omp(
        &text,
        &mut gathered_threaded,
        n as SaSint,
        4,
        &mut thread_state,
    );
    gather_lms_suffixes_16u(
        &text,
        &mut gathered_scalar,
        n as SaSint,
        n as SaSint - 1,
        0,
        n as SaSint,
    );
    assert_eq!(gathered_threaded, gathered_scalar);

    // Threaded vs scalar count-and-gather.
    let mut sa_threaded = vec![-99; n];
    let mut sa_scalar = vec![-99; n];
    let mut buckets_threaded = vec![0; 4 * ALPHABET_SIZE];
    let mut buckets_scalar = vec![0; 4 * ALPHABET_SIZE];
    let m_threaded = count_and_gather_lms_suffixes_16u_omp(
        &text,
        &mut sa_threaded,
        n as SaSint,
        &mut buckets_threaded,
        4,
        &mut thread_state,
    );
    let m_scalar = count_and_gather_lms_suffixes_16u(
        &text,
        &mut sa_scalar,
        n as SaSint,
        &mut buckets_scalar,
        0,
        n as SaSint,
    );
    assert_eq!(m_threaded, m_scalar);
    // Only the last `m` entries of each suffix array are compared.
    assert_eq!(
        &sa_threaded[n - m_threaded as usize..],
        &sa_scalar[n - m_scalar as usize..]
    );
    assert_eq!(buckets_threaded, buckets_scalar);
}
15189
/// Single-thread parity checks for two late-stage `_omp` wrappers:
/// `renumber_lms_suffixes_16u_omp` (returned name and array contents) and
/// `bwt_copy_16u_omp` (destination buffer contents), each compared with its
/// `probe_*` counterpart.
#[test]
fn libsais16x64_late_omp_wrappers_match_c() {
    // --- renumber_lms_suffixes_16u_omp ---
    let seeded = [2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN, 10, 12 | SAINT_MIN];
    let m = seeded.len();
    let mut ported_sa = vec![0; 20];
    ported_sa[..m].copy_from_slice(&seeded);
    let mut reference_sa = ported_sa.clone();
    let mut state = alloc_thread_state(1).unwrap();

    let ported_name = renumber_lms_suffixes_16u_omp(&mut ported_sa, m as SaSint, 1, &mut state);
    // SAFETY: the pointer comes from a live 20-entry buffer.
    // NOTE(review): assumes the probe stays within it — confirm.
    let reference_name = unsafe {
        probe_libsais16x64_renumber_lms_suffixes_16u_omp(
            reference_sa.as_mut_ptr(),
            m as SaSint,
            1,
        )
    };
    assert_eq!(ported_name, reference_name);
    assert_eq!(ported_sa, reference_sa);

    // --- bwt_copy_16u_omp ---
    let mut source = vec![0, 1, 65535, 65536, -1, -2, 70000, 17, 131071, -65536];
    let len = source.len();
    let mut ported_out = vec![999; len];
    let mut reference_out = ported_out.clone();

    bwt_copy_16u_omp(&mut ported_out, &source, len as SaSint, 1);
    // SAFETY: both pointers come from live buffers of `len` entries.
    // NOTE(review): assumes the probe handles exactly `len` entries — confirm.
    unsafe {
        probe_libsais16x64_bwt_copy_16u_omp(
            reference_out.as_mut_ptr(),
            source.as_mut_ptr(),
            len as SaSint,
            1,
        );
    }
    assert_eq!(ported_out, reference_out);
}
15219
/// Checks that `gather_marked_lms_suffixes` and its `probe_*` counterpart
/// return the same position (after casting the Rust result to `SaSint`) and
/// leave the same array contents for an eight-entry input with two
/// `SAINT_MIN`-marked entries.
#[test]
fn libsais16x64_gather_marked_lms_suffixes_matches_c() {
    let mut ported_sa = vec![0, 0, 3 | SAINT_MIN, 4, 5 | SAINT_MIN, 6, -7, 8];
    let mut reference_sa = ported_sa.clone();

    let ported_l = gather_marked_lms_suffixes(&mut ported_sa, 2, 8, 0, 4) as SaSint;
    // SAFETY: the pointer comes from a live eight-entry buffer.
    // NOTE(review): assumes the probe stays within it — confirm.
    let reference_l = unsafe {
        probe_libsais16x64_gather_marked_lms_suffixes(reference_sa.as_mut_ptr(), 2, 8, 0, 4)
    };

    assert_eq!(ported_l, reference_l);
    assert_eq!(ported_sa, reference_sa);
}
15232
/// Single-thread parity check for `gather_marked_lms_suffixes_omp`: a
/// ten-entry array with slots `4..8` seeded (two of them `SAINT_MIN`-marked)
/// must end up identical on the Rust and `probe_*` sides.
#[test]
fn libsais16x64_gather_marked_lms_suffixes_omp_matches_c() {
    let seeded = [2 | SAINT_MIN, 4, 6 | SAINT_MIN, 8];
    let mut ported_sa = vec![0; 10];
    ported_sa[4..4 + seeded.len()].copy_from_slice(&seeded);
    let mut reference_sa = ported_sa.clone();

    let mut state = alloc_thread_state(1).unwrap();
    gather_marked_lms_suffixes_omp(&mut ported_sa, 8, 4, 2, 1, &mut state);
    // SAFETY: the pointer comes from a live ten-entry buffer.
    // NOTE(review): assumes the probe stays within it — confirm.
    unsafe {
        probe_libsais16x64_gather_marked_lms_suffixes_omp(reference_sa.as_mut_ptr(), 8, 4, 2, 1);
    }

    assert_eq!(ported_sa, reference_sa);
}
15247
/// Single-thread parity check for `renumber_and_gather_lms_suffixes_omp`: a
/// ten-entry array with its first four slots seeded (two of them
/// `SAINT_MIN`-marked) must yield the same name and contents on the Rust and
/// `probe_*` sides.
#[test]
fn libsais16x64_renumber_and_gather_lms_suffixes_omp_matches_c() {
    let seeded = [2, 4 | SAINT_MIN, 6, 8 | SAINT_MIN];
    let mut ported_sa = vec![0; 10];
    ported_sa[..seeded.len()].copy_from_slice(&seeded);
    let mut reference_sa = ported_sa.clone();

    let mut state = alloc_thread_state(1).unwrap();
    let ported_name = renumber_and_gather_lms_suffixes_omp(&mut ported_sa, 8, 4, 2, 1, &mut state);
    // SAFETY: the pointer comes from a live ten-entry buffer.
    // NOTE(review): assumes the probe stays within it — confirm.
    let reference_name = unsafe {
        probe_libsais16x64_renumber_and_gather_lms_suffixes_omp(
            reference_sa.as_mut_ptr(),
            8,
            4,
            2,
            1,
        )
    };

    assert_eq!(ported_name, reference_name);
    assert_eq!(ported_sa, reference_sa);
}
15264
/// Parity checks for `reconstruct_lms_suffixes` (explicit block `0..3`) and
/// `reconstruct_lms_suffixes_omp` (one thread), each run on the same
/// eight-entry seed and compared with its `probe_*` counterpart.
#[test]
fn libsais16x64_reconstruct_lms_suffixes_matches_c() {
    let seed = [2, 0, 1, 77, 88, 10, 11, 12];

    // Direct helper with an explicit block range.
    let mut ported_sa = seed.to_vec();
    let mut reference_sa = ported_sa.clone();
    reconstruct_lms_suffixes(&mut ported_sa, 8, 3, 0, 3);
    // SAFETY: the pointer comes from a live eight-entry buffer.
    // NOTE(review): assumes the probe stays within it — confirm.
    unsafe {
        probe_libsais16x64_reconstruct_lms_suffixes(reference_sa.as_mut_ptr(), 8, 3, 0, 3);
    }
    assert_eq!(ported_sa, reference_sa);

    // `_omp` wrapper, single thread.
    let mut ported_sa = seed.to_vec();
    let mut reference_sa = ported_sa.clone();
    reconstruct_lms_suffixes_omp(&mut ported_sa, 8, 3, 1);
    // SAFETY: as above.
    unsafe {
        probe_libsais16x64_reconstruct_lms_suffixes_omp(reference_sa.as_mut_ptr(), 8, 3, 1);
    }
    assert_eq!(ported_sa, reference_sa);
}
15286
// Rust-only consistency test: three late-stage `_omp` wrappers must give the
// same result with 4 threads as with 1 thread on inputs of 65,536 LMS entries.
// NOTE(review): per the test name these sizes are presumably large enough to
// trigger the block-partitioned code path — confirm against the wrappers.
#[test]
fn libsais16x64_lms_late_omp_wrappers_use_block_partition() {
    // 1. renumber_lms_suffixes_16u_omp: every 7th entry carries the SAINT_MIN bit.
    let m = 65_536usize;
    let mut scalar = vec![0; 2 * m + 8];
    for i in 0..m {
        let value = (2 * i) as SaSint;
        scalar[i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
    }
    let mut threaded = scalar.clone();

    let mut scalar_state = alloc_thread_state(1).unwrap();
    let mut threaded_state = alloc_thread_state(4).unwrap();
    let scalar_name =
        renumber_lms_suffixes_16u_omp(&mut scalar, m as SaSint, 1, &mut scalar_state);
    let threaded_name =
        renumber_lms_suffixes_16u_omp(&mut threaded, m as SaSint, 4, &mut threaded_state);
    assert_eq!(threaded_name, scalar_name);
    assert_eq!(threaded, scalar);

    // 2. gather_marked_lms_suffixes_omp: n / 2 entries starting at offset m,
    // every 7th one marked with SAINT_MIN.
    let n = 131_072usize;
    let m = 65_536usize;
    let fs = 128usize;
    let mut scalar = vec![0; n + fs];
    for i in 0..(n >> 1) {
        let value = (i as SaSint + 1) & SAINT_MAX;
        scalar[m + i] = if i % 7 == 0 { value | SAINT_MIN } else { value };
    }
    let marked_count = (0..(n >> 1)).filter(|i| i % 7 == 0).count();
    let mut threaded = scalar.clone();

    let mut scalar_state = alloc_thread_state(1).unwrap();
    let mut threaded_state = alloc_thread_state(4).unwrap();
    gather_marked_lms_suffixes_omp(
        &mut scalar,
        n as SaSint,
        m as SaSint,
        fs as SaSint,
        1,
        &mut scalar_state,
    );
    gather_marked_lms_suffixes_omp(
        &mut threaded,
        n as SaSint,
        m as SaSint,
        fs as SaSint,
        4,
        &mut threaded_state,
    );
    // Only the last `marked_count` entries of the `n + fs` buffers are compared.
    assert_eq!(
        &threaded[n + fs - marked_count..n + fs],
        &scalar[n + fs - marked_count..n + fs]
    );

    // 3. reconstruct_lms_suffixes_omp: the first m entries hold 0..m, the last m
    // entries hold 1_000_000 + 0..m.
    let m = 65_536usize;
    let n = 2 * m;
    let mut scalar = vec![0; n];
    for i in 0..m {
        scalar[i] = i as SaSint;
        scalar[n - m + i] = 1_000_000 + i as SaSint;
    }
    let mut threaded = scalar.clone();

    reconstruct_lms_suffixes_omp(&mut scalar, n as SaSint, m as SaSint, 1);
    reconstruct_lms_suffixes_omp(&mut threaded, n as SaSint, m as SaSint, 4);
    assert_eq!(threaded, scalar);
}
15353
// Parity test for three block-level distinct-LMS helpers:
//   1. `renumber_distinct_lms_suffixes_32s_4k`
//   2. `mark_distinct_lms_suffixes_32s`
//   3. `clamp_lms_suffixes_length_32s`
// Each section seeds a small array, clones it for the probe side, runs both
// implementations and compares the results.
// NOTE(review): the `probe_*` functions are presumably FFI shims over the C
// reference implementation (per the `_match_c` test name) — confirm.
#[test]
fn libsais16x64_distinct_lms_helpers_match_c() {
    // 1. renumber: six seeded entries, four of them SAINT_MIN-marked.
    let m = 6;
    let mut rust_sa = vec![0; 18];
    rust_sa[..m].copy_from_slice(&[
        2 | SAINT_MIN,
        4 | SAINT_MIN,
        6,
        8 | SAINT_MIN,
        10,
        12 | SAINT_MIN,
    ]);
    let mut c_sa = rust_sa.clone();
    // The Rust helper takes its last argument as `isize`; the probe takes `SaSint`.
    let rust_name =
        renumber_distinct_lms_suffixes_32s_4k(&mut rust_sa, m as SaSint, 1, 0, m as isize);
    let c_name = unsafe {
        probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k(
            c_sa.as_mut_ptr(),
            m as SaSint,
            1,
            0,
            m as SaSint,
        )
    };
    assert_eq!(rust_name, c_name);
    assert_eq!(rust_sa, c_sa);

    // 2. mark: six entries seeded at offset m.
    let mut rust_sa = vec![0; 12];
    rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
    let mut c_sa = rust_sa.clone();
    mark_distinct_lms_suffixes_32s(&mut rust_sa, m as SaSint, 0, 6);
    unsafe {
        probe_libsais16x64_mark_distinct_lms_suffixes_32s(c_sa.as_mut_ptr(), m as SaSint, 0, 6);
    }
    assert_eq!(rust_sa, c_sa);

    // 3. clamp: six entries seeded at offset m, including a negative value.
    let mut rust_sa = vec![0; 12];
    rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
    let mut c_sa = rust_sa.clone();
    clamp_lms_suffixes_length_32s(&mut rust_sa, m as SaSint, 0, 6);
    unsafe {
        probe_libsais16x64_clamp_lms_suffixes_length_32s(c_sa.as_mut_ptr(), m as SaSint, 0, 6);
    }
    assert_eq!(rust_sa, c_sa);
}
15399
// Single-thread parity test for four distinct-LMS `_omp` wrappers:
//   1. `renumber_distinct_lms_suffixes_32s_4k_omp`
//   2. `mark_distinct_lms_suffixes_32s_omp`
//   3. `clamp_lms_suffixes_length_32s_omp`
//   4. `renumber_and_mark_distinct_lms_suffixes_32s_4k_omp`
// Each section seeds an 18-entry array, clones it for the probe side, runs
// both implementations and compares the results.
// NOTE(review): the `probe_*` functions are presumably FFI shims over the C
// reference implementation (per the `_match_c` test name) — confirm.
#[test]
fn libsais16x64_distinct_lms_omp_wrappers_match_c() {
    let n = 12;
    let m = 6;

    // 1. renumber wrapper: six seeded entries, four of them SAINT_MIN-marked.
    let mut rust_sa = vec![0; 18];
    rust_sa[..m].copy_from_slice(&[
        2 | SAINT_MIN,
        4 | SAINT_MIN,
        6,
        8 | SAINT_MIN,
        10,
        12 | SAINT_MIN,
    ]);
    let mut c_sa = rust_sa.clone();
    let mut rust_thread_state = alloc_thread_state(1).unwrap();
    let rust_name = renumber_distinct_lms_suffixes_32s_4k_omp(
        &mut rust_sa,
        m as SaSint,
        1,
        &mut rust_thread_state,
    );
    let c_name = unsafe {
        probe_libsais16x64_renumber_distinct_lms_suffixes_32s_4k_omp(
            c_sa.as_mut_ptr(),
            m as SaSint,
            1,
        )
    };
    assert_eq!(rust_name, c_name);
    assert_eq!(rust_sa, c_sa);

    // 2. mark wrapper: six entries seeded at offset m.
    let mut rust_sa = vec![0; 18];
    rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 0, SAINT_MIN | 2, 0, 3, 0]);
    let mut c_sa = rust_sa.clone();
    mark_distinct_lms_suffixes_32s_omp(&mut rust_sa, n, m as SaSint, 1);
    unsafe {
        probe_libsais16x64_mark_distinct_lms_suffixes_32s_omp(
            c_sa.as_mut_ptr(),
            n,
            m as SaSint,
            1,
        );
    }
    assert_eq!(rust_sa, c_sa);

    // 3. clamp wrapper: six entries seeded at offset m, including a negative value.
    let mut rust_sa = vec![0; 18];
    rust_sa[m..m + 6].copy_from_slice(&[SAINT_MIN | 1, 7, SAINT_MIN | 2, 0, -5, 9]);
    let mut c_sa = rust_sa.clone();
    clamp_lms_suffixes_length_32s_omp(&mut rust_sa, n, m as SaSint, 1);
    unsafe {
        probe_libsais16x64_clamp_lms_suffixes_length_32s_omp(
            c_sa.as_mut_ptr(),
            n,
            m as SaSint,
            1,
        );
    }
    assert_eq!(rust_sa, c_sa);

    // 4. combined renumber-and-mark wrapper: same seed as section 1.
    let mut rust_sa = vec![0; 18];
    rust_sa[..m].copy_from_slice(&[
        2 | SAINT_MIN,
        4 | SAINT_MIN,
        6,
        8 | SAINT_MIN,
        10,
        12 | SAINT_MIN,
    ]);
    let mut c_sa = rust_sa.clone();
    let mut rust_thread_state = alloc_thread_state(1).unwrap();
    let rust_name = renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
        &mut rust_sa,
        n,
        m as SaSint,
        1,
        &mut rust_thread_state,
    );
    let c_name = unsafe {
        probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
            c_sa.as_mut_ptr(),
            n,
            m as SaSint,
            1,
        )
    };
    assert_eq!(rust_name, c_name);
    assert_eq!(rust_sa, c_sa);
}
15488
#[test]
fn libsais16x64_distinct_lms_omp_wrappers_use_block_partition() {
    // Each scenario runs the same wrapper twice on identical input, once with
    // thread argument 1 and once with 4, and requires identical results.

    // Renumbering: first half holds 2*i (every 7th flagged), second half is zero.
    {
        let m = 65_536usize;
        let mut single: Vec<SaSint> = (0..m)
            .map(|i| {
                let doubled = (2 * i) as SaSint;
                if i % 7 == 0 {
                    doubled | SAINT_MIN
                } else {
                    doubled
                }
            })
            .collect();
        single.resize(2 * m, 0);
        let mut multi = single.clone();

        let mut single_state = alloc_thread_state(1).unwrap();
        let mut multi_state = alloc_thread_state(4).unwrap();
        let single_name = renumber_distinct_lms_suffixes_32s_4k_omp(
            &mut single,
            m as SaSint,
            1,
            &mut single_state,
        );
        let multi_name =
            renumber_distinct_lms_suffixes_32s_4k_omp(&mut multi, m as SaSint, 4, &mut multi_state);
        assert_eq!(multi_name, single_name);
        assert_eq!(multi, single);
    }

    let n = 131_072usize;
    let m = 65_536usize;

    // Marking: the upper half mixes flagged, zero and plain entries.
    {
        let mut single: Vec<SaSint> = vec![0; n];
        for (i, slot) in single[m..m + (n >> 1)].iter_mut().enumerate() {
            let rank = i as SaSint + 1;
            *slot = if i % 5 == 0 {
                SAINT_MIN | rank
            } else if i % 11 == 0 {
                0
            } else {
                rank
            };
        }
        let mut multi = single.clone();
        mark_distinct_lms_suffixes_32s_omp(&mut single, n as SaSint, m as SaSint, 1);
        mark_distinct_lms_suffixes_32s_omp(&mut multi, n as SaSint, m as SaSint, 4);
        // Only the [m, n) window is compared.
        assert_eq!(&multi[m..n], &single[m..n]);
    }

    // Clamping: the upper half mixes flagged and plain entries (no zeros).
    {
        let mut single: Vec<SaSint> = vec![0; n];
        for (i, slot) in single[m..m + (n >> 1)].iter_mut().enumerate() {
            let rank = i as SaSint + 1;
            *slot = if i % 5 == 0 { SAINT_MIN | rank } else { rank };
        }
        let mut multi = single.clone();
        clamp_lms_suffixes_length_32s_omp(&mut single, n as SaSint, m as SaSint, 1);
        clamp_lms_suffixes_length_32s_omp(&mut multi, n as SaSint, m as SaSint, 4);
        // Only the [m, n) window is compared.
        assert_eq!(&multi[m..n], &single[m..n]);
    }
}
15546
#[test]
fn libsais16x64_unique_nonunique_lms_helpers_match_c() {
    // Compares the sequential renumber and compact helpers with their C probes.
    let m = 4;

    // Renumber: four entries in front, one zero gap, then alternating
    // flagged/plain values starting at index m + 1.
    {
        let mut t_rust = vec![0; 12];
        let mut sa_rust = vec![0; 12];
        sa_rust[..m + 5].copy_from_slice(&[
            2,
            4,
            6,
            8,
            0,
            SAINT_MIN | 11,
            22,
            SAINT_MIN | 33,
            44,
        ]);
        let mut t_c = t_rust.clone();
        let mut sa_c = sa_rust.clone();

        let f_rust = renumber_unique_and_nonunique_lms_suffixes_32s(
            &mut t_rust,
            &mut sa_rust,
            m as SaSint,
            0,
            0,
            m as isize,
        );
        // SAFETY: both pointers come from live, unaliased Vecs; assumes the probe
        // stays inside their 12 elements.
        let f_c = unsafe {
            probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                m as SaSint,
                0,
                0,
                m as SaSint,
            )
        };
        assert_eq!(f_rust, f_c);
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
    }

    // Compact: the in/out cursors start at m and 10 on both sides and must end
    // up equal after the call.
    {
        let mut sa_rust = vec![0; 10];
        sa_rust[m..m + 4].copy_from_slice(&[SAINT_MIN | 3, 4, SAINT_MIN | 5, 6]);
        let mut sa_c = sa_rust.clone();
        let (mut l_rust, mut r_rust) = (m as isize, 10isize);
        let (mut l_c, mut r_c) = (l_rust as SaSint, r_rust as SaSint);

        compact_unique_and_nonunique_lms_suffixes_32s(
            &mut sa_rust,
            m as SaSint,
            &mut l_rust,
            &mut r_rust,
            0,
            4,
        );
        // SAFETY: the pointer comes from a live, unaliased Vec and the cursor
        // references are valid for the call; assumes the probe stays in bounds.
        unsafe {
            probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s(
                sa_c.as_mut_ptr(),
                m as SaSint,
                &mut l_c,
                &mut r_c,
                0,
                4,
            );
        }
        assert_eq!(l_rust as SaSint, l_c);
        assert_eq!(r_rust as SaSint, r_c);
        assert_eq!(sa_rust, sa_c);
    }
}
15611
#[test]
fn libsais16x64_unique_nonunique_lms_omp_wrappers_match_c() {
    // Compares three `_omp` wrappers with their C probes, passing 1 as the
    // trailing thread argument each time.
    let n = 8;
    let m = 4;
    let fs = 4;

    // Fixture shared by the first and last scenario: four entries in front, one
    // zero gap, then alternating flagged/plain values from index m + 1.
    let flagged_fixture = || -> Vec<SaSint> {
        let mut sa = vec![0; 12];
        sa[..m + 5].copy_from_slice(&[
            2,
            4,
            6,
            8,
            0,
            SAINT_MIN | 11,
            22,
            SAINT_MIN | 33,
            44,
        ]);
        sa
    };

    // Renumber wrapper.
    {
        let mut t_rust = vec![0; 12];
        let mut sa_rust = flagged_fixture();
        let mut t_c = t_rust.clone();
        let mut sa_c = sa_rust.clone();

        let f_rust = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut t_rust,
            &mut sa_rust,
            m as SaSint,
            1,
        );
        // SAFETY: both pointers come from live, unaliased Vecs; assumes the probe
        // stays inside their 12 elements.
        let f_c = unsafe {
            probe_libsais16x64_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                m as SaSint,
                1,
            )
        };
        assert_eq!(f_rust, f_c);
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
    }

    // Compact wrapper: two sentinel values sit just below `m`, followed by the
    // alternating flagged/plain run.
    {
        let mut sa_rust = vec![0; 12];
        sa_rust[m - 2..m + 4].copy_from_slice(&[101, 102, SAINT_MIN | 3, 4, SAINT_MIN | 5, 6]);
        let mut sa_c = sa_rust.clone();
        compact_unique_and_nonunique_lms_suffixes_32s_omp(&mut sa_rust, n, m as SaSint, fs, 2, 1);
        // SAFETY: the pointer comes from a live, unaliased Vec; assumes the probe
        // stays inside its 12 elements.
        unsafe {
            probe_libsais16x64_compact_unique_and_nonunique_lms_suffixes_32s_omp(
                sa_c.as_mut_ptr(),
                n,
                m as SaSint,
                fs,
                2,
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // Full compact_lms_suffixes wrapper on the shared fixture.
    {
        let mut t_rust = vec![0; 12];
        let mut sa_rust = flagged_fixture();
        let mut t_c = t_rust.clone();
        let mut sa_c = sa_rust.clone();
        let f_rust =
            compact_lms_suffixes_32s_omp(&mut t_rust, &mut sa_rust, n, m as SaSint, fs, 1);
        // SAFETY: both pointers come from live, unaliased Vecs; assumes the probe
        // stays inside their 12 elements.
        let f_c = unsafe {
            probe_libsais16x64_compact_lms_suffixes_32s_omp(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                n,
                m as SaSint,
                fs,
                1,
            )
        };
        assert_eq!(f_rust, f_c);
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
    }
}
15686
#[test]
fn libsais16x64_unique_nonunique_lms_omp_wrappers_use_block_partition() {
    // Each scenario runs a wrapper with thread argument 1 and again with 4 on
    // identical input, then requires the outputs to agree.

    // Renumber wrapper.
    {
        let m = 65_536usize;
        let mut single_t = vec![0; 2 * m];
        let mut single_sa: Vec<SaSint> = vec![0; 2 * m];
        {
            // Lower half gets 2*i; upper half gets i + 3 with every 5th flagged.
            let (lower, upper) = single_sa.split_at_mut(m);
            for (i, (low, high)) in lower.iter_mut().zip(upper.iter_mut()).enumerate() {
                *low = (2 * i) as SaSint;
                let tag = i as SaSint + 3;
                *high = if i % 5 == 0 { SAINT_MIN | tag } else { tag };
            }
        }
        let mut multi_t = single_t.clone();
        let mut multi_sa = single_sa.clone();

        let single_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut single_t,
            &mut single_sa,
            m as SaSint,
            1,
        );
        let multi_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut multi_t,
            &mut multi_sa,
            m as SaSint,
            4,
        );
        assert_eq!(multi_f, single_f);
        assert_eq!(multi_t, single_t);
        assert_eq!(multi_sa, single_sa);
    }

    // Compact wrapper.
    {
        let n = 131_072usize;
        let m = 4_096usize;
        let fs = 8_192usize;
        let f = 1_024usize;

        let mut single_sa: Vec<SaSint> = vec![0; n + fs];
        // n/2 entries starting at `m`, every 32nd flagged with SAINT_MIN.
        for (i, slot) in single_sa[m..m + (n >> 1)].iter_mut().enumerate() {
            let rank = i as SaSint + 1;
            *slot = if i % 32 == 0 { SAINT_MIN | rank } else { rank };
        }
        // `f` recognisable values directly below `m`.
        for (i, slot) in single_sa[m - f..m].iter_mut().enumerate() {
            *slot = 1_000_000 + i as SaSint;
        }
        let mut multi_sa = single_sa.clone();

        compact_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut single_sa,
            n as SaSint,
            m as SaSint,
            fs as SaSint,
            f as SaSint,
            1,
        );
        compact_unique_and_nonunique_lms_suffixes_32s_omp(
            &mut multi_sa,
            n as SaSint,
            m as SaSint,
            fs as SaSint,
            f as SaSint,
            4,
        );
        // Only the first `m` and the last `m` slots are compared.
        let tail = n + fs - m..n + fs;
        assert_eq!(&multi_sa[..m], &single_sa[..m]);
        assert_eq!(&multi_sa[tail.clone()], &single_sa[tail]);
    }
}
15758
#[test]
fn libsais16x64_merge_lms_helpers_match_c() {
    // Compares the sequential unique and non-unique merge helpers with their
    // C probes on ten-element buffers.

    // Unique merge: three positions of the text carry SAINT_MIN-tagged values.
    {
        let n = 10;
        let m = 3;
        let mut t_rust = vec![0; n as usize];
        for (position, tag) in [(1, 11), (3, 22), (7, 33)].iter().copied() {
            t_rust[position] = SAINT_MIN | tag;
        }
        let mut sa_rust = vec![0; n as usize];
        sa_rust[6..].copy_from_slice(&[2, 5, 8, 9]);
        let mut t_c = t_rust.clone();
        let mut sa_c = sa_rust.clone();

        merge_unique_lms_suffixes_32s(&mut t_rust, &mut sa_rust, n, m, 0, 0, n as isize);
        // SAFETY: both pointers come from live, unaliased Vecs; assumes the probe
        // stays inside their `n` elements.
        unsafe {
            probe_libsais16x64_merge_unique_lms_suffixes_32s(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                n,
                m,
                0,
                0,
                n,
            );
        }
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
    }

    // Non-unique merge on a fully literal buffer.
    {
        let n = 10;
        let m = 5;
        let mut sa_rust = vec![9, 0, 8, 0, 0, 7, 31, 32, 33, 34];
        let mut sa_c = sa_rust.clone();

        merge_nonunique_lms_suffixes_32s(&mut sa_rust, n, m, 2, 0, m as isize);
        // SAFETY: the pointer comes from a live, unaliased Vec; assumes the probe
        // stays inside its ten elements.
        unsafe {
            probe_libsais16x64_merge_nonunique_lms_suffixes_32s(sa_c.as_mut_ptr(), n, m, 2, 0, m);
        }
        assert_eq!(sa_rust, sa_c);
    }
}
15796
#[test]
fn libsais16x64_merge_lms_omp_wrappers_match_c() {
    // Compares the three `_omp` merge wrappers with their C probes, passing 1
    // as the trailing thread argument each time.
    let n = 12;
    let m = 4;
    let f = 2;

    // Text/suffix-array pair shared by the unique and the compacted scenario.
    let tagged_inputs = || -> (Vec<SaSint>, Vec<SaSint>) {
        let mut t = vec![0; n as usize];
        t[1] = SAINT_MIN | 11;
        t[5] = SAINT_MIN | 22;
        let mut sa = vec![0; n as usize];
        sa[1] = 41;
        sa[7..].copy_from_slice(&[2, 6, 21, 22, 23]);
        (t, sa)
    };

    // Unique merge.
    {
        let (mut t_rust, mut sa_rust) = tagged_inputs();
        let mut t_c = t_rust.clone();
        let mut sa_c = sa_rust.clone();
        merge_unique_lms_suffixes_32s_omp(&mut t_rust, &mut sa_rust, n, m, 1);
        // SAFETY: both pointers come from live, unaliased Vecs; assumes the probe
        // stays inside their `n` elements.
        unsafe {
            probe_libsais16x64_merge_unique_lms_suffixes_32s_omp(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                n,
                m,
                1,
            );
        }
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
    }

    // Non-unique merge on a fully literal buffer.
    {
        let mut sa_rust = vec![0, 41, 1, 0, 55, 66, 77, 2, 6, 21, 22, 23];
        let mut sa_c = sa_rust.clone();
        merge_nonunique_lms_suffixes_32s_omp(&mut sa_rust, n, m, f, 1);
        // SAFETY: the pointer comes from a live, unaliased Vec; assumes the probe
        // stays inside its twelve elements.
        unsafe {
            probe_libsais16x64_merge_nonunique_lms_suffixes_32s_omp(sa_c.as_mut_ptr(), n, m, f, 1);
        }
        assert_eq!(sa_rust, sa_c);
    }

    // Compacted merge on the shared pair.
    {
        let (mut t_rust, mut sa_rust) = tagged_inputs();
        let mut t_c = t_rust.clone();
        let mut sa_c = sa_rust.clone();
        merge_compacted_lms_suffixes_32s_omp(&mut t_rust, &mut sa_rust, n, m, f, 1);
        // SAFETY: both pointers come from live, unaliased Vecs; assumes the probe
        // stays inside their `n` elements.
        unsafe {
            probe_libsais16x64_merge_compacted_lms_suffixes_32s_omp(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                n,
                m,
                f,
                1,
            );
        }
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
    }
}
15853
#[test]
fn libsais16x64_merge_lms_omp_wrappers_use_block_partition() {
    // Each scenario runs a merge wrapper with thread argument 1 and again with
    // 4 on identical input, then requires identical buffers.

    // Unique merge: every 17th text position carries a SAINT_MIN-tagged value.
    {
        let n = 65_536usize;
        let m = 10_000usize;

        let mut single_t = vec![0; n];
        for (i, slot) in single_t.iter_mut().enumerate().step_by(17) {
            *slot = SAINT_MIN | (i as SaSint + 1);
        }
        let tagged = single_t.iter().filter(|value| **value < 0).count();

        // One more source entry than there are tagged positions.
        let mut single_sa = vec![0; n];
        let source = n - m - 1;
        for (i, slot) in single_sa[source..=source + tagged].iter_mut().enumerate() {
            *slot = ((i * 13 + 7) % n) as SaSint;
        }
        let mut multi_t = single_t.clone();
        let mut multi_sa = single_sa.clone();

        merge_unique_lms_suffixes_32s_omp(
            &mut single_t,
            &mut single_sa,
            n as SaSint,
            m as SaSint,
            1,
        );
        merge_unique_lms_suffixes_32s_omp(&mut multi_t, &mut multi_sa, n as SaSint, m as SaSint, 4);
        assert_eq!(multi_t, single_t);
        assert_eq!(multi_sa, single_sa);
    }

    // Non-unique merge: every 9th slot in the first `m` entries is zeroed.
    {
        let n = 131_072usize;
        let m = 65_536usize;
        let f = 100usize;

        let mut single_sa: Vec<SaSint> = vec![1; n];
        for slot in single_sa[..m].iter_mut().step_by(9) {
            *slot = 0;
        }
        let zeroed = single_sa[..m].iter().filter(|value| **value == 0).count();

        // One more source entry than there are zeroed slots.
        let source = n - m - 1 + f;
        for (i, slot) in single_sa[source..=source + zeroed].iter_mut().enumerate() {
            *slot = 2_000_000 + i as SaSint;
        }
        let mut multi_sa = single_sa.clone();

        merge_nonunique_lms_suffixes_32s_omp(
            &mut single_sa,
            n as SaSint,
            m as SaSint,
            f as SaSint,
            1,
        );
        merge_nonunique_lms_suffixes_32s_omp(
            &mut multi_sa,
            n as SaSint,
            m as SaSint,
            f as SaSint,
            4,
        );
        assert_eq!(multi_sa, single_sa);
    }
}
15918
#[test]
fn libsais16x64_radix_sort_lms_suffixes_32s_match_c() {
    // Covers the 6k/2k radix-sort helpers (sequential, cached block steps and
    // `_omp` wrappers) plus the 1k variant.
    let t = vec![0, 1, 2, 3, 1, 2, 3, 0];

    // Sequential 6k helper against the probe.
    {
        let mut sa_rust = vec![0, 0, 0, 0, 0, 1, 2, 3];
        let mut sa_c = sa_rust.clone();
        let mut bucket_rust = vec![0, 6, 7, 8];
        let mut bucket_c = bucket_rust.clone();
        radix_sort_lms_suffixes_32s_6k(&t, &mut sa_rust, &mut bucket_rust, 5, 3);
        // SAFETY: all pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_lms_suffixes_32s_6k(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                5,
                3,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // Sequential 2k helper against the probe (buckets interleaved with zeros).
    {
        let mut sa_rust = vec![0, 0, 0, 0, 0, 1, 2, 3];
        let mut sa_c = sa_rust.clone();
        let mut bucket_rust = vec![0, 0, 6, 0, 7, 0, 8, 0];
        let mut bucket_c = bucket_rust.clone();
        radix_sort_lms_suffixes_32s_2k(&t, &mut sa_rust, &mut bucket_rust, 5, 3);
        // SAFETY: all pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_lms_suffixes_32s_2k(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                5,
                3,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // Cached block steps, checked against literal expectations (no probe).
    {
        let sa = vec![0, 0, 0, 0, 0, 1, 2, 3];

        // Gather fills cache slots 5..8 with index/symbol 1, 2, 3.
        let mut cache = vec![ThreadCache::default(); 8];
        radix_sort_lms_suffixes_32s_block_gather(&t, &sa, &mut cache, 5, 3);
        for (slot, expected) in (5..8).zip(1..) {
            assert_eq!(cache[slot].index, expected);
            assert_eq!(cache[slot].symbol, expected);
        }

        // The 6k block sort consumes the gathered cache from the step above.
        let mut bucket = vec![0, 6, 7, 8];
        radix_sort_lms_suffixes_32s_6k_block_sort(&mut bucket, &mut cache, 5, 3);
        assert_eq!(bucket, vec![0, 5, 6, 7]);
        for slot in 5..8 {
            assert_eq!(cache[slot].symbol, slot as SaSint);
        }

        // Same sequence for the 2k layout, starting from a fresh cache.
        let mut cache = vec![ThreadCache::default(); 8];
        radix_sort_lms_suffixes_32s_block_gather(&t, &sa, &mut cache, 5, 3);
        let mut bucket = vec![0, 0, 6, 0, 7, 0, 8, 0];
        radix_sort_lms_suffixes_32s_2k_block_sort(&mut bucket, &mut cache, 5, 3);
        assert_eq!(bucket, vec![0, 0, 5, 0, 6, 0, 7, 0]);
        for slot in 5..8 {
            assert_eq!(cache[slot].symbol, slot as SaSint);
        }
    }

    // 6k `_omp` wrapper against the probe.
    {
        let mut sa_rust = vec![0, 0, 0, 0, 0, 1, 2, 3];
        let mut sa_c = sa_rust.clone();
        let mut bucket_rust = vec![0, 6, 7, 8];
        let mut bucket_c = bucket_rust.clone();
        radix_sort_lms_suffixes_32s_6k_omp(&t, &mut sa_rust, 8, 4, &mut bucket_rust, 1);
        // SAFETY: all pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_lms_suffixes_32s_6k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                8,
                4,
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // 2k `_omp` wrapper against the probe.
    {
        let mut sa_rust = vec![0, 0, 0, 0, 0, 1, 2, 3];
        let mut sa_c = sa_rust.clone();
        let mut bucket_rust = vec![0, 0, 6, 0, 7, 0, 8, 0];
        let mut bucket_c = bucket_rust.clone();
        radix_sort_lms_suffixes_32s_2k_omp(&t, &mut sa_rust, 8, 4, &mut bucket_rust, 1);
        // SAFETY: all pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_lms_suffixes_32s_2k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                8,
                4,
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // 1k variant on a different, shorter text; it also returns a value.
    {
        let t = vec![2, 1, 3, 1, 0];
        let len = t.len();
        let mut sa_rust = vec![0; len];
        let mut sa_c = sa_rust.clone();
        let mut bucket_rust = vec![0, 2, 4, 5];
        let mut bucket_c = bucket_rust.clone();
        let m_rust =
            radix_sort_lms_suffixes_32s_1k(&t, &mut sa_rust, len as SaSint, &mut bucket_rust);
        // SAFETY: all pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        let m_c = unsafe {
            probe_libsais16x64_radix_sort_lms_suffixes_32s_1k(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                len as SaSint,
                bucket_c.as_mut_ptr(),
            )
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }
}
16037
#[test]
fn libsais16x64_radix_sort_set_markers_32s_match_c() {
    // Compares the 6k and 4k marker setters (sequential and `_omp`) with their
    // C probes on zeroed eight-slot buffers.

    // The probes take a mutable pointer, hence the `mut` bindings.
    let mut buckets_6k = vec![1, 3, 5, 7];
    let mut buckets_4k = vec![1, 0, 3, 0, 5, 0, 7, 0];

    // 6k, sequential.
    {
        let mut sa_rust = vec![0; 8];
        let mut sa_c = sa_rust.clone();
        radix_sort_set_markers_32s_6k(&mut sa_rust, &buckets_6k, 0, 4);
        // SAFETY: both pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_set_markers_32s_6k(
                sa_c.as_mut_ptr(),
                buckets_6k.as_mut_ptr(),
                0,
                4,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 6k, `_omp` wrapper.
    {
        let mut sa_rust = vec![0; 8];
        let mut sa_c = sa_rust.clone();
        radix_sort_set_markers_32s_6k_omp(&mut sa_rust, 5, &buckets_6k, 1);
        // SAFETY: both pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_set_markers_32s_6k_omp(
                sa_c.as_mut_ptr(),
                5,
                buckets_6k.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 4k, sequential.
    {
        let mut sa_rust = vec![0; 8];
        let mut sa_c = sa_rust.clone();
        radix_sort_set_markers_32s_4k(&mut sa_rust, &buckets_4k, 0, 4);
        // SAFETY: both pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_set_markers_32s_4k(
                sa_c.as_mut_ptr(),
                buckets_4k.as_mut_ptr(),
                0,
                4,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 4k, `_omp` wrapper.
    {
        let mut sa_rust = vec![0; 8];
        let mut sa_c = sa_rust.clone();
        radix_sort_set_markers_32s_4k_omp(&mut sa_rust, 5, &buckets_4k, 1);
        // SAFETY: both pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_radix_sort_set_markers_32s_4k_omp(
                sa_c.as_mut_ptr(),
                5,
                buckets_4k.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }
}
16094
#[test]
fn libsais16x64_radix_sort_set_markers_32s_omp_partitions_large_inputs() {
    // Runs each marker wrapper with thread argument 1 and again with 4 over
    // 65_600 buckets and requires identical output buffers.
    let k = 65_600usize;

    // 6k layout: bucket i simply holds i.
    {
        let buckets_6k: Vec<SaSint> = (0..k as SaSint).collect();
        let mut one_thread = vec![0; k];
        let mut four_threads = vec![0; k];
        radix_sort_set_markers_32s_6k_omp(&mut one_thread, k as SaSint, &buckets_6k, 1);
        radix_sort_set_markers_32s_6k_omp(&mut four_threads, k as SaSint, &buckets_6k, 4);
        assert_eq!(four_threads, one_thread);
    }

    // 4k layout: the same values, placed at `buckets_index2(i, 0)`.
    {
        let mut buckets_4k = vec![0; 2 * k];
        (0..k).for_each(|i| buckets_4k[buckets_index2(i, 0)] = i as SaSint);
        let mut one_thread = vec![0; k];
        let mut four_threads = vec![0; k];
        radix_sort_set_markers_32s_4k_omp(&mut one_thread, k as SaSint, &buckets_4k, 1);
        radix_sort_set_markers_32s_4k_omp(&mut four_threads, k as SaSint, &buckets_4k, 4);
        assert_eq!(four_threads, one_thread);
    }
}
16115
#[test]
fn libsais16x64_partial_sorting_32s_helpers_match_c() {
    // Compares the partial-sorting marker/bucket shifts and LMS gathers with
    // their C probes on small literal inputs.
    let k = 3;
    let k_len = k as usize;

    // 6k marker shift (`_omp` wrapper, thread argument 1).
    {
        let mut sa_rust = vec![0, SAINT_MIN, 2, SAINT_MIN, 4, SAINT_MIN];
        let mut sa_c = sa_rust.clone();
        let mut buckets = vec![0; 6 * k_len];
        buckets[buckets_index4(1, 0)] = 3;
        buckets[buckets_index4(2, 0)] = 6;
        // Entries addressed relative to offset 4 * k.
        let upper = 4 * k_len;
        buckets[upper + buckets_index2(0, 0)] = 0;
        buckets[upper + buckets_index2(1, 0)] = 1;
        partial_sorting_shift_markers_32s_6k_omp(&mut sa_rust, k, &buckets, 1);
        // SAFETY: both pointers come from live Vecs owned by this test; assumes
        // the probe stays inside them.
        unsafe {
            probe_libsais16x64_partial_sorting_shift_markers_32s_6k_omp(
                sa_c.as_mut_ptr(),
                k,
                buckets.as_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 4k marker shift on entries with and without SUFFIX_GROUP_MARKER.
    {
        let mut sa_rust = vec![
            1 | SUFFIX_GROUP_MARKER,
            2,
            3 | SUFFIX_GROUP_MARKER,
            4 | SUFFIX_GROUP_MARKER,
            5,
            6,
        ];
        let mut sa_c = sa_rust.clone();
        partial_sorting_shift_markers_32s_4k(&mut sa_rust, 6);
        // SAFETY: the pointer comes from a live Vec of six elements; assumes the
        // probe stays inside it.
        unsafe { probe_libsais16x64_partial_sorting_shift_markers_32s_4k(sa_c.as_mut_ptr(), 6) };
        assert_eq!(sa_rust, sa_c);
    }

    // 6k bucket shift: slots from 4 * k onwards hold 100, 101, ...
    {
        let mut buckets_rust: Vec<SaSint> = vec![0; 6 * k_len];
        for (slot, value) in buckets_rust[4 * k_len..].iter_mut().zip(100..) {
            *slot = value;
        }
        let mut buckets_c = buckets_rust.clone();
        partial_sorting_shift_buckets_32s_6k(k, &mut buckets_rust);
        // SAFETY: the pointer comes from a live Vec of 6 * k elements; assumes
        // the probe stays inside it.
        unsafe {
            probe_libsais16x64_partial_sorting_shift_buckets_32s_6k(k, buckets_c.as_mut_ptr())
        };
        assert_eq!(buckets_rust, buckets_c);
    }

    // 4k gather, sequential: the return value and the buffer must both match.
    {
        let mut sa_rust = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
        let mut sa_c = sa_rust.clone();
        let l_rust = partial_sorting_gather_lms_suffixes_32s_4k(&mut sa_rust, 0, 4);
        // SAFETY: the pointer comes from a live Vec of four elements; assumes the
        // probe stays inside it.
        let l_c = unsafe {
            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k(sa_c.as_mut_ptr(), 0, 4)
        };
        assert_eq!(l_rust, l_c);
        assert_eq!(sa_rust, sa_c);
    }

    // 1k gather, sequential.
    {
        let mut sa_rust = vec![1, -3, 5, -7];
        let mut sa_c = sa_rust.clone();
        let l_rust = partial_sorting_gather_lms_suffixes_32s_1k(&mut sa_rust, 0, 4);
        // SAFETY: the pointer comes from a live Vec of four elements; assumes the
        // probe stays inside it.
        let l_c = unsafe {
            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k(sa_c.as_mut_ptr(), 0, 4)
        };
        assert_eq!(l_rust, l_c);
        assert_eq!(sa_rust, sa_c);
    }

    // 4k gather, `_omp` wrapper with a freshly allocated thread state.
    {
        let mut state = alloc_thread_state(1).unwrap();
        let mut sa_rust = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
        let mut sa_c = sa_rust.clone();
        partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut sa_rust, 4, 1, &mut state);
        // SAFETY: the pointer comes from a live Vec of four elements; assumes the
        // probe stays inside it.
        unsafe {
            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_4k_omp(
                sa_c.as_mut_ptr(),
                4,
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 1k gather, `_omp` wrapper with its own freshly allocated thread state.
    {
        let mut state = alloc_thread_state(1).unwrap();
        let mut sa_rust = vec![1, -3, 5, -7];
        let mut sa_c = sa_rust.clone();
        partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut sa_rust, 4, 1, &mut state);
        // SAFETY: the pointer comes from a live Vec of four elements; assumes the
        // probe stays inside it.
        unsafe {
            probe_libsais16x64_partial_sorting_gather_lms_suffixes_32s_1k_omp(
                sa_c.as_mut_ptr(),
                4,
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
    }
}
16205
#[test]
fn libsais16x64_partial_sorting_gather_lms_suffixes_32s_omp_uses_block_partition() {
    // Runs the 4k and 1k gather wrappers with thread argument 1 and again with
    // 4, then compares the first `lms_count` output slots.
    let n = 65_536usize;

    // Payload stored at position i, before any flag bits are applied.
    let payload = |i: usize| (i as SaSint + 1) & SAINT_MAX;

    // 4k input: every 7th entry has SAINT_MIN and the group marker, every 11th
    // of the rest has only the group marker.
    let base_4k: Vec<SaSint> = (0..n)
        .map(|i| {
            let value = payload(i);
            if i % 7 == 0 {
                value | SAINT_MIN | SUFFIX_GROUP_MARKER
            } else if i % 11 == 0 {
                value | SUFFIX_GROUP_MARKER
            } else {
                value
            }
        })
        .collect();
    // 1k input: every 7th entry has SAINT_MIN, nothing else is flagged.
    let base_1k: Vec<SaSint> = (0..n)
        .map(|i| {
            let value = payload(i);
            if i % 7 == 0 {
                value | SAINT_MIN
            } else {
                value
            }
        })
        .collect();
    // Number of negative entries; only this many leading slots are compared.
    let lms_count = base_1k.iter().filter(|value| **value < 0).count();

    // The two thread states are shared by both variants below.
    let mut single_state = alloc_thread_state(1).unwrap();
    let mut multi_state = alloc_thread_state(4).unwrap();

    let mut single = base_4k.clone();
    let mut multi = base_4k;
    partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut single, n as SaSint, 1, &mut single_state);
    partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut multi, n as SaSint, 4, &mut multi_state);
    assert_eq!(&multi[..lms_count], &single[..lms_count]);

    let mut single = base_1k.clone();
    let mut multi = base_1k;
    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut single, n as SaSint, 1, &mut single_state);
    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut multi, n as SaSint, 4, &mut multi_state);
    assert_eq!(&multi[..lms_count], &single[..lms_count]);
}
16258
#[test]
fn libsais16x64_partial_sorting_32s_block_helpers_behave_like_upstream_shapes() {
    // Exercises the cached block gather/sort helpers for the 6k, 4k and 1k
    // layouts in both scan directions, against literal expectations (no probe).
    let t = vec![0, 1, 2, 1, 0];
    let k = 3;

    // Three-entry cache whose entry `slot` is preset; the others stay default.
    let seeded_cache = |slot: usize, index: SaSint, symbol: SaSint| -> Vec<ThreadCache> {
        let mut cache = vec![ThreadCache::default(); 3];
        cache[slot] = ThreadCache { symbol, index };
        cache
    };

    // ---- gather, right to left (single element at position 1) ----
    {
        let mut sa = vec![0, 4 | SAINT_MIN, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_right_to_left_32s_6k_block_gather(&t, &mut sa, &mut cache, 1, 1);
        assert_eq!(cache[1].index, 4 | SAINT_MIN);
        assert_eq!(cache[1].symbol, buckets_index4(1, 1) as SaSint);
    }
    {
        let mut sa = vec![0, 4 | SUFFIX_GROUP_MARKER, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_right_to_left_32s_4k_block_gather(&t, &mut sa, &mut cache, 1, 1);
        assert_eq!(sa[1], 0);
        assert_eq!(cache[1].index, 4 | SUFFIX_GROUP_MARKER);
        assert_eq!(cache[1].symbol, buckets_index2(1, 1) as SaSint);
    }
    {
        let mut sa = vec![0, 4, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_right_to_left_32s_1k_block_gather(&t, &mut sa, &mut cache, 1, 1);
        assert_eq!(sa[1], 0);
        assert_eq!(cache[1].index, 3 | SAINT_MIN);
        assert_eq!(cache[1].symbol, 1);
    }

    // ---- gather, left to right (single element at position 0) ----
    {
        let mut sa = vec![4 | SAINT_MIN, 0, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_left_to_right_32s_6k_block_gather(&t, &mut sa, &mut cache, 0, 1);
        assert_eq!(cache[0].index, 4 | SAINT_MIN);
        assert_eq!(cache[0].symbol, buckets_index4(1, 1) as SaSint);
    }
    {
        let mut sa = vec![4 | SUFFIX_GROUP_MARKER, 0, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_left_to_right_32s_4k_block_gather(&t, &mut sa, &mut cache, 0, 1);
        assert_eq!(sa[0], 0);
        assert_eq!(cache[0].index, 4 | SUFFIX_GROUP_MARKER);
        assert_eq!(cache[0].symbol, buckets_index2(1, 0) as SaSint);
    }
    {
        let mut sa = vec![4, 0, 0];
        let mut cache = vec![ThreadCache::default(); sa.len()];
        partial_sorting_scan_left_to_right_32s_1k_block_gather(&t, &mut sa, &mut cache, 0, 1);
        assert_eq!(sa[0], 0);
        assert_eq!(cache[0].index, 3);
        assert_eq!(cache[0].symbol, 1);
    }

    // ---- block sort, 6k layout ----
    {
        let bucket_slot = buckets_index4(1, 1);
        let mut cache = seeded_cache(1, 4 | SAINT_MIN, bucket_slot as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[bucket_slot] = 2;
        let d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
            &t,
            &mut buckets,
            0,
            &mut cache,
            1,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[1].index, 3 | SAINT_MIN);
        // The preset counter went from 2 to 1; the slot two further on became 1.
        assert_eq!(buckets[bucket_slot], 1);
        assert_eq!(buckets[bucket_slot + 2], 1);
    }
    {
        let bucket_slot = buckets_index4(1, 1);
        let mut cache = seeded_cache(0, 4 | SAINT_MIN, bucket_slot as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[bucket_slot] = 1;
        let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
            &t,
            &mut buckets,
            0,
            &mut cache,
            0,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[0].index, 3 | SAINT_MIN);
        // The preset counter went from 1 to 2; the slot two further on became 1.
        assert_eq!(buckets[bucket_slot], 2);
        assert_eq!(buckets[bucket_slot + 2], 1);
    }

    // ---- block sort, 4k layout ----
    {
        let mut cache = seeded_cache(1, 4 | SUFFIX_GROUP_MARKER, buckets_index2(1, 1) as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[3 * k + 1] = 2;
        let d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
            &t,
            k as SaSint,
            &mut buckets,
            0,
            &mut cache,
            1,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[1].symbol, 1);
        assert_eq!(buckets[3 * k + 1], 1);
    }
    {
        let mut cache = seeded_cache(0, 4 | SUFFIX_GROUP_MARKER, buckets_index2(1, 0) as SaSint);
        let mut buckets = vec![0; 4 * k];
        buckets[2 * k + 1] = 1;
        let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
            &t,
            k as SaSint,
            &mut buckets,
            0,
            &mut cache,
            0,
            1,
        );
        assert_eq!(d, 1);
        assert_eq!(cache[0].symbol, 1);
        assert_eq!(buckets[2 * k + 1], 2);
    }

    // ---- block sort, 1k layout (no return value is checked) ----
    {
        let mut cache = seeded_cache(1, 4, 1);
        let mut buckets = vec![0; k];
        buckets[1] = 2;
        partial_sorting_scan_right_to_left_32s_1k_block_sort(&t, &mut buckets, &mut cache, 1, 1);
        assert_eq!(cache[1].symbol, 1);
        assert_eq!(buckets[1], 1);
    }
    {
        let mut cache = seeded_cache(0, 4, 1);
        let mut buckets = vec![0; k];
        buckets[1] = 1;
        partial_sorting_scan_left_to_right_32s_1k_block_sort(&t, &mut buckets, &mut cache, 0, 1);
        assert_eq!(cache[0].symbol, 1);
        assert_eq!(buckets[1], 2);
    }
}
16394
16395 #[test]
16396 fn libsais16x64_partial_sorting_scan_32s_match_c() {
16397 let t = vec![0, 1, 2, 1, 3, 0];
16398 let k = 4;
16399
16400 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16401 let mut c_sa = rust_sa.clone();
16402 let mut rust_buckets = vec![0; 6 * k as usize];
16403 rust_buckets[buckets_index4(2, 0)] = 4;
16404 rust_buckets[buckets_index4(1, 1)] = 5;
16405 let mut c_buckets = rust_buckets.clone();
16406 let rust_d =
16407 partial_sorting_scan_left_to_right_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16408 let c_d = unsafe {
16409 probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k(
16410 t.as_ptr(),
16411 c_sa.as_mut_ptr(),
16412 c_buckets.as_mut_ptr(),
16413 0,
16414 0,
16415 2,
16416 )
16417 };
16418 assert_eq!(rust_d, c_d);
16419 assert_eq!(rust_sa, c_sa);
16420 assert_eq!(rust_buckets, c_buckets);
16421
16422 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16423 let mut c_sa = rust_sa.clone();
16424 let mut rust_buckets = vec![0; 4 * k as usize];
16425 rust_buckets[2 * k as usize + 2] = 4;
16426 rust_buckets[2 * k as usize + 1] = 5;
16427 let mut c_buckets = rust_buckets.clone();
16428 let rust_d = partial_sorting_scan_left_to_right_32s_4k(
16429 &t,
16430 &mut rust_sa,
16431 k,
16432 &mut rust_buckets,
16433 0,
16434 0,
16435 2,
16436 );
16437 let c_d = unsafe {
16438 probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k(
16439 t.as_ptr(),
16440 c_sa.as_mut_ptr(),
16441 k,
16442 c_buckets.as_mut_ptr(),
16443 0,
16444 0,
16445 2,
16446 )
16447 };
16448 assert_eq!(rust_d, c_d);
16449 assert_eq!(rust_sa, c_sa);
16450 assert_eq!(rust_buckets, c_buckets);
16451
16452 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16453 let mut c_sa = rust_sa.clone();
16454 let mut rust_buckets = vec![0, 5, 4, 0];
16455 let mut c_buckets = rust_buckets.clone();
16456 partial_sorting_scan_left_to_right_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16457 unsafe {
16458 probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k(
16459 t.as_ptr(),
16460 c_sa.as_mut_ptr(),
16461 c_buckets.as_mut_ptr(),
16462 0,
16463 2,
16464 );
16465 }
16466 assert_eq!(rust_sa, c_sa);
16467 assert_eq!(rust_buckets, c_buckets);
16468
16469 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16470 let mut c_sa = rust_sa.clone();
16471 let mut rust_buckets = vec![0; 6 * k as usize];
16472 rust_buckets[buckets_index4(2, 0)] = 7;
16473 rust_buckets[buckets_index4(1, 1)] = 6;
16474 let mut c_buckets = rust_buckets.clone();
16475 let rust_d =
16476 partial_sorting_scan_right_to_left_32s_6k(&t, &mut rust_sa, &mut rust_buckets, 0, 0, 2);
16477 let c_d = unsafe {
16478 probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k(
16479 t.as_ptr(),
16480 c_sa.as_mut_ptr(),
16481 c_buckets.as_mut_ptr(),
16482 0,
16483 0,
16484 2,
16485 )
16486 };
16487 assert_eq!(rust_d, c_d);
16488 assert_eq!(rust_sa, c_sa);
16489 assert_eq!(rust_buckets, c_buckets);
16490
16491 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16492 let mut c_sa = rust_sa.clone();
16493 let mut rust_buckets = vec![0; 4 * k as usize];
16494 rust_buckets[3 * k as usize + 2] = 7;
16495 rust_buckets[3 * k as usize + 1] = 6;
16496 let mut c_buckets = rust_buckets.clone();
16497 let rust_d = partial_sorting_scan_right_to_left_32s_4k(
16498 &t,
16499 &mut rust_sa,
16500 k,
16501 &mut rust_buckets,
16502 0,
16503 0,
16504 2,
16505 );
16506 let c_d = unsafe {
16507 probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k(
16508 t.as_ptr(),
16509 c_sa.as_mut_ptr(),
16510 k,
16511 c_buckets.as_mut_ptr(),
16512 0,
16513 0,
16514 2,
16515 )
16516 };
16517 assert_eq!(rust_d, c_d);
16518 assert_eq!(rust_sa, c_sa);
16519 assert_eq!(rust_buckets, c_buckets);
16520
16521 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16522 let mut c_sa = rust_sa.clone();
16523 let mut rust_buckets = vec![0, 6, 7, 0];
16524 let mut c_buckets = rust_buckets.clone();
16525 partial_sorting_scan_right_to_left_32s_1k(&t, &mut rust_sa, &mut rust_buckets, 0, 2);
16526 unsafe {
16527 probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k(
16528 t.as_ptr(),
16529 c_sa.as_mut_ptr(),
16530 c_buckets.as_mut_ptr(),
16531 0,
16532 2,
16533 );
16534 }
16535 assert_eq!(rust_sa, c_sa);
16536 assert_eq!(rust_buckets, c_buckets);
16537
16538 let mut state = alloc_thread_state(1).unwrap();
16539 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16540 let mut c_sa = rust_sa.clone();
16541 let mut rust_buckets = vec![0; 6 * k as usize];
16542 rust_buckets[buckets_index4(2, 0)] = 4;
16543 rust_buckets[buckets_index4(1, 1)] = 5;
16544 rust_buckets[buckets_index4(3, 0)] = 6;
16545 let mut c_buckets = rust_buckets.clone();
16546 let rust_d = partial_sorting_scan_left_to_right_32s_6k_omp(
16547 &t,
16548 &mut rust_sa,
16549 5,
16550 &mut rust_buckets,
16551 2,
16552 0,
16553 1,
16554 &mut state,
16555 );
16556 let c_d = unsafe {
16557 probe_libsais16x64_partial_sorting_scan_left_to_right_32s_6k_omp(
16558 t.as_ptr(),
16559 c_sa.as_mut_ptr(),
16560 5,
16561 c_buckets.as_mut_ptr(),
16562 2,
16563 0,
16564 1,
16565 )
16566 };
16567 assert_eq!(rust_d, c_d);
16568 assert_eq!(rust_sa, c_sa);
16569 assert_eq!(rust_buckets, c_buckets);
16570
16571 let mut state = alloc_thread_state(1).unwrap();
16572 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16573 let mut c_sa = rust_sa.clone();
16574 let mut rust_buckets = vec![0; 4 * k as usize];
16575 rust_buckets[2 * k as usize + 2] = 4;
16576 rust_buckets[2 * k as usize + 1] = 5;
16577 rust_buckets[2 * k as usize + 3] = 6;
16578 let mut c_buckets = rust_buckets.clone();
16579 let rust_d = partial_sorting_scan_left_to_right_32s_4k_omp(
16580 &t,
16581 &mut rust_sa,
16582 5,
16583 k,
16584 &mut rust_buckets,
16585 0,
16586 1,
16587 &mut state,
16588 );
16589 let c_d = unsafe {
16590 probe_libsais16x64_partial_sorting_scan_left_to_right_32s_4k_omp(
16591 t.as_ptr(),
16592 c_sa.as_mut_ptr(),
16593 5,
16594 k,
16595 c_buckets.as_mut_ptr(),
16596 0,
16597 1,
16598 )
16599 };
16600 assert_eq!(rust_d, c_d);
16601 assert_eq!(rust_sa, c_sa);
16602 assert_eq!(rust_buckets, c_buckets);
16603
16604 let mut state = alloc_thread_state(1).unwrap();
16605 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 7, 9];
16606 let mut c_sa = rust_sa.clone();
16607 let mut rust_buckets = vec![0, 5, 4, 6];
16608 let mut c_buckets = rust_buckets.clone();
16609 partial_sorting_scan_left_to_right_32s_1k_omp(
16610 &t,
16611 &mut rust_sa,
16612 5,
16613 &mut rust_buckets,
16614 1,
16615 &mut state,
16616 );
16617 unsafe {
16618 probe_libsais16x64_partial_sorting_scan_left_to_right_32s_1k_omp(
16619 t.as_ptr(),
16620 c_sa.as_mut_ptr(),
16621 5,
16622 c_buckets.as_mut_ptr(),
16623 1,
16624 );
16625 }
16626 assert_eq!(rust_sa, c_sa);
16627 assert_eq!(rust_buckets, c_buckets);
16628
16629 let mut state = alloc_thread_state(1).unwrap();
16630 let mut rust_sa = vec![0, 0, 3, 4, 9, 9, 9, 9];
16631 let mut c_sa = rust_sa.clone();
16632 let mut rust_buckets = vec![0; 6 * k as usize];
16633 rust_buckets[buckets_index4(2, 0)] = 7;
16634 rust_buckets[buckets_index4(1, 1)] = 6;
16635 let mut c_buckets = rust_buckets.clone();
16636 let rust_d = partial_sorting_scan_right_to_left_32s_6k_omp(
16637 &t,
16638 &mut rust_sa,
16639 5,
16640 &mut rust_buckets,
16641 1,
16642 1,
16643 0,
16644 1,
16645 &mut state,
16646 );
16647 let c_d = unsafe {
16648 probe_libsais16x64_partial_sorting_scan_right_to_left_32s_6k_omp(
16649 t.as_ptr(),
16650 c_sa.as_mut_ptr(),
16651 5,
16652 c_buckets.as_mut_ptr(),
16653 1,
16654 1,
16655 0,
16656 1,
16657 )
16658 };
16659 assert_eq!(rust_d, c_d);
16660 assert_eq!(rust_sa, c_sa);
16661 assert_eq!(rust_buckets, c_buckets);
16662
16663 let mut state = alloc_thread_state(1).unwrap();
16664 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16665 let mut c_sa = rust_sa.clone();
16666 let mut rust_buckets = vec![0; 4 * k as usize];
16667 rust_buckets[3 * k as usize + 2] = 7;
16668 rust_buckets[3 * k as usize + 1] = 6;
16669 let mut c_buckets = rust_buckets.clone();
16670 let rust_d = partial_sorting_scan_right_to_left_32s_4k_omp(
16671 &t,
16672 &mut rust_sa,
16673 2,
16674 k,
16675 &mut rust_buckets,
16676 0,
16677 1,
16678 &mut state,
16679 );
16680 let c_d = unsafe {
16681 probe_libsais16x64_partial_sorting_scan_right_to_left_32s_4k_omp(
16682 t.as_ptr(),
16683 c_sa.as_mut_ptr(),
16684 2,
16685 k,
16686 c_buckets.as_mut_ptr(),
16687 0,
16688 1,
16689 )
16690 };
16691 assert_eq!(rust_d, c_d);
16692 assert_eq!(rust_sa, c_sa);
16693 assert_eq!(rust_buckets, c_buckets);
16694
16695 let mut state = alloc_thread_state(1).unwrap();
16696 let mut rust_sa = vec![3, 4, 0, 0, 9, 9, 9, 9];
16697 let mut c_sa = rust_sa.clone();
16698 let mut rust_buckets = vec![0, 6, 7, 0];
16699 let mut c_buckets = rust_buckets.clone();
16700 partial_sorting_scan_right_to_left_32s_1k_omp(
16701 &t,
16702 &mut rust_sa,
16703 2,
16704 &mut rust_buckets,
16705 1,
16706 &mut state,
16707 );
16708 unsafe {
16709 probe_libsais16x64_partial_sorting_scan_right_to_left_32s_1k_omp(
16710 t.as_ptr(),
16711 c_sa.as_mut_ptr(),
16712 2,
16713 c_buckets.as_mut_ptr(),
16714 1,
16715 );
16716 }
16717 assert_eq!(rust_sa, c_sa);
16718 assert_eq!(rust_buckets, c_buckets);
16719 }
16720
/// Runs the 2k, 4k and 6k variants of `place_lms_suffixes_histogram_32s_*` and checks that each
/// leaves the suffix array in the same state as its `probe_libsais16x64_*` counterpart.
#[test]
fn libsais16x64_place_lms_suffixes_histogram_32s_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a vector
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.
    let n = 12;
    let k = 4;
    let m = 4;
    let width = k as usize;
    // Every variant starts from the same suffix-array contents.
    let seed_sa = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];

    // 2k bucket layout.
    {
        let mut sa_rust = seed_sa.clone();
        let mut sa_c = seed_sa.clone();
        let mut histogram = vec![0; 2 * width];
        histogram[buckets_index2(1, 0)] = 7;
        histogram[buckets_index2(1, 1)] = 2;
        histogram[buckets_index2(2, 0)] = 10;
        histogram[buckets_index2(2, 1)] = 1;

        place_lms_suffixes_histogram_32s_2k(&mut sa_rust, n, k, m, &histogram);
        unsafe {
            probe_libsais16x64_place_lms_suffixes_histogram_32s_2k(
                sa_c.as_mut_ptr(),
                n,
                k,
                m,
                histogram.as_ptr(),
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 4k bucket layout: the values 7 and 10 are stored at offset 3 * k.
    {
        let mut sa_rust = seed_sa.clone();
        let mut sa_c = seed_sa.clone();
        let mut histogram = vec![0; 4 * width];
        histogram[buckets_index2(1, 1)] = 2;
        histogram[buckets_index2(2, 1)] = 1;
        histogram[3 * width + 1] = 7;
        histogram[3 * width + 2] = 10;

        place_lms_suffixes_histogram_32s_4k(&mut sa_rust, n, k, m, &histogram);
        unsafe {
            probe_libsais16x64_place_lms_suffixes_histogram_32s_4k(
                sa_c.as_mut_ptr(),
                n,
                k,
                m,
                histogram.as_ptr(),
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 6k bucket layout: the values 7 and 10 are stored at offset 5 * k.
    {
        let mut sa_rust = seed_sa.clone();
        let mut sa_c = seed_sa.clone();
        let mut histogram = vec![0; 6 * width];
        histogram[buckets_index4(1, 1)] = 2;
        histogram[buckets_index4(2, 1)] = 1;
        histogram[5 * width + 1] = 7;
        histogram[5 * width + 2] = 10;

        place_lms_suffixes_histogram_32s_6k(&mut sa_rust, n, k, m, &histogram);
        unsafe {
            probe_libsais16x64_place_lms_suffixes_histogram_32s_6k(
                sa_c.as_mut_ptr(),
                n,
                k,
                m,
                histogram.as_ptr(),
            );
        }
        assert_eq!(sa_rust, sa_c);
    }
}
16783
/// Compares the gather / count / count-and-gather LMS helpers (plain and compacted, 2k layout)
/// against their `probe_libsais16x64_*` counterparts on one small fixed text.
#[test]
fn libsais16x64_count_gather_lms_suffixes_32s_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a vector
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.
    let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
    // Same text with two symbols OR-ed with SAINT_MIN.
    let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
    let n = t.len() as SaSint;
    let k = 4;
    let bucket_len = 2 * k as usize;

    // gather_lms_suffixes_32s
    {
        let mut sa_rust = vec![0; t.len()];
        let mut sa_c = sa_rust.clone();
        let m_rust = gather_lms_suffixes_32s(&t, &mut sa_rust, n);
        let m_c = unsafe {
            probe_libsais16x64_gather_lms_suffixes_32s(t.as_ptr(), sa_c.as_mut_ptr(), n)
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
    }

    // gather_compacted_lms_suffixes_32s
    {
        let mut sa_rust = vec![0; compact_t.len()];
        let mut sa_c = sa_rust.clone();
        let m_rust = gather_compacted_lms_suffixes_32s(&compact_t, &mut sa_rust, n);
        let m_c = unsafe {
            probe_libsais16x64_gather_compacted_lms_suffixes_32s(
                compact_t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
            )
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
    }

    // count_lms_suffixes_32s_2k, starting from buckets pre-filled with 99.
    {
        let mut buckets_rust = vec![99; bucket_len];
        let mut buckets_c = buckets_rust.clone();
        count_lms_suffixes_32s_2k(&t, n, k, &mut buckets_rust);
        unsafe {
            probe_libsais16x64_count_lms_suffixes_32s_2k(t.as_ptr(), n, k, buckets_c.as_mut_ptr());
        }
        assert_eq!(buckets_rust, buckets_c);
    }

    // count_and_gather_lms_suffixes_32s_2k over the range 0..n.
    {
        let mut sa_rust = vec![0; t.len()];
        let mut sa_c = sa_rust.clone();
        let mut buckets_rust = vec![0; bucket_len];
        let mut buckets_c = buckets_rust.clone();
        let m_rust = count_and_gather_lms_suffixes_32s_2k(
            &t,
            &mut sa_rust,
            n,
            k,
            &mut buckets_rust,
            0,
            n as isize,
        );
        let m_c = unsafe {
            probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                0,
                n,
            )
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }

    // count_and_gather_compacted_lms_suffixes_32s_2k over the range 0..n.
    {
        let mut sa_rust = vec![0; compact_t.len()];
        let mut sa_c = sa_rust.clone();
        let mut buckets_rust = vec![0; bucket_len];
        let mut buckets_c = buckets_rust.clone();
        let m_rust = count_and_gather_compacted_lms_suffixes_32s_2k(
            &compact_t,
            &mut sa_rust,
            n,
            k,
            &mut buckets_rust,
            0,
            n as isize,
        );
        let m_c = unsafe {
            probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k(
                compact_t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                0,
                n,
            )
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }
}
16876
/// Spot-checks several small helper functions against hard-coded expected values.
#[test]
fn libsais16x64_small_openmp_leaf_helpers_match_upstream_shapes() {
    // Counters for negative and zero entries, both called with the arguments (1, 5).
    {
        let marked = [-1, 0, 3, SAINT_MIN, 0, 7, -5];
        assert_eq!(count_negative_marked_suffixes(&marked, 1, 5), 1);
        assert_eq!(count_zero_marked_suffixes(&marked, 1, 5), 2);
    }

    // Four groups of three counts, each padded to a stride of four.
    {
        let mut strided = vec![1, 2, 3, 0, 4, 5, 6, 0, 7, 8, 9, 0, 10, 11, 12, 0];
        accumulate_counts_s32_4(&mut strided, 12, 3, 4);
        assert_eq!(&strided[12..15], &[22, 26, 30]);
    }

    // Ten groups of the form [b, b + 1, b + 2, 0].
    {
        let mut many: Vec<_> = (0..10)
            .flat_map(|base| [base, base + 1, base + 2, 0])
            .collect();
        accumulate_counts_s32(&mut many, 36, 3, 4, 10);
        assert_eq!(&many[36..39], &[45, 55, 65]);
    }

    // Compacted LMS counting on a three-symbol text with one SAINT_MIN-tagged symbol.
    {
        let tagged_text = [1, SAINT_MIN | 2, 0];
        let mut counts = vec![0; 6];
        count_compacted_lms_suffixes_32s_2k(
            &tagged_text,
            tagged_text.len() as SaSint,
            3,
            &mut counts,
        );
        assert_eq!(counts, vec![1, 0, 1, 0, 0, 1]);
    }

    {
        let unique_sa = [0, 2, 4, 6, 0, -10, 20, -30];
        assert_eq!(count_unique_suffixes(&unique_sa, 4, 0, 4), 2);
    }

    // Out-of-place widening: only destination slots 1 and 2 end up non-zero.
    {
        let narrow = [10u32, 11, 12, 13];
        let mut wide = [0u64; 4];
        convert_32u_to_64u(&narrow, &mut wide, 1, 2);
        assert_eq!(wide, [0, 11, 12, 0]);
    }

    // In-place widening followed by narrowing back.
    {
        let mut words = [10u32, 11, 12, 13, 99, 99, 99, 99];
        convert_inplace_32u_to_64u(&mut words, 0, 4);
        assert_eq!(words, [10, 0, 11, 0, 12, 0, 13, 0]);
        convert_inplace_64u_to_32u(&mut words, 0, 4);
        assert_eq!(&words[..4], &[10, 11, 12, 13]);
    }

    // `_omp` flavour of the in-place widening.
    {
        let mut words = [20u32, 21, 22, 23, 99, 99, 99, 99];
        convert_inplace_32u_to_64u_omp(&mut words, 4, 2);
        assert_eq!(words, [20, 0, 21, 0, 22, 0, 23, 0]);
    }

    // Bucket stride for three (first, second, third argument) -> expected combinations.
    for (first, second, third, expected) in [
        (20_000, 1000, 4, 1024),
        (3024, 1001, 4, 1008),
        (3000, 1001, 4, 1001),
    ] {
        assert_eq!(get_bucket_stride(first, second, third), expected);
    }
}
16921
/// Compares the `_omp` wrappers of the 2k count-and-gather helpers (plain and compacted) with
/// their `probe_libsais16x64_*` counterparts, using a single thread.
#[test]
fn libsais16x64_count_gather_lms_suffixes_32s_omp_wrappers_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a vector
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.
    let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
    let n = t.len() as SaSint;
    let k = 4;
    let bucket_len = 2 * k as usize;

    // Plain text.
    {
        let mut sa_rust = vec![0; t.len()];
        let mut sa_c = sa_rust.clone();
        let mut buckets_rust = vec![0; bucket_len];
        let mut buckets_c = buckets_rust.clone();
        let mut thread_state = alloc_thread_state(1).unwrap();

        let m_rust = count_and_gather_lms_suffixes_32s_2k_omp(
            &t,
            &mut sa_rust,
            n,
            k,
            &mut buckets_rust,
            0,
            1,
            &mut thread_state,
        );
        let m_c = unsafe {
            probe_libsais16x64_count_and_gather_lms_suffixes_32s_2k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                0,
                1,
            )
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }

    // Text with two SAINT_MIN-tagged symbols; only the buffers are compared here.
    {
        let compact_t = vec![2, SAINT_MIN | 1, 3, 1, SAINT_MIN | 2, 0, 1, 0];
        let mut sa_rust = vec![0; compact_t.len()];
        let mut sa_c = sa_rust.clone();
        let mut buckets_rust = vec![0; bucket_len];
        let mut buckets_c = buckets_rust.clone();
        let mut thread_state = alloc_thread_state(1).unwrap();

        count_and_gather_compacted_lms_suffixes_32s_2k_omp(
            &compact_t,
            &mut sa_rust,
            n,
            k,
            &mut buckets_rust,
            0,
            1,
            &mut thread_state,
        );
        unsafe {
            probe_libsais16x64_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
                compact_t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                0,
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }
}
16987
/// Compares the 4k count / count-and-gather helpers (serial and `_omp`) and `count_suffixes_32s`
/// with the `probe_libsais16x64_*` functions on one small fixed text.
#[test]
fn libsais16x64_count_gather_lms_suffixes_32s_4k_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a vector
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.
    let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
    let n = t.len() as SaSint;
    let k = 4;
    let bucket_len = 4 * k as usize;

    // count_lms_suffixes_32s_4k is checked against the buckets produced by the
    // count-and-gather probe. The Rust buckets start at 77, the probe's at 0.
    {
        let mut buckets_rust = vec![77; bucket_len];
        let mut buckets_c = vec![0; bucket_len];
        let mut scratch_sa = vec![0; t.len()];
        count_lms_suffixes_32s_4k(&t, n, k, &mut buckets_rust);
        unsafe {
            probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k(
                t.as_ptr(),
                scratch_sa.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                0,
                n,
            );
        }
        assert_eq!(buckets_rust, buckets_c);
    }

    // Serial count-and-gather over the range 0..n.
    {
        let mut sa_rust = vec![0; t.len()];
        let mut sa_c = sa_rust.clone();
        let mut buckets_rust = vec![0; bucket_len];
        let mut buckets_c = buckets_rust.clone();
        let m_rust = count_and_gather_lms_suffixes_32s_4k(
            &t,
            &mut sa_rust,
            n,
            k,
            &mut buckets_rust,
            0,
            n as isize,
        );
        let m_c = unsafe {
            probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                0,
                n,
            )
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }

    // `_omp` wrapper with a single thread.
    {
        let mut sa_rust = vec![0; t.len()];
        let mut sa_c = sa_rust.clone();
        let mut buckets_rust = vec![0; bucket_len];
        let mut buckets_c = buckets_rust.clone();
        let mut thread_state = alloc_thread_state(1).unwrap();
        let m_rust = count_and_gather_lms_suffixes_32s_4k_omp(
            &t,
            &mut sa_rust,
            n,
            k,
            &mut buckets_rust,
            0,
            1,
            &mut thread_state,
        );
        let m_c = unsafe {
            probe_libsais16x64_count_and_gather_lms_suffixes_32s_4k_omp(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                buckets_c.as_mut_ptr(),
                0,
                1,
            )
        };
        assert_eq!(m_rust, m_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }

    // count_suffixes_32s, starting from buckets pre-filled with 91.
    {
        let mut buckets_rust = vec![91; k as usize];
        let mut buckets_c = buckets_rust.clone();
        count_suffixes_32s(&t, n, k, &mut buckets_rust);
        unsafe {
            probe_libsais16x64_count_suffixes_32s(t.as_ptr(), n, k, buckets_c.as_mut_ptr());
        }
        assert_eq!(buckets_rust, buckets_c);
    }
}
17077
/// Feeds identical bucket arrays to each `initialize_buckets_*_32s_*` function and to its
/// `probe_libsais16x64_*` counterpart, then checks that both end up with the same contents.
#[test]
fn libsais16x64_initialize_buckets_32s_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a vector
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.
    let k = 4;
    let t = vec![2, 1, 3, 1, 2, 0, 1, 0];
    let seed_6k = vec![
        1, 2, 0, 1, 0, 1, 2, 0, 3, 0, 1, 1, 2, 1, 0, 0, 9, 9, 9, 9, 8, 8, 8, 8,
    ];
    let seed_4k = vec![1, 2, 0, 1, 3, 0, 2, 1, 9, 9, 9, 9, 8, 8, 8, 8];
    let seed_2k = vec![1, 2, 0, 1, 3, 0, 2, 1];
    let seed_1k = vec![2, 1, 3, 2];

    // start_and_end, 6k layout.
    {
        let mut via_rust = seed_6k.clone();
        let mut via_c = seed_6k.clone();
        initialize_buckets_start_and_end_32s_6k(k, &mut via_rust);
        unsafe {
            probe_libsais16x64_initialize_buckets_start_and_end_32s_6k(k, via_c.as_mut_ptr())
        };
        assert_eq!(via_rust, via_c);
    }

    // start_and_end, 4k layout.
    {
        let mut via_rust = seed_4k.clone();
        let mut via_c = seed_4k.clone();
        initialize_buckets_start_and_end_32s_4k(k, &mut via_rust);
        unsafe {
            probe_libsais16x64_initialize_buckets_start_and_end_32s_4k(k, via_c.as_mut_ptr())
        };
        assert_eq!(via_rust, via_c);
    }

    // end only, 2k layout.
    {
        let mut via_rust = seed_2k.clone();
        let mut via_c = seed_2k.clone();
        initialize_buckets_end_32s_2k(k, &mut via_rust);
        unsafe { probe_libsais16x64_initialize_buckets_end_32s_2k(k, via_c.as_mut_ptr()) };
        assert_eq!(via_rust, via_c);
    }

    // start_and_end, 2k layout.
    {
        let mut via_rust = seed_2k.clone();
        let mut via_c = seed_2k.clone();
        initialize_buckets_start_and_end_32s_2k(k, &mut via_rust);
        unsafe {
            probe_libsais16x64_initialize_buckets_start_and_end_32s_2k(k, via_c.as_mut_ptr())
        };
        assert_eq!(via_rust, via_c);
    }

    // start only, 1k layout.
    {
        let mut via_rust = seed_1k.clone();
        let mut via_c = seed_1k.clone();
        initialize_buckets_start_32s_1k(k, &mut via_rust);
        unsafe { probe_libsais16x64_initialize_buckets_start_32s_1k(k, via_c.as_mut_ptr()) };
        assert_eq!(via_rust, via_c);
    }

    // end only, 1k layout.
    {
        let mut via_rust = seed_1k.clone();
        let mut via_c = seed_1k.clone();
        initialize_buckets_end_32s_1k(k, &mut via_rust);
        unsafe { probe_libsais16x64_initialize_buckets_end_32s_1k(k, via_c.as_mut_ptr()) };
        assert_eq!(via_rust, via_c);
    }

    // LMS radix-sort initialisation, 2k layout; the trailing 4 is passed through unchanged.
    {
        let mut via_rust = vec![1, 2, 0, 1, 3, 0, 2, 1];
        let mut via_c = via_rust.clone();
        initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(&t, k, &mut via_rust, 4);
        unsafe {
            probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
                t.as_ptr(),
                k,
                via_c.as_mut_ptr(),
                4,
            );
        }
        assert_eq!(via_rust, via_c);
    }

    // LMS radix-sort initialisation, 6k layout; the returned value is compared as well.
    {
        let mut via_rust = vec![
            1, 2, 0, 1, 3, 0, 2, 1, 1, 0, 2, 0, 0, 1, 1, 0, 9, 9, 9, 9, 8, 8, 8, 8,
        ];
        let mut via_c = via_rust.clone();
        let sum_rust =
            initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(&t, k, &mut via_rust, 4);
        let sum_c = unsafe {
            probe_libsais16x64_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
                t.as_ptr(),
                k,
                via_c.as_mut_ptr(),
                4,
            )
        };
        assert_eq!(sum_rust, sum_c);
        assert_eq!(via_rust, via_c);
    }

    // Radix and partial sorting initialisation, 4k layout.
    {
        let mut via_rust = seed_4k.clone();
        let mut via_c = seed_4k;
        initialize_buckets_for_radix_and_partial_sorting_32s_4k(&t, k, &mut via_rust, 4);
        unsafe {
            probe_libsais16x64_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
                t.as_ptr(),
                k,
                via_c.as_mut_ptr(),
                4,
            );
        }
        assert_eq!(via_rust, via_c);
    }
}
17167
/// Runs the 4k, 2k and 1k variants of `place_lms_suffixes_interval_32s_*` and checks the
/// resulting suffix arrays against the `probe_libsais16x64_*` counterparts.
#[test]
fn libsais16x64_place_lms_suffixes_interval_32s_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a vector
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.
    let n = 12;
    let k = 4;
    let m = 4;
    let width = k as usize;

    // 4k bucket layout.
    {
        let mut sa_rust = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
        let mut sa_c = sa_rust.clone();
        let mut intervals = vec![0; 4 * width];
        intervals[buckets_index2(0, 1)] = 2;
        intervals[buckets_index2(1, 1)] = 2;
        intervals[buckets_index2(2, 1)] = 3;
        intervals[buckets_index2(2, 1) + buckets_index2(1, 0)] = 4;
        intervals[3 * width + 1] = 7;
        intervals[3 * width + 2] = 10;

        place_lms_suffixes_interval_32s_4k(&mut sa_rust, n, k, m, &intervals);
        unsafe {
            probe_libsais16x64_place_lms_suffixes_interval_32s_4k(
                sa_c.as_mut_ptr(),
                n,
                k,
                m,
                intervals.as_ptr(),
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 2k bucket layout.
    {
        let mut sa_rust = vec![101, 102, 103, 104, 9, 9, 9, 9, 9, 9, 9, 9];
        let mut sa_c = sa_rust.clone();
        let mut intervals = vec![0; 2 * width];
        intervals[buckets_index2(1, 0)] = 7;
        intervals[buckets_index2(0, 1)] = 1;
        intervals[buckets_index2(1, 1)] = 1;
        intervals[buckets_index2(2, 0)] = 10;
        intervals[buckets_index2(2, 1)] = 2;
        intervals[buckets_index2(3, 1)] = 3;

        place_lms_suffixes_interval_32s_2k(&mut sa_rust, n, k, m, &intervals);
        unsafe {
            probe_libsais16x64_place_lms_suffixes_interval_32s_2k(
                sa_c.as_mut_ptr(),
                n,
                k,
                m,
                intervals.as_ptr(),
            );
        }
        assert_eq!(sa_rust, sa_c);
    }

    // 1k variant: also takes the text, and the probe receives a mutable bucket pointer,
    // so the buckets are compared afterwards too.
    {
        let t = vec![0, 1, 2, 1, 2, 3, 1, 3, 0, 0, 0, 0];
        let mut sa_rust = vec![1, 3, 4, 7, 9, 9, 9, 9, 9, 9, 9, 9];
        let mut sa_c = sa_rust.clone();
        let buckets_rust = vec![0, 3, 6, 10];
        let mut buckets_c = buckets_rust.clone();

        place_lms_suffixes_interval_32s_1k(&t, &mut sa_rust, k, m, &buckets_rust);
        unsafe {
            probe_libsais16x64_place_lms_suffixes_interval_32s_1k(
                t.as_ptr(),
                sa_c.as_mut_ptr(),
                k,
                m,
                buckets_c.as_mut_ptr(),
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }
}
17234
/// Checks that `renumber_and_mark_distinct_lms_suffixes_32s_1k_omp` returns the same value and
/// leaves the text and suffix array in the same state as the `probe_libsais16x64_*` counterpart.
#[test]
fn libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_1k_matches_c() {
    let text_rust = vec![2, 1, 3, 1, 2, 0, 1, 0];
    let n = text_rust.len() as SaSint;

    // `m` is taken from a separate gather pass over a scratch suffix array.
    let mut scratch_sa = vec![0; text_rust.len()];
    let m = gather_lms_suffixes_32s(&text_rust, &mut scratch_sa, n);

    let mut sa_rust = vec![0; text_rust.len()];
    let mut sa_c = sa_rust.clone();
    // The probe takes the text through a mutable pointer, so it gets its own copy.
    let mut text_c = text_rust.clone();

    let name_rust =
        renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(&text_rust, &mut sa_rust, n, m, 1);
    // SAFETY: both pointers come from vectors that outlive the call; the probe is assumed to
    // stay within the same ranges as the Rust function it mirrors.
    let name_c = unsafe {
        probe_libsais16x64_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
            text_c.as_mut_ptr(),
            sa_c.as_mut_ptr(),
            n,
            m,
            1,
        )
    };

    assert_eq!(name_rust, name_c);
    assert_eq!(text_rust, text_c);
    assert_eq!(sa_rust, sa_c);
}
17260
/// Compares the 2k and 1k `reconstruct_compacted_lms_suffixes_32s_*_omp` functions with their
/// `probe_libsais16x64_*` counterparts, with `fs` and `f` both zero and a single thread.
#[test]
fn libsais16x64_reconstruct_compacted_lms_suffixes_32s_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a vector
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.
    let n = 8;
    let k = 4;
    let fs = 0;
    let f = 0;
    let text = vec![2, 1, 3, 1, 2, 0, 1, 0];

    // `m` is taken from a separate gather pass over a scratch suffix array.
    let mut scratch_sa = vec![0; n as usize];
    let m = gather_lms_suffixes_32s(&text, &mut scratch_sa, n);

    // 2k variant: uses bucket arrays and, on the Rust side, a thread state.
    {
        let mut t_rust = text.clone();
        let mut t_c = t_rust.clone();
        let mut sa_rust = vec![0; n as usize];
        let mut sa_c = sa_rust.clone();
        let mut buckets_rust = vec![0; 2 * k as usize];
        let mut buckets_c = buckets_rust.clone();
        let mut thread_state = alloc_thread_state(1).unwrap();

        reconstruct_compacted_lms_suffixes_32s_2k_omp(
            &mut t_rust,
            &mut sa_rust,
            n,
            k,
            m,
            fs,
            f,
            &mut buckets_rust,
            0,
            1,
            &mut thread_state,
        );
        unsafe {
            probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_2k_omp(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                n,
                k,
                m,
                fs,
                f,
                buckets_c.as_mut_ptr(),
                0,
                1,
            );
        }
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }

    // 1k variant: no bucket arrays involved.
    {
        let mut t_rust = text.clone();
        let mut t_c = t_rust.clone();
        let mut sa_rust = vec![0; n as usize];
        let mut sa_c = sa_rust.clone();

        reconstruct_compacted_lms_suffixes_32s_1k_omp(&mut t_rust, &mut sa_rust, n, m, fs, f, 1);
        unsafe {
            probe_libsais16x64_reconstruct_compacted_lms_suffixes_32s_1k_omp(
                t_c.as_mut_ptr(),
                sa_c.as_mut_ptr(),
                n,
                m,
                fs,
                f,
                1,
            );
        }
        assert_eq!(t_rust, t_c);
        assert_eq!(sa_rust, sa_c);
    }
}
17327
/// Compares the 16u partial-sorting `_omp` wrappers (left-to-right, right-to-left and the GSA
/// right-to-left variant) with their `probe_libsais16x64_*` counterparts. Every scenario starts
/// from a fresh `partial_scan_fixture()`.
#[test]
fn libsais16x64_partial_omp_wrappers_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a buffer
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.

    // Entries written into SA[6..10] before the two right-to-left scans.
    let right_to_left_seed = [3, 5 | SAINT_MIN, 7, 9 | SAINT_MIN];

    // Left-to-right scan; the returned value is compared too.
    {
        let (text, mut sa_rust, mut buckets_rust) = partial_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut buckets_c = buckets_rust.clone();
        let n = text.len() as SaSint;

        let d_rust = partial_sorting_scan_left_to_right_16u_omp(
            &text,
            &mut sa_rust,
            n,
            8,
            &mut buckets_rust,
            5,
            3,
            1,
        );
        let d_c = unsafe {
            probe_libsais16x64_partial_sorting_scan_left_to_right_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                buckets_c.as_mut_ptr(),
                5,
                3,
                1,
            )
        };
        assert_eq!(d_rust, d_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }

    // Right-to-left scan.
    {
        let (text, mut sa_rust, mut buckets_rust) = partial_scan_fixture();
        sa_rust[6..10].copy_from_slice(&right_to_left_seed);
        let mut sa_c = sa_rust.clone();
        let mut buckets_c = buckets_rust.clone();
        let n = text.len() as SaSint;

        partial_sorting_scan_right_to_left_16u_omp(
            &text,
            &mut sa_rust,
            n,
            8,
            &mut buckets_rust,
            0,
            5,
            3,
            1,
        );
        unsafe {
            probe_libsais16x64_partial_sorting_scan_right_to_left_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                buckets_c.as_mut_ptr(),
                0,
                5,
                3,
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }

    // GSA right-to-left scan, same inputs as the previous scenario.
    {
        let (text, mut sa_rust, mut buckets_rust) = partial_scan_fixture();
        sa_rust[6..10].copy_from_slice(&right_to_left_seed);
        let mut sa_c = sa_rust.clone();
        let mut buckets_c = buckets_rust.clone();
        let n = text.len() as SaSint;

        partial_gsa_scan_right_to_left_16u_omp(
            &text,
            &mut sa_rust,
            n,
            8,
            &mut buckets_rust,
            0,
            5,
            3,
            1,
        );
        unsafe {
            probe_libsais16x64_partial_gsa_scan_right_to_left_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                buckets_c.as_mut_ptr(),
                0,
                5,
                3,
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(buckets_rust, buckets_c);
    }
}
17422
/// Compares the 16u final-scan `_omp` wrappers (BWT, BWT-aux, sorting and GSA, in both scan
/// directions where available) with their `probe_libsais16x64_*` counterparts. Every scenario
/// starts from a fresh `final_scan_fixture()` and uses a single thread.
#[test]
fn libsais16x64_final_omp_wrappers_match_c() {
    // SAFETY (all `unsafe` blocks below): every pointer handed to a probe comes from a buffer
    // that outlives the call; the probes are assumed to stay within the same ranges as the
    // Rust functions they mirror.

    // BWT scan, left to right.
    {
        let (text, mut sa_rust, mut bucket_rust) = final_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut bucket_c = bucket_rust.clone();
        let n = text.len() as SaSint;

        final_bwt_scan_left_to_right_16u_omp(&text, &mut sa_rust, n, 8, &mut bucket_rust, 1);
        unsafe {
            probe_libsais16x64_final_bwt_scan_left_to_right_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // BWT-aux scan, left to right; the auxiliary index buffer is compared as well.
    {
        let (text, mut sa_rust, mut bucket_rust) = final_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut bucket_c = bucket_rust.clone();
        let mut aux_rust = vec![-1; 8];
        let mut aux_c = aux_rust.clone();
        let n = text.len() as SaSint;

        final_bwt_aux_scan_left_to_right_16u_omp(
            &text,
            &mut sa_rust,
            n,
            8,
            1,
            &mut aux_rust,
            &mut bucket_rust,
            1,
        );
        unsafe {
            probe_libsais16x64_final_bwt_aux_scan_left_to_right_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                1,
                aux_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
        assert_eq!(aux_rust, aux_c);
    }

    // Sorting scan, left to right.
    {
        let (text, mut sa_rust, mut bucket_rust) = final_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut bucket_c = bucket_rust.clone();
        let n = text.len() as SaSint;

        final_sorting_scan_left_to_right_16u_omp(&text, &mut sa_rust, n, 8, &mut bucket_rust, 1);
        unsafe {
            probe_libsais16x64_final_sorting_scan_left_to_right_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // BWT scan, right to left; the returned index is compared too.
    {
        let (text, mut sa_rust, mut bucket_rust) = final_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut bucket_c = bucket_rust.clone();
        let n = text.len() as SaSint;

        let index_rust =
            final_bwt_scan_right_to_left_16u_omp(&text, &mut sa_rust, n, 8, &mut bucket_rust, 1);
        let index_c = unsafe {
            probe_libsais16x64_final_bwt_scan_right_to_left_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                bucket_c.as_mut_ptr(),
                1,
            )
        };
        assert_eq!(index_rust, index_c);
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // BWT-aux scan, right to left.
    {
        let (text, mut sa_rust, mut bucket_rust) = final_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut bucket_c = bucket_rust.clone();
        let mut aux_rust = vec![-1; 8];
        let mut aux_c = aux_rust.clone();
        let n = text.len() as SaSint;

        final_bwt_aux_scan_right_to_left_16u_omp(
            &text,
            &mut sa_rust,
            n,
            8,
            1,
            &mut aux_rust,
            &mut bucket_rust,
            1,
        );
        unsafe {
            probe_libsais16x64_final_bwt_aux_scan_right_to_left_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                n,
                8,
                1,
                aux_c.as_mut_ptr(),
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
        assert_eq!(aux_rust, aux_c);
    }

    // Sorting scan, right to left; called with the literal arguments (0, 6, 8).
    {
        let (text, mut sa_rust, mut bucket_rust) = final_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut bucket_c = bucket_rust.clone();

        final_sorting_scan_right_to_left_16u_omp(
            &text,
            &mut sa_rust,
            0,
            6,
            8,
            &mut bucket_rust,
            1,
        );
        unsafe {
            probe_libsais16x64_final_sorting_scan_right_to_left_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                0,
                6,
                8,
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }

    // GSA scan, right to left; same literal arguments as the sorting scan above.
    {
        let (text, mut sa_rust, mut bucket_rust) = final_scan_fixture();
        let mut sa_c = sa_rust.clone();
        let mut bucket_c = bucket_rust.clone();

        final_gsa_scan_right_to_left_16u_omp(&text, &mut sa_rust, 0, 6, 8, &mut bucket_rust, 1);
        unsafe {
            probe_libsais16x64_final_gsa_scan_right_to_left_16u_omp(
                text.as_ptr(),
                sa_c.as_mut_ptr(),
                0,
                6,
                8,
                bucket_c.as_mut_ptr(),
                1,
            );
        }
        assert_eq!(sa_rust, sa_c);
        assert_eq!(bucket_rust, bucket_c);
    }
}
17596
/// Builds the suffix array of a short text with `libsais16x64` and compares it with the
/// output of `brute_sa`, spot-checking the optional frequency table on the way.
#[test]
fn libsais16x64_matches_bruteforce() {
    let text = [3, 1, 4, 1, 5, 9, 0, 2];
    let mut suffix_array = vec![0; text.len()];
    let mut histogram = vec![0; ALPHABET_SIZE];

    let status = libsais16x64(&text, &mut suffix_array, 0, Some(&mut histogram));
    assert_eq!(status, 0);

    let expected = brute_sa(&text);
    assert_eq!(suffix_array, expected);

    // The text contains symbol 1 twice and symbol 9 once.
    assert_eq!(histogram[1], 2);
    assert_eq!(histogram[9], 1);
}
17607
/// Round-trips a short text through `libsais16x64_bwt` and `libsais16x64_unbwt` and expects
/// the decoded symbols to equal the input.
#[test]
fn libsais16x64_bwt_round_trips() {
    let original = [2, 1, 3, 1, 2, 4, 1, 0];
    let len = original.len();
    let mut transformed = vec![0; len];
    let mut scratch = vec![0; len];

    let primary_index = libsais16x64_bwt(&original, &mut transformed, &mut scratch, 0, None);
    assert!(primary_index > 0);

    // The scratch buffer used by the forward transform is handed to the inverse as well.
    let mut decoded = vec![0; len];
    let status = libsais16x64_unbwt(
        &transformed,
        &mut decoded,
        &mut scratch,
        None,
        primary_index,
    );
    assert_eq!(status, 0);
    assert_eq!(decoded, original);
}
17623
/// Cross-checks the public PLCP/LCP entry points against the named `compute_*_omp` helpers
/// (run with one thread) on a five-symbol text.
#[test]
fn libsais16x64_plcp_lcp_are_consistent() {
    let text = [2, 1, 2, 1, 0];
    let len = text.len();
    let n = len as SaSint;
    let suffixes = brute_sa(&text);

    // Public entry points: PLCP first, then LCP derived from it.
    let mut public_plcp = vec![0; len];
    let mut public_lcp = vec![0; len];
    let plcp_status = libsais16x64_plcp(&text, &suffixes, &mut public_plcp);
    assert_eq!(plcp_status, 0);
    let lcp_status = libsais16x64_lcp(&public_plcp, &suffixes, &mut public_lcp);
    assert_eq!(lcp_status, 0);
    assert_eq!(public_lcp[0], 0);

    // Named helpers: phi followed by plcp on the same buffer must reproduce the public PLCP.
    let mut helper_plcp = vec![0; len];
    let phi_status = compute_phi_omp(&suffixes, &mut helper_plcp, n, 1);
    assert_eq!(phi_status, 0);
    let helper_plcp_status = compute_plcp_omp(&text, &mut helper_plcp, n, 1);
    assert_eq!(helper_plcp_status, 0);
    assert_eq!(helper_plcp, public_plcp);

    let mut helper_lcp = vec![0; len];
    let helper_lcp_status = compute_lcp_omp(&helper_plcp, &suffixes, &mut helper_lcp, n, 1);
    assert_eq!(helper_lcp_status, 0);
    assert_eq!(helper_lcp, public_lcp);

    // Same comparison for the GSA flavour of PLCP.
    let mut public_gsa_plcp = vec![0; len];
    let mut helper_gsa_plcp = vec![0; len];
    let gsa_status = libsais16x64_plcp_gsa(&text, &suffixes, &mut public_gsa_plcp);
    assert_eq!(gsa_status, 0);
    let gsa_phi_status = compute_phi_omp(&suffixes, &mut helper_gsa_plcp, n, 1);
    assert_eq!(gsa_phi_status, 0);
    let gsa_plcp_status = compute_plcp_gsa_omp(&text, &mut helper_gsa_plcp, n, 1);
    assert_eq!(gsa_plcp_status, 0);
    assert_eq!(helper_gsa_plcp, public_gsa_plcp);
}
17665
/// Copies a 65,600-element array with `bwt_copy_16u_omp` (4 threads requested) and with
/// `bwt_copy_16u`, and requires both outputs to be equal.
#[test]
fn libsais16x64_bwt_copy_16u_omp_uses_block_partition_for_large_inputs() {
    const LEN: usize = 65_600;
    let source: Vec<SaSint> = (0..LEN).map(|idx| (idx * 17) as SaSint).collect();
    let mut via_omp = vec![0; LEN];
    let mut via_plain = vec![0; LEN];

    bwt_copy_16u_omp(&mut via_omp, &source, LEN as SaSint, 4);
    bwt_copy_16u(&mut via_plain, &source, LEN as SaSint);

    assert_eq!(via_omp, via_plain);
}
17678
/// Runs the phi, PLCP and LCP helpers with 1 and with 4 threads on a 65,600-symbol input and
/// requires the buffers to match after every stage.
#[test]
fn libsais16x64_plcp_lcp_omp_wrappers_match_single_thread_on_large_inputs() {
    const LEN: usize = 65_600;
    let n = LEN as SaSint;
    let text: Vec<u16> = (0..LEN).map(|pos| 1 + (pos % 251) as u16).collect();
    // The identity permutation is used as the suffix-array argument.
    let sa: Vec<SaSint> = (0..n).collect();

    let mut plcp_one = vec![0; LEN];
    let mut plcp_four = vec![0; LEN];

    // Stage 1: phi.
    let phi_one = compute_phi_omp(&sa, &mut plcp_one, n, 1);
    assert_eq!(phi_one, 0);
    let phi_four = compute_phi_omp(&sa, &mut plcp_four, n, 4);
    assert_eq!(phi_four, 0);
    assert_eq!(plcp_four, plcp_one);

    // Stage 2: PLCP, computed in place over the phi output.
    let plcp_rc_one = compute_plcp_omp(&text, &mut plcp_one, n, 1);
    assert_eq!(plcp_rc_one, 0);
    let plcp_rc_four = compute_plcp_omp(&text, &mut plcp_four, n, 4);
    assert_eq!(plcp_rc_four, 0);
    assert_eq!(plcp_four, plcp_one);

    // Stage 3: LCP, each run reading the PLCP produced with the same thread count.
    let mut lcp_one = vec![0; LEN];
    let mut lcp_four = vec![0; LEN];
    let lcp_rc_one = compute_lcp_omp(&plcp_one, &sa, &mut lcp_one, n, 1);
    assert_eq!(lcp_rc_one, 0);
    let lcp_rc_four = compute_lcp_omp(&plcp_four, &sa, &mut lcp_four, n, 4);
    assert_eq!(lcp_rc_four, 0);
    assert_eq!(lcp_four, lcp_one);
}
17710
/// Checks the buffer sizes and thread counts of contexts created with `create_ctx` and
/// `create_ctx_omp`, including the `threads == 0` request.
#[test]
fn libsais16x64_context_allocates_upstream_shaped_buffers() {
    let shared_bucket_len = 8 * ALPHABET_SIZE;

    // Default context: one thread, no per-thread state.
    let single = create_ctx().unwrap();
    assert_eq!(single.threads, 1);
    assert_eq!(single.buckets.len(), shared_bucket_len);
    assert!(single.thread_state.is_none());

    // Two threads: per-thread state is present, one entry per thread.
    let dual = create_ctx_omp(2).unwrap();
    assert_eq!(dual.threads, 2);
    assert_eq!(dual.buckets.len(), shared_bucket_len);
    let per_thread = dual.thread_state.as_ref().unwrap();
    assert_eq!(per_thread.len(), 2);
    assert_eq!(per_thread[0].buckets.len(), 4 * ALPHABET_SIZE);
    assert_eq!(per_thread[0].cache_entries, PER_THREAD_CACHE_SIZE);

    // A request for 0 threads is normalised to a single-threaded context.
    let defaulted = create_ctx_omp(0).unwrap();
    assert_eq!(defaulted.threads, 1);
    assert!(defaulted.thread_state.is_none());
}
17730
/// Checks the buffer sizes of inverse-BWT contexts created for one and for three threads.
#[test]
fn libsais16x64_unbwt_context_allocates_upstream_shaped_buffers() {
    let fastbits_len: usize = 1 + (1 << UNBWT_FASTBITS);

    // Single-threaded context: no per-thread bucket storage.
    let single = unbwt_create_ctx().unwrap();
    assert_eq!(single.threads, 1);
    assert_eq!(single.bucket2.len(), ALPHABET_SIZE);
    assert_eq!(single.fastbits.len(), fastbits_len);
    assert!(single.buckets.is_none());

    // Three threads: one ALPHABET_SIZE-sized bucket table per thread.
    let triple = unbwt_create_ctx_omp(3).unwrap();
    assert_eq!(triple.threads, 3);
    assert_eq!(triple.bucket2.len(), ALPHABET_SIZE);
    assert_eq!(triple.fastbits.len(), fastbits_len);
    assert_eq!(triple.buckets.as_ref().unwrap().len(), 3 * ALPHABET_SIZE);
}
17745
/// Exercises `unbwt_calculate_P` and the `unbwt_decode_{1,2,8}` helpers on tiny hand-built
/// tables and pins the values they write.
#[test]
fn libsais16x64_named_unbwt_helpers_follow_decode_shapes() {
    // --- unbwt_calculate_P ---
    let symbols = [0, 1, 2];
    let mut p_out = vec![usize::MAX; 4];
    let mut bucket_table = vec![0; ALPHABET_SIZE];
    bucket_table[..3].copy_from_slice(&[1, 2, 3]);
    unbwt_calculate_P(&symbols, &mut p_out, &mut bucket_table, 2, 1, 3);
    assert_eq!(p_out[2], 1);
    assert_eq!(p_out[3], 3);

    // --- shared inputs for the decode helpers ---
    let p = [1usize, 2, 0];
    let mut bucket2 = vec![3; ALPHABET_SIZE];
    bucket2[..3].copy_from_slice(&[1, 2, 3]);
    let fastbits = vec![0; 3];

    // --- single-cursor decode ---
    let mut out1 = vec![99; 3];
    let mut cursor = 0;
    unbwt_decode_1(&mut out1, &p, &bucket2, &fastbits, 0, &mut cursor, 3);
    assert_eq!(out1, vec![0, 1, 2]);
    assert_eq!(cursor, 0);

    // --- two-cursor decode; slots 2 and 5 of the output are not checked ---
    let mut out2 = vec![99; 6];
    let (mut first, mut second) = (0, 1);
    unbwt_decode_2(
        &mut out2,
        &p,
        &bucket2,
        &fastbits,
        0,
        3,
        &mut first,
        &mut second,
        2,
    );
    assert_eq!(&out2[..2], &[0, 1]);
    assert_eq!(&out2[3..5], &[1, 2]);
    assert_eq!((first, second), (2, 0));

    // --- eight-cursor decode with all cursors starting at 0 ---
    let mut out8 = vec![99; 8];
    let mut cursors = [0; 8];
    unbwt_decode_8(&mut out8, &p, &bucket2, &fastbits, 0, 1, &mut cursors, 1);
    assert_eq!(out8, vec![0; 8]);
    assert_eq!(cursors, [1; 8]);
}
17784
/// Initialises the inverse-BWT tables with `unbwt_init_single` and with
/// `unbwt_init_parallel` (4 threads requested) on a 70,003-symbol input and requires identical
/// `P`, `bucket2` and `fastbits` contents.
#[test]
fn libsais16x64_unbwt_init_parallel_uses_block_partition() {
    const LEN: usize = 70_003;
    let fastbits_len: usize = 1 + (1 << UNBWT_FASTBITS);

    let input: Vec<u16> = (0..LEN)
        .map(|pos| ((pos.wrapping_mul(37).wrapping_add(pos >> 3)) % 251) as u16)
        .collect();
    let index_arg = [12_345];

    // Outputs of the single-threaded initialiser.
    let mut p_single = vec![0; LEN + 1];
    let mut bucket2_single = vec![0; ALPHABET_SIZE];
    let mut fastbits_single = vec![0; fastbits_len];

    // Outputs and scratch space of the parallel initialiser.
    let mut p_parallel = vec![0; LEN + 1];
    let mut bucket2_parallel = vec![0; ALPHABET_SIZE];
    let mut fastbits_parallel = vec![0; fastbits_len];
    let mut thread_buckets = vec![0; 4 * ALPHABET_SIZE];

    unbwt_init_single(
        &input,
        &mut p_single,
        None,
        &index_arg,
        &mut bucket2_single,
        &mut fastbits_single,
    );
    unbwt_init_parallel(
        &input,
        &mut p_parallel,
        None,
        &index_arg,
        &mut bucket2_parallel,
        &mut fastbits_parallel,
        &mut thread_buckets,
        4,
    );

    assert_eq!(p_parallel, p_single);
    assert_eq!(bucket2_parallel, bucket2_single);
    assert_eq!(fastbits_parallel, fastbits_single);
}
17824
17825 fn assert_libsais16x64_matches_c(text: &[u16]) {
17826 let mut rust_sa = vec![0; text.len()];
17827 let mut c_sa = vec![0; text.len()];
17828
17829 let rust_rc = libsais16x64(text, &mut rust_sa, 0, None);
17830 let c_rc = unsafe {
17831 probe_public_libsais16x64(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17832 };
17833
17834 assert_eq!(rust_rc, c_rc);
17835 assert_eq!(rust_sa, c_sa);
17836 }
17837
17838 fn assert_libsais16x64_gsa_matches_c(text: &[u16]) {
17839 let mut rust_sa = vec![0; text.len()];
17840 let mut c_sa = vec![0; text.len()];
17841
17842 let rust_rc = libsais16x64_gsa(text, &mut rust_sa, 0, None);
17843 let c_rc = unsafe {
17844 probe_public_libsais16x64_gsa(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
17845 };
17846
17847 assert_eq!(rust_rc, c_rc);
17848 assert_eq!(rust_sa, c_sa);
17849 }
17850
17851 fn assert_libsais16x64_long_matches_c_with_fs(text: &[SaSint], k: SaSint, fs: SaSint) {
17852 let mut rust_t = text.to_vec();
17853 let mut c_t = text.to_vec();
17854 let mut rust_sa = vec![0; text.len() + fs as usize];
17855 let mut c_sa = vec![0; text.len() + fs as usize];
17856
17857 let rust_rc = libsais16x64_long(&mut rust_t, &mut rust_sa, k, fs);
17858 let c_rc = unsafe {
17859 probe_public_libsais16x64_long(
17860 c_t.as_mut_ptr(),
17861 c_sa.as_mut_ptr(),
17862 c_t.len() as SaSint,
17863 k,
17864 fs,
17865 )
17866 };
17867
17868 assert_eq!(rust_rc, c_rc);
17869 assert_eq!(rust_t, c_t);
17870 assert_eq!(rust_sa, c_sa);
17871 }
17872
17873 fn assert_libsais16x64_long_matches_c(text: &[SaSint], k: SaSint) {
17874 assert_libsais16x64_long_matches_c_with_fs(text, k, 0);
17875 }
17876
17877 fn make_main_32s_stress_text(len: usize, alphabet: SaSint) -> Vec<SaSint> {
17878 let mut state: u32 = 0x1357_9bdf;
17879 let mut t = Vec::with_capacity(len + 1);
17880
17881 for i in 0..len {
17882 state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
17883 let mut value = ((state >> 16) % (alphabet as u32 - 1)) as SaSint + 1;
17884 if i % 17 < 8 {
17885 value = ((i / 17) as SaSint % 11) + 1;
17886 }
17887 if i % 29 < 10 {
17888 value = (((i / 29) as SaSint * 3) % 19) + 1;
17889 }
17890 if i % 64 >= 48 {
17891 value = t[i - 48];
17892 }
17893 t.push(value);
17894 }
17895
17896 t.push(0);
17897 t
17898 }
17899
17900 fn make_recursive_main_32s_text(repeats: usize) -> Vec<SaSint> {
17901 let motif = [9, 4, 9, 2, 9, 4, 9, 1];
17902 let mut t = Vec::with_capacity(repeats * motif.len() + 1);
17903 for _ in 0..repeats {
17904 t.extend_from_slice(&motif);
17905 }
17906 t.push(0);
17907 t
17908 }
17909
/// Runs the C probe and the Rust `main_32s_entry` on the same text and compares the results.
///
/// Both implementations work in place on `t` and on a suffix-array buffer of
/// `t.len() + fs` elements, so the C run goes first, its outputs are snapshotted, the buffers
/// are restored to their initial contents, and only then the Rust run is performed with a
/// single thread.
///
/// The return codes and the text buffers must match. Of the suffix-array buffer, the first
/// `n` entries must always match; the whole buffer is compared only when `fs == 0`.
fn assert_main_32s_entry_matches_c(mut t: Vec<SaSint>, k: SaSint, fs: SaSint) {
    let n = t.len() as SaSint;
    let threads = 1;
    let mut sa = vec![0; t.len() + fs as usize];
    // Pristine copies used to reset the buffers between the two runs.
    let initial_t = t.clone();
    let initial_sa = sa.clone();

    // C reference run, operating directly on `t` and `sa`.
    let c_result = unsafe {
        probe_libsais16x64_main_32s_entry(t.as_mut_ptr(), sa.as_mut_ptr(), n, k, fs, threads)
    };
    // Snapshot what the C run left behind before the buffers are reset.
    let c_t = t.clone();
    let c_sa = sa.clone();

    t.copy_from_slice(&initial_t);
    sa.copy_from_slice(&initial_sa);

    // Rust run on the restored buffers.
    let mut thread_state = alloc_thread_state(threads).unwrap();
    let rust_result = main_32s_entry(
        t.as_mut_ptr(),
        &mut sa,
        n,
        k,
        fs,
        threads,
        &mut thread_state,
    );

    assert_eq!(rust_result, c_result);
    assert_eq!(t, c_t);
    assert_eq!(&sa[..n as usize], &c_sa[..n as usize]);
    // The extra `fs` tail is only compared when there is none, i.e. the buffers are exactly
    // `n` long.
    if fs == 0 {
        assert_eq!(sa, c_sa);
    }
}
17944
/// Compares `main_32s_entry` with the C probe on four stress texts of 1024 symbols (alphabet
/// sizes 300, 400, 700 and 1501, each with 2048 extra suffix-array slots) and on the repeated
/// motif text with `k` of 300 and 1501 and no extra slots.
#[test]
fn libsais16x64_main_32s_entry_matches_c_for_local_32s_paths() {
    let stress_alphabets = [300, 400, 700, 1501];
    for &alphabet in stress_alphabets.iter() {
        let text = make_main_32s_stress_text(1024, alphabet);
        assert_main_32s_entry_matches_c(text, alphabet, 2048);
    }

    let recursive_alphabets = [300, 1501];
    for &k in recursive_alphabets.iter() {
        let text = make_recursive_main_32s_text(24);
        assert_main_32s_entry_matches_c(text, k, 0);
    }
}
17954
17955 fn assert_libsais16x64_bwt_matches_c(text: &[u16]) {
17956 let mut rust_u = vec![0; text.len()];
17957 let mut rust_a = vec![0; text.len()];
17958 let mut c_u = vec![0; text.len()];
17959 let mut c_a = vec![0; text.len()];
17960
17961 let rust_rc = libsais16x64_bwt(text, &mut rust_u, &mut rust_a, 0, None);
17962 let c_rc = unsafe {
17963 probe_public_libsais16x64_bwt(
17964 text.as_ptr(),
17965 c_u.as_mut_ptr(),
17966 c_a.as_mut_ptr(),
17967 text.len() as SaSint,
17968 0,
17969 )
17970 };
17971
17972 assert_eq!(rust_rc, c_rc);
17973 assert_eq!(rust_u, c_u);
17974 }
17975
17976 fn assert_libsais16x64_bwt_aux_matches_c(text: &[u16], r: SaSint) {
17977 let aux_len = if text.is_empty() {
17978 0
17979 } else {
17980 (text.len() - 1) / r as usize + 1
17981 };
17982 let mut rust_u = vec![0; text.len()];
17983 let mut rust_a = vec![0; text.len()];
17984 let mut rust_i = vec![0; aux_len];
17985 let mut c_u = vec![0; text.len()];
17986 let mut c_a = vec![0; text.len()];
17987 let mut c_i = vec![0; aux_len];
17988
17989 let rust_rc = libsais16x64_bwt_aux(text, &mut rust_u, &mut rust_a, 0, None, r, &mut rust_i);
17990 let c_rc = unsafe {
17991 probe_public_libsais16x64_bwt_aux(
17992 text.as_ptr(),
17993 c_u.as_mut_ptr(),
17994 c_a.as_mut_ptr(),
17995 text.len() as SaSint,
17996 0,
17997 r,
17998 c_i.as_mut_ptr(),
17999 )
18000 };
18001
18002 assert_eq!(rust_rc, c_rc);
18003 assert_eq!(rust_u, c_u);
18004 assert_eq!(rust_i, c_i);
18005 }
18006
/// Compares the frequency-table output of four public entry points with their C probes.
///
/// The phases are, in order: suffix array of `text`, generalized suffix array of `gsa_text`,
/// BWT of `text`, and BWT with auxiliary indexes (`r = 4`) of `text`. Each phase checks the
/// return code, the primary output and the full frequency table. Both frequency tables are
/// refilled with `-1` before every phase, so a slot that an implementation leaves untouched
/// shows up as `-1` in the comparison.
///
/// `text` must be non-empty: the auxiliary length is computed as `(text.len() - 1) / r + 1`.
fn assert_libsais16x64_freq_outputs_match_c(text: &[u16], gsa_text: &[u16]) {
    let mut rust_sa = vec![0; text.len()];
    let mut c_sa = vec![0; text.len()];
    let mut rust_freq = vec![-1; ALPHABET_SIZE];
    let mut c_freq = vec![-1; ALPHABET_SIZE];

    // Phase 1: plain suffix array with frequency output.
    let rust_rc = libsais16x64(text, &mut rust_sa, 0, Some(&mut rust_freq));
    let c_rc = unsafe {
        probe_public_libsais16x64_freq(
            text.as_ptr(),
            c_sa.as_mut_ptr(),
            text.len() as SaSint,
            0,
            c_freq.as_mut_ptr(),
        )
    };
    assert_eq!(rust_rc, c_rc);
    assert_eq!(rust_sa, c_sa);
    assert_eq!(rust_freq, c_freq);

    // Phase 2: generalized suffix array of `gsa_text` with frequency output.
    let mut rust_gsa = vec![0; gsa_text.len()];
    let mut c_gsa = vec![0; gsa_text.len()];
    rust_freq.fill(-1);
    c_freq.fill(-1);
    let rust_rc = libsais16x64_gsa(gsa_text, &mut rust_gsa, 0, Some(&mut rust_freq));
    let c_rc = unsafe {
        probe_public_libsais16x64_gsa_freq(
            gsa_text.as_ptr(),
            c_gsa.as_mut_ptr(),
            gsa_text.len() as SaSint,
            0,
            c_freq.as_mut_ptr(),
        )
    };
    assert_eq!(rust_rc, c_rc);
    assert_eq!(rust_gsa, c_gsa);
    assert_eq!(rust_freq, c_freq);

    // Phase 3: BWT with frequency output. The temporary arrays are not compared.
    let mut rust_u = vec![0; text.len()];
    let mut rust_a = vec![0; text.len()];
    let mut c_u = vec![0; text.len()];
    let mut c_a = vec![0; text.len()];
    rust_freq.fill(-1);
    c_freq.fill(-1);
    let rust_rc = libsais16x64_bwt(text, &mut rust_u, &mut rust_a, 0, Some(&mut rust_freq));
    let c_rc = unsafe {
        probe_public_libsais16x64_bwt_freq(
            text.as_ptr(),
            c_u.as_mut_ptr(),
            c_a.as_mut_ptr(),
            text.len() as SaSint,
            0,
            c_freq.as_mut_ptr(),
        )
    };
    assert_eq!(rust_rc, c_rc);
    assert_eq!(rust_u, c_u);
    assert_eq!(rust_freq, c_freq);

    // Phase 4: BWT with auxiliary indexes. The output and temporary buffers from phase 3 are
    // reused as they are, without being cleared.
    let r = 4;
    let aux_len = (text.len() - 1) / r as usize + 1;
    let mut rust_i = vec![0; aux_len];
    let mut c_i = vec![0; aux_len];
    rust_freq.fill(-1);
    c_freq.fill(-1);
    let rust_rc = libsais16x64_bwt_aux(
        text,
        &mut rust_u,
        &mut rust_a,
        0,
        Some(&mut rust_freq),
        r,
        &mut rust_i,
    );
    let c_rc = unsafe {
        probe_public_libsais16x64_bwt_aux_freq(
            text.as_ptr(),
            c_u.as_mut_ptr(),
            c_a.as_mut_ptr(),
            text.len() as SaSint,
            0,
            c_freq.as_mut_ptr(),
            r,
            c_i.as_mut_ptr(),
        )
    };
    assert_eq!(rust_rc, c_rc);
    assert_eq!(rust_u, c_u);
    assert_eq!(rust_i, c_i);
    assert_eq!(rust_freq, c_freq);
}
18098
18099 fn assert_libsais16x64_unbwt_matches_c(text: &[u16]) {
18100 let mut bwt = vec![0; text.len()];
18101 let mut work = vec![0; text.len()];
18102 let primary = libsais16x64_bwt(text, &mut bwt, &mut work, 0, None);
18103 assert!(primary >= 0);
18104
18105 let mut rust_u = vec![0; text.len()];
18106 let mut rust_a = vec![0; text.len() + 1];
18107 let mut c_u = vec![0; text.len()];
18108 let mut c_a = vec![0; text.len() + 1];
18109
18110 let rust_rc = libsais16x64_unbwt(&bwt, &mut rust_u, &mut rust_a, None, primary);
18111 let c_rc = unsafe {
18112 probe_public_libsais16x64_unbwt(
18113 bwt.as_ptr(),
18114 c_u.as_mut_ptr(),
18115 c_a.as_mut_ptr(),
18116 bwt.len() as SaSint,
18117 primary,
18118 )
18119 };
18120
18121 assert_eq!(rust_rc, c_rc);
18122 assert_eq!(rust_u, c_u);
18123 assert_eq!(rust_u, text);
18124 }
18125
18126 fn assert_libsais16x64_unbwt_aux_matches_c(text: &[u16], r: SaSint) {
18127 let mut bwt = vec![0; text.len()];
18128 let mut work = vec![0; text.len()];
18129 let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
18130 let bwt_rc = libsais16x64_bwt_aux(text, &mut bwt, &mut work, 0, None, r, &mut aux);
18131 assert_eq!(bwt_rc, 0);
18132
18133 let mut rust_u = vec![0; text.len()];
18134 let mut rust_a = vec![0; text.len() + 1];
18135 let mut c_u = vec![0; text.len()];
18136 let mut c_a = vec![0; text.len() + 1];
18137
18138 let rust_rc = libsais16x64_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, None, r, &aux);
18139 let c_rc = unsafe {
18140 probe_public_libsais16x64_unbwt_aux(
18141 bwt.as_ptr(),
18142 c_u.as_mut_ptr(),
18143 c_a.as_mut_ptr(),
18144 bwt.len() as SaSint,
18145 r,
18146 aux.as_ptr(),
18147 )
18148 };
18149
18150 assert_eq!(rust_rc, c_rc);
18151 assert_eq!(rust_u, c_u);
18152 assert_eq!(rust_u, text);
18153 }
18154
/// Checks the inverse BWT entry points when a frequency table is supplied.
///
/// Phase 1 encodes `text` with `libsais16x64_bwt` (collecting `freq`) and decodes it with
/// `libsais16x64_unbwt` and the C probe, both given that `freq`. Phase 2 repeats the
/// exercise with the auxiliary-index variants at `r = 4`. In both phases the return codes and
/// decoded outputs must match, and the decoded output must equal `text`.
///
/// `text` must be non-empty: the auxiliary length is computed as `(text.len() - 1) / r + 1`.
fn assert_libsais16x64_unbwt_freq_matches_c(text: &[u16]) {
    let mut freq = vec![0; ALPHABET_SIZE];
    let mut bwt = vec![0; text.len()];
    let mut work = vec![0; text.len()];
    let primary = libsais16x64_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
    assert!(primary >= 0);

    // The decoders get temporary arrays of `text.len() + 1` elements.
    let mut rust_u = vec![0; text.len()];
    let mut rust_a = vec![0; text.len() + 1];
    let mut c_u = vec![0; text.len()];
    let mut c_a = vec![0; text.len() + 1];

    // Phase 1: primary-index decode with the frequency table from the encoder.
    let rust_rc = libsais16x64_unbwt(&bwt, &mut rust_u, &mut rust_a, Some(&freq), primary);
    let c_rc = unsafe {
        probe_public_libsais16x64_unbwt_freq(
            bwt.as_ptr(),
            c_u.as_mut_ptr(),
            c_a.as_mut_ptr(),
            bwt.len() as SaSint,
            freq.as_ptr(),
            primary,
        )
    };
    assert_eq!(rust_rc, c_rc);
    assert_eq!(rust_u, c_u);
    assert_eq!(rust_u, text);

    // Phase 2: re-encode with auxiliary indexes; `bwt`, `work` and `freq` are overwritten by
    // this call rather than reallocated.
    let r = 4;
    let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
    let bwt_rc =
        libsais16x64_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), r, &mut aux);
    assert_eq!(bwt_rc, 0);

    // Clear the decode buffers so results of phase 1 cannot satisfy the phase 2 assertions.
    rust_u.fill(0);
    rust_a.fill(0);
    c_u.fill(0);
    c_a.fill(0);
    let rust_rc = libsais16x64_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, Some(&freq), r, &aux);
    let c_rc = unsafe {
        probe_public_libsais16x64_unbwt_aux_freq(
            bwt.as_ptr(),
            c_u.as_mut_ptr(),
            c_a.as_mut_ptr(),
            bwt.len() as SaSint,
            freq.as_ptr(),
            r,
            aux.as_ptr(),
        )
    };
    assert_eq!(rust_rc, c_rc);
    assert_eq!(rust_u, c_u);
    assert_eq!(rust_u, text);
}
18208
18209 fn assert_libsais16x64_plcp_lcp_matches_c(text: &[u16]) {
18210 let mut sa = vec![0; text.len()];
18211 assert_eq!(libsais16x64(text, &mut sa, 0, None), 0);
18212
18213 let mut rust_plcp = vec![0; text.len()];
18214 let mut c_plcp = vec![0; text.len()];
18215 let rust_rc = libsais16x64_plcp(text, &sa, &mut rust_plcp);
18216 let c_rc = unsafe {
18217 probe_public_libsais16x64_plcp(
18218 text.as_ptr(),
18219 sa.as_ptr(),
18220 c_plcp.as_mut_ptr(),
18221 text.len() as SaSint,
18222 )
18223 };
18224 assert_eq!(rust_rc, c_rc);
18225 assert_eq!(rust_plcp, c_plcp);
18226
18227 let mut rust_lcp = vec![0; text.len()];
18228 let mut c_lcp = vec![0; text.len()];
18229 let rust_rc = libsais16x64_lcp(&rust_plcp, &sa, &mut rust_lcp);
18230 let c_rc = unsafe {
18231 probe_public_libsais16x64_lcp(
18232 c_plcp.as_ptr(),
18233 sa.as_ptr(),
18234 c_lcp.as_mut_ptr(),
18235 text.len() as SaSint,
18236 )
18237 };
18238 assert_eq!(rust_rc, c_rc);
18239 assert_eq!(rust_lcp, c_lcp);
18240 }
18241
18242 fn assert_libsais16x64_plcp_gsa_matches_c(text: &[u16]) {
18243 let mut sa = vec![0; text.len()];
18244 assert_eq!(libsais16x64_gsa(text, &mut sa, 0, None), 0);
18245
18246 let mut rust_plcp = vec![0; text.len()];
18247 let mut c_plcp = vec![0; text.len()];
18248 let rust_rc = libsais16x64_plcp_gsa(text, &sa, &mut rust_plcp);
18249 let c_rc = unsafe {
18250 probe_public_libsais16x64_plcp_gsa(
18251 text.as_ptr(),
18252 sa.as_ptr(),
18253 c_plcp.as_mut_ptr(),
18254 text.len() as SaSint,
18255 )
18256 };
18257 assert_eq!(rust_rc, c_rc);
18258 assert_eq!(rust_plcp, c_plcp);
18259 }
18260
/// Compares `libsais16x64` with the C implementation on a handful of small texts, including
/// the empty text, a single symbol and symbols at the top of the 16-bit range.
#[test]
fn public_libsais16x64_matches_upstream_c() {
    let cases: [&[u16]; 6] = [
        &[],
        &[1],
        &[2, 1, 3, 1, 2, 0],
        &[2, 1, 3, 1, 2, 4, 1, 0],
        &[65_535, 1, 65_534, 1, 0],
        &[7, 7, 7, 7, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_matches_c(text);
    }
}
18274
/// Compares `libsais16x64_bwt` with the C implementation on a handful of small texts,
/// including the empty text, a single symbol and symbols at the top of the 16-bit range.
#[test]
fn public_libsais16x64_bwt_matches_upstream_c() {
    let cases: [&[u16]; 6] = [
        &[],
        &[1],
        &[2, 1, 3, 1, 2, 0],
        &[2, 1, 3, 1, 2, 4, 1, 0],
        &[65_535, 1, 65_534, 1, 0],
        &[7, 7, 7, 7, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_bwt_matches_c(text);
    }
}
18288
/// Compares `libsais16x64_gsa` with the C implementation on small texts that all end in `0`
/// and may contain further `0` symbols inside.
#[test]
fn public_libsais16x64_gsa_matches_upstream_c() {
    let cases: [&[u16]; 4] = [
        &[0],
        &[2, 1, 0],
        &[2, 1, 0, 3, 1, 0],
        &[7, 7, 0, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_gsa_matches_c(text);
    }
}
18295
/// Compares `libsais16x64_long` with the C implementation on small integer texts paired with
/// their alphabet size `k`, followed by one run that adds 64 extra suffix-array slots.
#[test]
fn public_libsais16x64_long_matches_upstream_c() {
    let cases: [(&[SaSint], SaSint); 5] = [
        (&[], 0),
        (&[0], 1),
        (&[1, 2, 1, 0], 3),
        (&[2, 1, 2, 1, 0], 3),
        (&[3, 3, 3, 2, 1, 0], 4),
    ];
    for &(text, k) in cases.iter() {
        assert_libsais16x64_long_matches_c(text, k);
    }

    let text_with_free_space: [SaSint; 6] = [2, 1, 3, 1, 2, 0];
    assert_libsais16x64_long_matches_c_with_fs(&text_with_free_space, 4, 64);
}
18310
/// Compares the PLCP and LCP entry points with the C implementation on small non-empty texts.
#[test]
fn public_libsais16x64_plcp_lcp_matches_upstream_c() {
    let cases: [&[u16]; 4] = [
        &[2, 1, 3, 1, 2, 0],
        &[2, 1, 3, 1, 2, 4, 1, 0],
        &[65_535, 1, 65_534, 1, 0],
        &[7, 7, 7, 7, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_plcp_lcp_matches_c(text);
    }
}
18322
/// Compares `libsais16x64_plcp_gsa` with the C implementation on small texts that all end in
/// `0` and may contain further `0` symbols inside.
#[test]
fn public_libsais16x64_plcp_gsa_matches_upstream_c() {
    let cases: [&[u16]; 4] = [
        &[0],
        &[2, 1, 0],
        &[2, 1, 0, 3, 1, 0],
        &[7, 7, 0, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_plcp_gsa_matches_c(text);
    }
}
18329
/// Compares `libsais16x64_bwt_aux` (sampling rate 4) with the C implementation on small
/// non-empty texts.
#[test]
fn public_libsais16x64_bwt_aux_matches_upstream_c() {
    const SAMPLING_RATE: SaSint = 4;
    let cases: [&[u16]; 4] = [
        &[2, 1, 3, 1, 2, 0],
        &[2, 1, 3, 1, 2, 4, 1, 0],
        &[65_535, 1, 65_534, 1, 0],
        &[7, 7, 7, 7, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_bwt_aux_matches_c(text, SAMPLING_RATE);
    }
}
18341
/// Compares the frequency-table outputs with the C implementation, using one text for the
/// plain/BWT entry points and a second one, with an inner `0`, for the GSA entry point.
#[test]
fn public_libsais16x64_frequency_outputs_match_upstream_c() {
    let text: [u16; 5] = [65_535, 1, 2, 1, 0];
    let gsa_text: [u16; 6] = [65_535, 1, 0, 2, 1, 0];
    assert_libsais16x64_freq_outputs_match_c(&text, &gsa_text);
}
18346
/// Compares the inverse BWT entry points with the C implementation when a frequency table is
/// supplied.
#[test]
fn public_libsais16x64_unbwt_with_frequency_matches_upstream_c() {
    let text: [u16; 5] = [65_535, 1, 2, 1, 0];
    assert_libsais16x64_unbwt_freq_matches_c(&text);
}
18351
/// Compares `libsais16x64_unbwt` with the C implementation on small non-empty texts, starting
/// with a single symbol.
#[test]
fn public_libsais16x64_unbwt_matches_upstream_c() {
    let cases: [&[u16]; 5] = [
        &[1],
        &[2, 1, 3, 1, 2, 0],
        &[2, 1, 3, 1, 2, 4, 1, 0],
        &[65_535, 1, 65_534, 1, 0],
        &[7, 7, 7, 7, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_unbwt_matches_c(text);
    }
}
18364
/// Compares `libsais16x64_unbwt_aux` (sampling rate 4) with the C implementation on small
/// non-empty texts.
#[test]
fn public_libsais16x64_unbwt_aux_matches_upstream_c() {
    const SAMPLING_RATE: SaSint = 4;
    let cases: [&[u16]; 4] = [
        &[2, 1, 3, 1, 2, 0],
        &[2, 1, 3, 1, 2, 4, 1, 0],
        &[65_535, 1, 65_534, 1, 0],
        &[7, 7, 7, 7, 7, 0],
    ];
    for &text in cases.iter() {
        assert_libsais16x64_unbwt_aux_matches_c(text, SAMPLING_RATE);
    }
}
18376
/// Runs the auxiliary-index round trip (sampling rate 4) on generated texts of ten different
/// lengths, so that the number of sampled indexes ranges from 1 to 10.
#[test]
fn public_libsais16x64_unbwt_aux_exercises_decode_dispatch_cases() {
    let lengths: [usize; 10] = [2, 5, 9, 13, 17, 21, 25, 29, 33, 37];
    for &len in lengths.iter() {
        // Symbols are drawn from 1..=65_535, so the generated text never contains 0.
        let text: Vec<u16> = (0..len)
            .map(|pos| ((pos * 37 + 11) % 65_535 + 1) as u16)
            .collect();
        assert_libsais16x64_unbwt_aux_matches_c(&text, 4);
    }
}
18386
/// Passes suffix arrays containing a negative entry or an entry equal to the text length to
/// the PLCP and LCP entry points and expects `-1` from each call.
#[test]
fn libsais16x64_lcp_helpers_reject_invalid_suffix_entries() {
    let text = [2, 1, 2, 1, 0];
    let with_negative_entry = [0, 1, -1, 3, 4];
    // 5 equals the text length, i.e. one past the last valid position.
    let with_out_of_range_entry = [0, 1, 2, 3, 5];
    let mut plcp = vec![0; text.len()];
    let mut lcp = vec![0; text.len()];

    let plcp_negative = libsais16x64_plcp(&text, &with_negative_entry, &mut plcp);
    assert_eq!(plcp_negative, -1);
    let plcp_out_of_range = libsais16x64_plcp(&text, &with_out_of_range_entry, &mut plcp);
    assert_eq!(plcp_out_of_range, -1);

    let lcp_negative = libsais16x64_lcp(&plcp, &with_negative_entry, &mut lcp);
    assert_eq!(lcp_negative, -1);
    let lcp_out_of_range = libsais16x64_lcp(&plcp, &with_out_of_range_entry, &mut lcp);
    assert_eq!(lcp_out_of_range, -1);
}
18398
/// Feeds undersized buffers and out-of-contract parameters to the public construction entry
/// points and expects `-1` from each call; also checks that a negative thread count yields no
/// context.
#[test]
fn libsais16x64_rejects_invalid_public_arguments() {
    let text = [2, 1, 3, 1, 2, 0];
    let int_text = [1, 2, 1, 0];
    // Separate copies because the integer entry points take the text mutably.
    let mut int_text_for_short_sa = int_text.to_vec();
    let mut int_text_for_negative_fs = int_text.to_vec();
    let mut int_text_for_alias = int_text.to_vec();
    // Buffers that are one element too short for their role.
    let mut sa = vec![0; text.len() - 1];
    let mut int_sa = vec![0; int_text.len() - 1];
    let mut full_int_sa = vec![0; int_text.len()];
    let mut freq = vec![0; ALPHABET_SIZE - 1];
    let mut u = vec![0; text.len() - 1];
    let mut a = vec![0; text.len() - 1];
    // Correctly sized counterparts, so each call below has exactly one bad argument.
    let mut full_u = vec![0; text.len()];
    let mut full_a = vec![0; text.len()];
    let mut aux = vec![0; 1];

    // Suffix array shorter than the text.
    assert_eq!(libsais16x64(&text, &mut sa, 0, None), -1);
    // Frequency table shorter than ALPHABET_SIZE.
    assert_eq!(libsais16x64(&text, &mut full_a, 0, Some(&mut freq)), -1);
    // NOTE(review): presumably rejected because the text does not end in 0 — confirm against
    // libsais16x64_gsa.
    assert_eq!(libsais16x64_gsa(&[1, 2, 3], &mut full_a[..3], 0, None), -1);
    // Suffix array shorter than the integer text.
    assert_eq!(
        libsais16x64_long(&mut int_text_for_short_sa, &mut int_sa, 3, 0),
        -1
    );
    // Negative `fs`.
    assert_eq!(
        libsais16x64_long(&mut int_text_for_negative_fs, &mut full_int_sa, 3, -1),
        -1
    );
    // Same negative `fs` through `libsais16x64_int`.
    assert_eq!(
        libsais16x64_int(&mut int_text_for_alias, &mut full_int_sa, 3, -1),
        -1
    );
    // BWT output buffer too short, then temporary buffer too short.
    assert_eq!(libsais16x64_bwt(&text, &mut u, &mut full_a, 0, None), -1);
    assert_eq!(libsais16x64_bwt(&text, &mut full_u, &mut a, 0, None), -1);
    // NOTE(review): sampling rates 0 and 3 are presumably rejected as invalid values (not a
    // power of two of at least 2) — confirm against libsais16x64_bwt_aux.
    assert_eq!(
        libsais16x64_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 0, &mut aux),
        -1
    );
    assert_eq!(
        libsais16x64_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 3, &mut aux),
        -1
    );
    // With r = 4 the six-symbol text needs (6 - 1) / 4 + 1 = 2 auxiliary slots by the formula
    // the comparison helpers in this file use; `aux` has only one.
    assert_eq!(
        libsais16x64_bwt_aux(&text, &mut full_u, &mut full_a, 0, None, 4, &mut aux),
        -1
    );
    // Negative thread counts produce no context at all.
    assert_eq!(create_ctx_omp(-1), None);
    assert_eq!(unbwt_create_ctx_omp(-1), None);
}
18448
/// Feeds undersized buffers, bad primary indexes and bad auxiliary data to the inverse BWT
/// entry points and expects `-1` from each call.
#[test]
fn libsais16x64_unbwt_rejects_invalid_public_arguments() {
    let text = [2, 1, 3, 1, 2, 0];
    let mut bwt = vec![0; text.len()];
    let mut work = vec![0; text.len()];
    // A real BWT and primary index, so only the deliberately broken argument is invalid.
    let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, None);

    let mut short_u = vec![0; text.len() - 1];
    let mut short_a = vec![0; text.len() - 1];
    let mut full_u = vec![0; text.len()];
    let mut full_a = vec![0; text.len()];
    let short_freq = vec![0; ALPHABET_SIZE - 1];
    // One auxiliary slot where the r = 4 call below is given a six-symbol input.
    let short_aux = vec![primary];
    let bad_aux = vec![0, 0];
    let good_aux = vec![primary, 4];

    // Output buffer one element too short.
    assert_eq!(
        libsais16x64_unbwt(&bwt, &mut short_u, &mut full_a, None, primary),
        -1
    );
    // Temporary buffer one element shorter than the input.
    assert_eq!(
        libsais16x64_unbwt(&bwt, &mut full_u, &mut short_a, None, primary),
        -1
    );
    // Frequency table shorter than ALPHABET_SIZE.
    assert_eq!(
        libsais16x64_unbwt(&bwt, &mut full_u, &mut full_a, Some(&short_freq), primary),
        -1
    );
    // Primary index 0.
    assert_eq!(
        libsais16x64_unbwt(&bwt, &mut full_u, &mut full_a, None, 0),
        -1
    );
    // Primary index one past the text length.
    assert_eq!(
        libsais16x64_unbwt(
            &bwt,
            &mut full_u,
            &mut full_a,
            None,
            text.len() as SaSint + 1
        ),
        -1
    );
    // NOTE(review): sampling rates 0 and 3 are presumably rejected as invalid values (not a
    // power of two of at least 2) — confirm against libsais16x64_unbwt_aux.
    assert_eq!(
        libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 0, &good_aux),
        -1
    );
    assert_eq!(
        libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
        -1
    );
    // Valid rate, but only one auxiliary entry.
    assert_eq!(
        libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &short_aux),
        -1
    );
    // Valid rate and length, but the entries are zero.
    assert_eq!(
        libsais16x64_unbwt_aux(&bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
        -1
    );
}
18508
/// Context-taking counterpart of the invalid-argument test: bad buffers and parameters must
/// yield `-1`, while a context whose internal buffers are missing or too small must yield
/// `-2`.
#[test]
fn libsais16x64_ctx_rejects_invalid_public_arguments() {
    let text = [2, 1, 3, 1, 2, 0];
    let mut ctx = create_ctx().unwrap();
    // Buffers that are one element too short for their role.
    let mut sa = vec![0; text.len() - 1];
    let mut freq = vec![0; ALPHABET_SIZE - 1];
    let mut u = vec![0; text.len() - 1];
    let mut a = vec![0; text.len() - 1];
    // Correctly sized counterparts, so each call below has exactly one bad argument.
    let mut full_u = vec![0; text.len()];
    let mut full_a = vec![0; text.len()];
    let mut aux = vec![0; 1];

    // Suffix array shorter than the text.
    assert_eq!(libsais16x64_ctx(&mut ctx, &text, &mut sa, 0, None), -1);
    // Frequency table shorter than ALPHABET_SIZE.
    assert_eq!(
        libsais16x64_ctx(&mut ctx, &text, &mut full_a, 0, Some(&mut freq)),
        -1
    );
    // NOTE(review): presumably rejected because the text does not end in 0 — confirm against
    // libsais16x64_gsa_ctx.
    assert_eq!(
        libsais16x64_gsa_ctx(&mut ctx, &[1, 2, 3], &mut full_a[..3], 0, None),
        -1
    );
    // BWT output buffer too short, then temporary buffer too short.
    assert_eq!(
        libsais16x64_bwt_ctx(&mut ctx, &text, &mut u, &mut full_a, 0, None),
        -1
    );
    assert_eq!(
        libsais16x64_bwt_ctx(&mut ctx, &text, &mut full_u, &mut a, 0, None),
        -1
    );
    // NOTE(review): sampling rates 0 and 3 are presumably rejected as invalid values (not a
    // power of two of at least 2) — confirm against libsais16x64_bwt_aux_ctx.
    assert_eq!(
        libsais16x64_bwt_aux_ctx(
            &mut ctx,
            &text,
            &mut full_u,
            &mut full_a,
            0,
            None,
            0,
            &mut aux
        ),
        -1
    );
    assert_eq!(
        libsais16x64_bwt_aux_ctx(
            &mut ctx,
            &text,
            &mut full_u,
            &mut full_a,
            0,
            None,
            3,
            &mut aux
        ),
        -1
    );
    // Valid rate 4, but `aux` has a single slot for a six-symbol text.
    assert_eq!(
        libsais16x64_bwt_aux_ctx(
            &mut ctx,
            &text,
            &mut full_u,
            &mut full_a,
            0,
            None,
            4,
            &mut aux
        ),
        -1
    );

    // From here on the arguments are valid and the context itself is broken, which is
    // reported as -2 instead of -1.

    // Default-constructed context: empty buckets, zero threads.
    let mut default_ctx = Context::default();
    assert_eq!(
        libsais16x64_ctx(&mut default_ctx, &text, &mut full_a, 0, None),
        -2
    );

    // Properly created context whose bucket storage was emptied afterwards.
    let mut bad_bucket_ctx = create_ctx().unwrap();
    bad_bucket_ctx.buckets.clear();
    assert_eq!(
        libsais16x64_ctx(&mut bad_bucket_ctx, &text, &mut full_a, 0, None),
        -2
    );

    // Two-thread context left with only one per-thread state entry.
    let mut short_thread_state_ctx = create_ctx_omp(2).unwrap();
    short_thread_state_ctx
        .thread_state
        .as_mut()
        .unwrap()
        .truncate(1);
    assert_eq!(
        libsais16x64_ctx(&mut short_thread_state_ctx, &text, &mut full_a, 0, None),
        -2
    );
}
18602
/// Context-taking counterpart of the inverse BWT invalid-argument test: undersized buffers,
/// a zero primary index and bad auxiliary data must each yield `-1`.
#[test]
fn libsais16x64_unbwt_ctx_rejects_invalid_public_arguments() {
    let text = [2, 1, 3, 1, 2, 0];
    let mut bwt = vec![0; text.len()];
    let mut work = vec![0; text.len()];
    // A real BWT and primary index, so only the deliberately broken argument is invalid.
    let primary = libsais16x64_bwt(&text, &mut bwt, &mut work, 0, None);
    let mut ctx = unbwt_create_ctx().unwrap();

    let mut short_u = vec![0; text.len() - 1];
    let mut short_a = vec![0; text.len() - 1];
    let mut full_u = vec![0; text.len()];
    let mut full_a = vec![0; text.len()];
    let short_freq = vec![0; ALPHABET_SIZE - 1];
    // One auxiliary slot where the r = 4 call below is given a six-symbol input.
    let short_aux = vec![primary];
    let bad_aux = vec![0, 0];
    let good_aux = vec![primary, 4];

    // Output buffer one element too short.
    assert_eq!(
        libsais16x64_unbwt_ctx(&mut ctx, &bwt, &mut short_u, &mut full_a, None, primary),
        -1
    );
    // Temporary buffer one element shorter than the input.
    assert_eq!(
        libsais16x64_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut short_a, None, primary),
        -1
    );
    // Frequency table shorter than ALPHABET_SIZE.
    assert_eq!(
        libsais16x64_unbwt_ctx(
            &mut ctx,
            &bwt,
            &mut full_u,
            &mut full_a,
            Some(&short_freq),
            primary
        ),
        -1
    );
    // Primary index 0.
    assert_eq!(
        libsais16x64_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0),
        -1
    );
    // NOTE(review): sampling rates 0 and 3 are presumably rejected as invalid values (not a
    // power of two of at least 2) — confirm against libsais16x64_unbwt_aux_ctx.
    assert_eq!(
        libsais16x64_unbwt_aux_ctx(
            &mut ctx,
            &bwt,
            &mut full_u,
            &mut full_a,
            None,
            0,
            &good_aux
        ),
        -1
    );
    assert_eq!(
        libsais16x64_unbwt_aux_ctx(
            &mut ctx,
            &bwt,
            &mut full_u,
            &mut full_a,
            None,
            3,
            &good_aux
        ),
        -1
    );
    // Valid rate, but only one auxiliary entry.
    assert_eq!(
        libsais16x64_unbwt_aux_ctx(
            &mut ctx,
            &bwt,
            &mut full_u,
            &mut full_a,
            None,
            4,
            &short_aux
        ),
        -1
    );
    // Valid rate and length, but the entries are zero.
    assert_eq!(
        libsais16x64_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 4, &bad_aux),
        -1
    );
}
18684
/// Confirms that the `_ctx` entry points (suffix array, BWT, BWT with aux indexes)
/// return the same status and fill the same output as the context-free functions.
#[test]
fn libsais16x64_context_wrappers_match_direct_calls() {
    let text = [2, 1, 3, 1, 2, 0];
    let mut ctx = create_ctx().unwrap();

    // Suffix array construction.
    let mut direct_sa = vec![0; text.len()];
    let mut ctx_sa = vec![0; text.len()];
    assert_eq!(libsais16x64(&text, &mut direct_sa, 0, None), 0);
    assert_eq!(libsais16x64_ctx(&mut ctx, &text, &mut ctx_sa, 0, None), 0);
    assert_eq!(ctx_sa, direct_sa);

    // Forward BWT.
    let mut direct_bwt = vec![0; text.len()];
    let mut direct_work = vec![0; text.len()];
    let mut ctx_bwt = vec![0; text.len()];
    let mut ctx_work = vec![0; text.len()];
    let direct_primary = libsais16x64_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None);
    let ctx_primary =
        libsais16x64_bwt_ctx(&mut ctx, &text, &mut ctx_bwt, &mut ctx_work, 0, None);
    // Comparing the two return values alone would also accept two identical failures
    // (both buffers would still be all zeros), so pin success explicitly.
    assert!(
        direct_primary >= 0,
        "direct BWT reported an error: {direct_primary}"
    );
    assert_eq!(direct_primary, ctx_primary);
    assert_eq!(ctx_bwt, direct_bwt);

    // Forward BWT with auxiliary indexes.
    let mut direct_aux = vec![0; 2];
    let mut ctx_aux = vec![0; 2];
    let direct_status = libsais16x64_bwt_aux(
        &text,
        &mut direct_bwt,
        &mut direct_work,
        0,
        None,
        4,
        &mut direct_aux,
    );
    let ctx_status = libsais16x64_bwt_aux_ctx(
        &mut ctx,
        &text,
        &mut ctx_bwt,
        &mut ctx_work,
        0,
        None,
        4,
        &mut ctx_aux,
    );
    // Same reasoning as above: require the success code, not merely agreement.
    assert_eq!(direct_status, 0);
    assert_eq!(direct_status, ctx_status);
    assert_eq!(ctx_bwt, direct_bwt);
    assert_eq!(ctx_aux, direct_aux);
}
18732
/// Inverts a BWT through both the context-free and the `_ctx` inverse functions
/// (plain and aux-index variants) and checks that the two outputs agree.
#[test]
fn libsais16x64_unbwt_context_wrappers_match_direct_calls() {
    let text = [2, 1, 3, 1, 2, 0];
    let n = text.len();

    // Forward transform that supplies the input for the inverse calls.
    let mut transformed = vec![0; n];
    let mut forward_scratch = vec![0; n];
    let primary_index =
        libsais16x64_bwt(&text, &mut transformed, &mut forward_scratch, 0, None);

    let mut unbwt_ctx = unbwt_create_ctx().unwrap();
    let mut plain_out = vec![0; n];
    let mut plain_scratch = vec![0; n];
    let mut ctx_out = vec![0; n];
    let mut ctx_scratch = vec![0; n];

    // Inverse driven by the primary index.
    let plain_status = libsais16x64_unbwt(
        &transformed,
        &mut plain_out,
        &mut plain_scratch,
        None,
        primary_index,
    );
    assert_eq!(plain_status, 0);
    let ctx_status = libsais16x64_unbwt_ctx(
        &mut unbwt_ctx,
        &transformed,
        &mut ctx_out,
        &mut ctx_scratch,
        None,
        primary_index,
    );
    assert_eq!(ctx_status, 0);
    assert_eq!(ctx_out, plain_out);

    // Inverse driven by an auxiliary index table produced by the aux forward transform.
    let mut aux_indexes = vec![0; 2];
    let forward_aux_status = libsais16x64_bwt_aux(
        &text,
        &mut transformed,
        &mut forward_scratch,
        0,
        None,
        4,
        &mut aux_indexes,
    );
    assert_eq!(forward_aux_status, 0);
    let plain_aux_status = libsais16x64_unbwt_aux(
        &transformed,
        &mut plain_out,
        &mut plain_scratch,
        None,
        4,
        &aux_indexes,
    );
    assert_eq!(plain_aux_status, 0);
    let ctx_aux_status = libsais16x64_unbwt_aux_ctx(
        &mut unbwt_ctx,
        &transformed,
        &mut ctx_out,
        &mut ctx_scratch,
        None,
        4,
        &aux_indexes,
    );
    assert_eq!(ctx_aux_status, 0);
    assert_eq!(ctx_out, plain_out);
}
18771
/// Checks that the `_ctx` entry points fill the optional frequency table exactly like
/// the context-free functions, for SA, GSA, BWT and BWT-with-aux construction.
#[test]
fn libsais16x64_ctx_frequency_wrappers_match_direct_calls() {
    let text = [2, 1, 3, 1, 2, 0];
    let gsa_text = [2, 1, 0, 3, 1, 0];
    let mut ctx = create_ctx().unwrap();

    // Suffix array. The tables start at -1 so untouched entries remain recognisable.
    let mut direct_sa = vec![0; text.len()];
    let mut ctx_sa = vec![0; text.len()];
    let mut direct_freq = vec![-1; ALPHABET_SIZE];
    let mut ctx_freq = vec![-1; ALPHABET_SIZE];
    assert_eq!(
        libsais16x64(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
        0
    );
    assert_eq!(
        libsais16x64_ctx(&mut ctx, &text, &mut ctx_sa, 0, Some(&mut ctx_freq)),
        0
    );
    assert_eq!(ctx_sa, direct_sa);
    assert_eq!(ctx_freq, direct_freq);

    // Generalized suffix array.
    let mut direct_gsa = vec![0; gsa_text.len()];
    let mut ctx_gsa = vec![0; gsa_text.len()];
    direct_freq.fill(-1);
    ctx_freq.fill(-1);
    assert_eq!(
        libsais16x64_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
        0
    );
    assert_eq!(
        libsais16x64_gsa_ctx(&mut ctx, &gsa_text, &mut ctx_gsa, 0, Some(&mut ctx_freq)),
        0
    );
    assert_eq!(ctx_gsa, direct_gsa);
    assert_eq!(ctx_freq, direct_freq);

    // Forward BWT.
    let mut direct_bwt = vec![0; text.len()];
    let mut direct_work = vec![0; text.len()];
    let mut ctx_bwt = vec![0; text.len()];
    let mut ctx_work = vec![0; text.len()];
    direct_freq.fill(-1);
    ctx_freq.fill(-1);
    let direct_primary = libsais16x64_bwt(
        &text,
        &mut direct_bwt,
        &mut direct_work,
        0,
        Some(&mut direct_freq),
    );
    let ctx_primary = libsais16x64_bwt_ctx(
        &mut ctx,
        &text,
        &mut ctx_bwt,
        &mut ctx_work,
        0,
        Some(&mut ctx_freq),
    );
    // Two identical failures would leave equal zeroed outputs and equal -1 tables, so
    // agreement alone is not enough: require a non-negative primary index.
    assert!(
        direct_primary >= 0,
        "direct BWT reported an error: {direct_primary}"
    );
    assert_eq!(direct_primary, ctx_primary);
    assert_eq!(ctx_bwt, direct_bwt);
    assert_eq!(ctx_freq, direct_freq);

    // Forward BWT with auxiliary indexes.
    let mut direct_aux = vec![0; 2];
    let mut ctx_aux = vec![0; 2];
    direct_freq.fill(-1);
    ctx_freq.fill(-1);
    let direct_status = libsais16x64_bwt_aux(
        &text,
        &mut direct_bwt,
        &mut direct_work,
        0,
        Some(&mut direct_freq),
        4,
        &mut direct_aux,
    );
    let ctx_status = libsais16x64_bwt_aux_ctx(
        &mut ctx,
        &text,
        &mut ctx_bwt,
        &mut ctx_work,
        0,
        Some(&mut ctx_freq),
        4,
        &mut ctx_aux,
    );
    // Same reasoning: pin the success code before comparing the outputs.
    assert_eq!(direct_status, 0);
    assert_eq!(direct_status, ctx_status);
    assert_eq!(ctx_bwt, direct_bwt);
    assert_eq!(ctx_aux, direct_aux);
    assert_eq!(ctx_freq, direct_freq);
}
18863
/// Feeds the frequency table produced by the forward BWT into the inverse functions and
/// checks that the `_ctx` variants agree with the context-free ones and restore `text`.
#[test]
fn libsais16x64_unbwt_ctx_frequency_wrappers_match_direct_calls() {
    let text = [2, 1, 3, 1, 2, 0];
    let n = text.len();

    // Forward transform; it also fills the frequency table reused by the inverse calls.
    let mut symbol_counts = vec![0; ALPHABET_SIZE];
    let mut transformed = vec![0; n];
    let mut forward_scratch = vec![0; n];
    let primary_index = libsais16x64_bwt(
        &text,
        &mut transformed,
        &mut forward_scratch,
        0,
        Some(&mut symbol_counts),
    );
    assert!(primary_index >= 0);

    let mut unbwt_ctx = unbwt_create_ctx().unwrap();
    let mut plain_out = vec![0; n];
    let mut plain_scratch = vec![0; n + 1];
    let mut ctx_out = vec![0; n];
    let mut ctx_scratch = vec![0; n + 1];

    // Inverse driven by the primary index.
    let plain_status = libsais16x64_unbwt(
        &transformed,
        &mut plain_out,
        &mut plain_scratch,
        Some(&symbol_counts),
        primary_index,
    );
    let ctx_status = libsais16x64_unbwt_ctx(
        &mut unbwt_ctx,
        &transformed,
        &mut ctx_out,
        &mut ctx_scratch,
        Some(&symbol_counts),
        primary_index,
    );
    assert_eq!(plain_status, ctx_status);
    assert_eq!(ctx_out, plain_out);
    assert_eq!(ctx_out, text);

    // Recompute the transform together with its auxiliary index table.
    let aux_len = (n - 1) / 4 + 1;
    let mut aux_indexes = vec![0; aux_len];
    let forward_aux_status = libsais16x64_bwt_aux(
        &text,
        &mut transformed,
        &mut forward_scratch,
        0,
        Some(&mut symbol_counts),
        4,
        &mut aux_indexes,
    );
    assert_eq!(forward_aux_status, 0);

    // Wipe the previous results so the second round cannot pass on stale data.
    plain_out.fill(0);
    plain_scratch.fill(0);
    ctx_out.fill(0);
    ctx_scratch.fill(0);

    // Inverse driven by the auxiliary index table.
    let plain_aux_status = libsais16x64_unbwt_aux(
        &transformed,
        &mut plain_out,
        &mut plain_scratch,
        Some(&symbol_counts),
        4,
        &aux_indexes,
    );
    let ctx_aux_status = libsais16x64_unbwt_aux_ctx(
        &mut unbwt_ctx,
        &transformed,
        &mut ctx_out,
        &mut ctx_scratch,
        Some(&symbol_counts),
        4,
        &aux_indexes,
    );
    assert_eq!(plain_aux_status, ctx_aux_status);
    assert_eq!(ctx_out, plain_out);
    assert_eq!(ctx_out, text);
}
18916
/// Checks each `_omp` construction entry point against its single-threaded counterpart
/// and verifies that a negative thread count is answered with `-1`.
#[test]
fn libsais16x64_omp_wrappers_match_direct_calls_and_reject_negative_threads() {
    let text = [2, 1, 3, 1, 2, 0];
    let gsa_text = [2, 1, 0, 3, 1, 0];

    // Suffix array.
    let mut direct_sa = vec![0; text.len()];
    let mut omp_sa = vec![0; text.len()];
    assert_eq!(libsais16x64(&text, &mut direct_sa, 0, None), 0);
    assert_eq!(libsais16x64_omp(&text, &mut omp_sa, 0, None, 2), 0);
    assert_eq!(omp_sa, direct_sa);
    assert_eq!(libsais16x64_omp(&text, &mut omp_sa, 0, None, -1), -1);

    // Generalized suffix array.
    let mut direct_gsa = vec![0; gsa_text.len()];
    let mut omp_gsa = vec![0; gsa_text.len()];
    assert_eq!(libsais16x64_gsa(&gsa_text, &mut direct_gsa, 0, None), 0);
    assert_eq!(libsais16x64_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, 2), 0);
    assert_eq!(omp_gsa, direct_gsa);
    assert_eq!(
        libsais16x64_gsa_omp(&gsa_text, &mut omp_gsa, 0, None, -1),
        -1
    );

    // Integer-alphabet (`_long`) variant; the input is passed mutably, so the texts are
    // compared afterwards as well.
    let int_text = [1, 2, 1, 0];
    let mut direct_int_text = int_text.to_vec();
    let mut omp_int_text = int_text.to_vec();
    let mut direct_int_sa = vec![0; int_text.len()];
    let mut omp_int_sa = vec![0; int_text.len()];
    assert_eq!(
        libsais16x64_long(&mut direct_int_text, &mut direct_int_sa, 3, 0),
        0
    );
    assert_eq!(
        libsais16x64_long_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, 2),
        0
    );
    assert_eq!(omp_int_text, direct_int_text);
    assert_eq!(omp_int_sa, direct_int_sa);
    assert_eq!(
        libsais16x64_long_omp(&mut omp_int_text, &mut omp_int_sa, 3, 0, -1),
        -1
    );

    // Forward BWT.
    let mut direct_bwt = vec![0; text.len()];
    let mut direct_work = vec![0; text.len()];
    let mut omp_bwt = vec![0; text.len()];
    let mut omp_work = vec![0; text.len()];
    let direct_primary = libsais16x64_bwt(&text, &mut direct_bwt, &mut direct_work, 0, None);
    let omp_primary = libsais16x64_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, 2);
    // Agreement alone would also accept two identical failures that leave both output
    // buffers zeroed, so require a non-negative primary index first.
    assert!(
        direct_primary >= 0,
        "direct BWT reported an error: {direct_primary}"
    );
    assert_eq!(direct_primary, omp_primary);
    assert_eq!(omp_bwt, direct_bwt);
    assert_eq!(
        libsais16x64_bwt_omp(&text, &mut omp_bwt, &mut omp_work, 0, None, -1),
        -1
    );

    // Forward BWT with auxiliary indexes.
    let mut direct_aux = vec![0; 2];
    let mut omp_aux = vec![0; 2];
    let direct_status = libsais16x64_bwt_aux(
        &text,
        &mut direct_bwt,
        &mut direct_work,
        0,
        None,
        4,
        &mut direct_aux,
    );
    let omp_status = libsais16x64_bwt_aux_omp(
        &text,
        &mut omp_bwt,
        &mut omp_work,
        0,
        None,
        4,
        &mut omp_aux,
        2,
    );
    // Same reasoning: pin the success code before comparing the outputs.
    assert_eq!(direct_status, 0);
    assert_eq!(direct_status, omp_status);
    assert_eq!(omp_bwt, direct_bwt);
    assert_eq!(omp_aux, direct_aux);
    assert_eq!(
        libsais16x64_bwt_aux_omp(
            &text,
            &mut omp_bwt,
            &mut omp_work,
            0,
            None,
            4,
            &mut omp_aux,
            -1
        ),
        -1
    );
}
19011
/// Checks that the `_omp` entry points fill the optional frequency table exactly like
/// the single-threaded functions, for SA, GSA, BWT and BWT-with-aux construction.
#[test]
fn libsais16x64_omp_frequency_wrappers_match_direct_calls() {
    let text = [2, 1, 3, 1, 2, 0];
    let gsa_text = [2, 1, 0, 3, 1, 0];

    // Suffix array. The tables start at -1 so untouched entries remain recognisable.
    let mut direct_sa = vec![0; text.len()];
    let mut omp_sa = vec![0; text.len()];
    let mut direct_freq = vec![-1; ALPHABET_SIZE];
    let mut omp_freq = vec![-1; ALPHABET_SIZE];
    assert_eq!(
        libsais16x64(&text, &mut direct_sa, 0, Some(&mut direct_freq)),
        0
    );
    assert_eq!(
        libsais16x64_omp(&text, &mut omp_sa, 0, Some(&mut omp_freq), 2),
        0
    );
    assert_eq!(omp_sa, direct_sa);
    assert_eq!(omp_freq, direct_freq);

    // Generalized suffix array.
    let mut direct_gsa = vec![0; gsa_text.len()];
    let mut omp_gsa = vec![0; gsa_text.len()];
    direct_freq.fill(-1);
    omp_freq.fill(-1);
    assert_eq!(
        libsais16x64_gsa(&gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
        0
    );
    assert_eq!(
        libsais16x64_gsa_omp(&gsa_text, &mut omp_gsa, 0, Some(&mut omp_freq), 2),
        0
    );
    assert_eq!(omp_gsa, direct_gsa);
    assert_eq!(omp_freq, direct_freq);

    // Forward BWT.
    let mut direct_bwt = vec![0; text.len()];
    let mut direct_work = vec![0; text.len()];
    let mut omp_bwt = vec![0; text.len()];
    let mut omp_work = vec![0; text.len()];
    direct_freq.fill(-1);
    omp_freq.fill(-1);
    let direct_primary = libsais16x64_bwt(
        &text,
        &mut direct_bwt,
        &mut direct_work,
        0,
        Some(&mut direct_freq),
    );
    let omp_primary = libsais16x64_bwt_omp(
        &text,
        &mut omp_bwt,
        &mut omp_work,
        0,
        Some(&mut omp_freq),
        2,
    );
    // Two identical failures would leave equal zeroed outputs and equal -1 tables, so
    // agreement alone is not enough: require a non-negative primary index.
    assert!(
        direct_primary >= 0,
        "direct BWT reported an error: {direct_primary}"
    );
    assert_eq!(direct_primary, omp_primary);
    assert_eq!(omp_bwt, direct_bwt);
    assert_eq!(omp_freq, direct_freq);

    // Forward BWT with auxiliary indexes.
    let mut direct_aux = vec![0; 2];
    let mut omp_aux = vec![0; 2];
    direct_freq.fill(-1);
    omp_freq.fill(-1);
    let direct_status = libsais16x64_bwt_aux(
        &text,
        &mut direct_bwt,
        &mut direct_work,
        0,
        Some(&mut direct_freq),
        4,
        &mut direct_aux,
    );
    let omp_status = libsais16x64_bwt_aux_omp(
        &text,
        &mut omp_bwt,
        &mut omp_work,
        0,
        Some(&mut omp_freq),
        4,
        &mut omp_aux,
        2,
    );
    // Same reasoning: pin the success code before comparing the outputs.
    assert_eq!(direct_status, 0);
    assert_eq!(direct_status, omp_status);
    assert_eq!(omp_bwt, direct_bwt);
    assert_eq!(omp_aux, direct_aux);
    assert_eq!(omp_freq, direct_freq);
}
19101
/// Feeds the frequency table produced by the forward BWT into the inverse functions and
/// checks that the `_omp` variants agree with the single-threaded ones and restore `text`.
#[test]
fn libsais16x64_unbwt_omp_frequency_wrappers_match_direct_calls() {
    let text = [2, 1, 3, 1, 2, 0];
    let n = text.len();

    // Forward transform; it also fills the frequency table reused by the inverse calls.
    let mut symbol_counts = vec![0; ALPHABET_SIZE];
    let mut transformed = vec![0; n];
    let mut forward_scratch = vec![0; n];
    let primary_index = libsais16x64_bwt(
        &text,
        &mut transformed,
        &mut forward_scratch,
        0,
        Some(&mut symbol_counts),
    );
    assert!(primary_index >= 0);

    let mut serial_out = vec![0; n];
    let mut serial_scratch = vec![0; n + 1];
    let mut threaded_out = vec![0; n];
    let mut threaded_scratch = vec![0; n + 1];

    // Inverse driven by the primary index.
    let serial_status = libsais16x64_unbwt(
        &transformed,
        &mut serial_out,
        &mut serial_scratch,
        Some(&symbol_counts),
        primary_index,
    );
    let threaded_status = libsais16x64_unbwt_omp(
        &transformed,
        &mut threaded_out,
        &mut threaded_scratch,
        Some(&symbol_counts),
        primary_index,
        2,
    );
    assert_eq!(serial_status, threaded_status);
    assert_eq!(threaded_out, serial_out);
    assert_eq!(threaded_out, text);

    // Recompute the transform together with its auxiliary index table.
    let aux_len = (n - 1) / 4 + 1;
    let mut aux_indexes = vec![0; aux_len];
    let forward_aux_status = libsais16x64_bwt_aux(
        &text,
        &mut transformed,
        &mut forward_scratch,
        0,
        Some(&mut symbol_counts),
        4,
        &mut aux_indexes,
    );
    assert_eq!(forward_aux_status, 0);

    // Wipe the previous results so the second round cannot pass on stale data.
    serial_out.fill(0);
    serial_scratch.fill(0);
    threaded_out.fill(0);
    threaded_scratch.fill(0);

    // Inverse driven by the auxiliary index table.
    let serial_aux_status = libsais16x64_unbwt_aux(
        &transformed,
        &mut serial_out,
        &mut serial_scratch,
        Some(&symbol_counts),
        4,
        &aux_indexes,
    );
    let threaded_aux_status = libsais16x64_unbwt_aux_omp(
        &transformed,
        &mut threaded_out,
        &mut threaded_scratch,
        Some(&symbol_counts),
        4,
        &aux_indexes,
        2,
    );
    assert_eq!(serial_aux_status, threaded_aux_status);
    assert_eq!(threaded_out, serial_out);
    assert_eq!(threaded_out, text);
}
19138
/// Compares the `_omp` variants of PLCP, GSA PLCP, LCP and inverse BWT with their
/// single-threaded counterparts, and checks that passing `-1` as the trailing
/// argument of each `_omp` call yields `-1`.
#[test]
fn libsais16x64_lcp_and_unbwt_omp_wrappers_match_direct_calls() {
    let text = [2, 1, 3, 1, 2, 0];
    let n = text.len();
    let mut suffix_array = vec![0; n];
    let sa_status = libsais16x64(&text, &mut suffix_array, 0, None);
    assert_eq!(sa_status, 0);

    // PLCP over the plain suffix array.
    let mut plcp_serial = vec![0; n];
    let mut plcp_threaded = vec![0; n];
    let plcp_serial_status = libsais16x64_plcp(&text, &suffix_array, &mut plcp_serial);
    assert_eq!(plcp_serial_status, 0);
    let plcp_threaded_status =
        libsais16x64_plcp_omp(&text, &suffix_array, &mut plcp_threaded, 2);
    assert_eq!(plcp_threaded_status, 0);
    assert_eq!(plcp_threaded, plcp_serial);
    let plcp_rejected = libsais16x64_plcp_omp(&text, &suffix_array, &mut plcp_threaded, -1);
    assert_eq!(plcp_rejected, -1);

    // PLCP over a generalized suffix array.
    let gsa_text = [2, 1, 0, 1, 2, 0];
    let gsa_len = gsa_text.len();
    let mut generalized_sa = vec![0; gsa_len];
    let gsa_status = libsais16x64_gsa(&gsa_text, &mut generalized_sa, 0, None);
    assert_eq!(gsa_status, 0);
    let mut gsa_plcp_serial = vec![0; gsa_len];
    let mut gsa_plcp_threaded = vec![0; gsa_len];
    let gsa_plcp_serial_status =
        libsais16x64_plcp_gsa(&gsa_text, &generalized_sa, &mut gsa_plcp_serial);
    assert_eq!(gsa_plcp_serial_status, 0);
    let gsa_plcp_threaded_status =
        libsais16x64_plcp_gsa_omp(&gsa_text, &generalized_sa, &mut gsa_plcp_threaded, 2);
    assert_eq!(gsa_plcp_threaded_status, 0);
    assert_eq!(gsa_plcp_threaded, gsa_plcp_serial);
    let gsa_plcp_rejected =
        libsais16x64_plcp_gsa_omp(&gsa_text, &generalized_sa, &mut gsa_plcp_threaded, -1);
    assert_eq!(gsa_plcp_rejected, -1);

    // LCP computed from the single-threaded PLCP result.
    let mut lcp_serial = vec![0; n];
    let mut lcp_threaded = vec![0; n];
    let lcp_serial_status = libsais16x64_lcp(&plcp_serial, &suffix_array, &mut lcp_serial);
    assert_eq!(lcp_serial_status, 0);
    let lcp_threaded_status =
        libsais16x64_lcp_omp(&plcp_serial, &suffix_array, &mut lcp_threaded, 2);
    assert_eq!(lcp_threaded_status, 0);
    assert_eq!(lcp_threaded, lcp_serial);
    let lcp_rejected = libsais16x64_lcp_omp(&plcp_serial, &suffix_array, &mut lcp_threaded, -1);
    assert_eq!(lcp_rejected, -1);

    // Inverse BWT.
    let mut transformed = vec![0; n];
    let mut forward_scratch = vec![0; n];
    let primary_index =
        libsais16x64_bwt(&text, &mut transformed, &mut forward_scratch, 0, None);
    let mut restored_serial = vec![0; n];
    let mut restored_threaded = vec![0; n];
    let mut serial_scratch = vec![0; n];
    let mut threaded_scratch = vec![0; n];
    let unbwt_serial_status = libsais16x64_unbwt(
        &transformed,
        &mut restored_serial,
        &mut serial_scratch,
        None,
        primary_index,
    );
    assert_eq!(unbwt_serial_status, 0);
    let unbwt_threaded_status = libsais16x64_unbwt_omp(
        &transformed,
        &mut restored_threaded,
        &mut threaded_scratch,
        None,
        primary_index,
        2,
    );
    assert_eq!(unbwt_threaded_status, 0);
    assert_eq!(restored_threaded, restored_serial);
    let unbwt_rejected = libsais16x64_unbwt_omp(
        &transformed,
        &mut restored_threaded,
        &mut threaded_scratch,
        None,
        primary_index,
        -1,
    );
    assert_eq!(unbwt_rejected, -1);
}
19202}