1use std::marker::PhantomData;
8use std::mem;
9#[cfg(feature = "upstream-c")]
10use std::mem::MaybeUninit;
11
12use crate::run_rayon_with_threads;
13use rayon::prelude::*;
14
/// Signed 64-bit integer used for suffix-array entries, text positions and counters.
pub type SaSint = i64;
/// Unsigned 64-bit counterpart of [`SaSint`].
pub type SaUint = u64;
/// Pointer-sized signed integer used for block offsets, block sizes and loop indices.
pub type FastSint = isize;
/// Pointer-sized unsigned integer used for bucket indices.
pub type FastUint = usize;
19
/// Bit width of [`SaSint`].
pub const SAINT_BIT: u32 = 64;
/// Largest value representable by [`SaSint`]; also serves as a mask that clears the sign bit.
pub const SAINT_MAX: SaSint = i64::MAX;
/// Smallest value representable by [`SaSint`] (only the sign bit set); XOR-ing an entry with
/// it toggles that entry's sign bit.
pub const SAINT_MIN: SaSint = i64::MIN;

/// Number of distinct byte symbols (256).
pub const ALPHABET_SIZE: usize = 1usize << 8;
/// Exponent used to size the inverse-BWT `fastbits` table: `1 + (1 << UNBWT_FASTBITS)` entries.
pub const UNBWT_FASTBITS: usize = 17;

/// `SAINT_BIT - 1`.
///
/// NOTE(review): presumably the bit position used for suffix-group tagging — confirm
/// against the code that consumes this constant.
pub const SUFFIX_GROUP_BIT: u32 = SAINT_BIT - 1;
/// Value with only bit `SUFFIX_GROUP_BIT - 1` set.
pub const SUFFIX_GROUP_MARKER: SaSint = 1_i64 << (SUFFIX_GROUP_BIT - 1);

/// Free-space figure used by the `_fs_omp` bucket layout when `local_buckets == 1`.
pub const LIBSAIS_LOCAL_BUFFER_SIZE: usize = 1000;
/// Number of [`ThreadCache`] entries allocated for every [`ThreadState`].
pub const LIBSAIS_PER_THREAD_CACHE_SIZE: usize = 24_576;

/// Flag value 0: no option bits set.
pub const LIBSAIS_FLAGS_NONE: SaSint = 0;
/// Flag bit 1. NOTE(review): presumably selects BWT output — confirm with the callers.
pub const LIBSAIS_FLAGS_BWT: SaSint = 1;
/// Flag bit 2. NOTE(review): presumably selects generalized-suffix-array mode — confirm.
pub const LIBSAIS_FLAGS_GSA: SaSint = 2;
36
/// One deferred suffix-array write, stored as a `(slot, value)` pair.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct ThreadCache {
    /// Destination slot in the suffix array. `compact_and_place_cached_suffixes` discards
    /// entries whose `symbol` is negative.
    pub symbol: SaSint,
    /// Value that `place_cached_suffixes` stores at slot `symbol`.
    pub index: SaSint,
}
42
/// Per-worker scratch state used by the multi-block (`_omp`) routines.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ThreadState {
    /// Exclusive end of the worker's block (`block_start + block_size`), as recorded by
    /// `count_and_gather_lms_suffixes_8u_omp`.
    pub position: FastSint,
    /// Worker-local counter. NOTE(review): not referenced in the visible part of the file —
    /// confirm its meaning against its users.
    pub count: FastSint,
    /// Number of LMS suffixes the worker gathered from its block.
    pub m: FastSint,
    /// Suffix-array entry at `position - 1` after gathering; only updated when `m > 0`.
    pub last_lms_suffix: FastSint,
    /// Worker-local bucket counters (`4 * ALPHABET_SIZE` entries).
    pub buckets: Vec<SaSint>,
    /// Worker-local write cache (`LIBSAIS_PER_THREAD_CACHE_SIZE` entries).
    pub cache: Vec<ThreadCache>,
}
52
53impl ThreadState {
54 fn new() -> Self {
55 Self {
56 position: 0,
57 count: 0,
58 m: 0,
59 last_lms_suffix: 0,
60 buckets: vec![0; 4 * ALPHABET_SIZE],
61 cache: vec![ThreadCache::default(); LIBSAIS_PER_THREAD_CACHE_SIZE],
62 }
63 }
64}
65
/// Reusable working memory for suffix-array construction.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Context {
    /// Shared bucket table; `create_ctx_main` allocates `8 * ALPHABET_SIZE` entries.
    pub buckets: Vec<SaSint>,
    /// One [`ThreadState`] per worker; `create_ctx_main` leaves this `None` for one thread.
    pub thread_state: Option<Vec<ThreadState>>,
    /// Number of worker threads the context was created for.
    pub threads: FastSint,
}
72
/// Reusable working memory for the inverse BWT.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnbwtContext {
    /// Table with `ALPHABET_SIZE * ALPHABET_SIZE` entries (one per symbol pair).
    pub bucket2: Vec<SaUint>,
    /// Lookup table with `1 + (1 << UNBWT_FASTBITS)` entries.
    pub fastbits: Vec<u16>,
    /// Per-thread tables (`threads * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)` entries);
    /// `unbwt_create_ctx_main` leaves this `None` for one thread.
    pub buckets: Option<Vec<SaUint>>,
    /// Number of worker threads the context was created for.
    pub threads: FastSint,
}
80
81#[doc(hidden)]
83pub fn buckets_index2(c: FastUint, s: FastUint) -> FastUint {
84 (c << 1) + s
85}
86
87#[doc(hidden)]
89pub fn buckets_index4(c: FastUint, s: FastUint) -> FastUint {
90 (c << 2) + s
91}
92
/// Rounds `value` up to the next multiple of `alignment`.
///
/// `alignment` must be a power of two; this is checked only in debug builds.
#[doc(hidden)]
pub fn align_up(value: usize, alignment: usize) -> usize {
    debug_assert!(alignment.is_power_of_two());
    // For a power of two, `alignment - 1` is a mask of the bits that must end up clear.
    let low_bits = alignment - 1;
    (value + alignment - 1) & !low_bits
}
99
100#[doc(hidden)]
102pub fn alloc_thread_state(threads: SaSint) -> Option<Vec<ThreadState>> {
103 if threads <= 0 {
104 return None;
105 }
106
107 let len = usize::try_from(threads).ok()?;
108 Some((0..len).map(|_| ThreadState::new()).collect())
109}
110
111#[doc(hidden)]
113pub fn create_ctx_main(threads: SaSint) -> Option<Context> {
114 if threads <= 0 {
115 return None;
116 }
117
118 let thread_state = if threads > 1 {
119 Some(alloc_thread_state(threads)?)
120 } else {
121 None
122 };
123
124 Some(Context {
125 buckets: vec![0; 8 * ALPHABET_SIZE],
126 thread_state,
127 threads: threads as FastSint,
128 })
129}
130
/// Creates a single-threaded [`Context`].
///
/// Shorthand for `create_ctx_main(1)`.
pub fn create_ctx() -> Option<Context> {
    create_ctx_main(1)
}
139
/// Releases a [`Context`]: the argument is taken by value and dropped on return.
pub fn free_ctx(_ctx: Context) {}
142
143#[doc(hidden)]
145pub fn unbwt_create_ctx_main(threads: SaSint) -> Option<UnbwtContext> {
146 if threads <= 0 {
147 return None;
148 }
149
150 let buckets = if threads > 1 {
151 let len = usize::try_from(threads).ok()? * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE);
152 Some(vec![0; len])
153 } else {
154 None
155 };
156
157 Some(UnbwtContext {
158 bucket2: vec![0; ALPHABET_SIZE * ALPHABET_SIZE],
159 fastbits: vec![0; 1 + (1 << UNBWT_FASTBITS)],
160 buckets,
161 threads: threads as FastSint,
162 })
163}
164
/// Releases an [`UnbwtContext`]: the argument is taken by value and dropped on return.
#[doc(hidden)]
pub fn unbwt_free_ctx_main(_ctx: UnbwtContext) {}
168
/// Creates a single-threaded [`UnbwtContext`].
///
/// Shorthand for `unbwt_create_ctx_main(1)`.
pub fn unbwt_create_ctx() -> Option<UnbwtContext> {
    unbwt_create_ctx_main(1)
}
177
/// Releases an [`UnbwtContext`]: the argument is taken by value and dropped on return.
pub fn unbwt_free_ctx(_ctx: UnbwtContext) {}
180
181#[doc(hidden)]
183pub fn count_negative_marked_suffixes(
184 sa: &[SaSint],
185 block_start: FastSint,
186 block_size: FastSint,
187) -> SaSint {
188 block_slice(sa, block_start, block_size)
189 .iter()
190 .map(|&value| SaSint::from(value < 0))
191 .sum()
192}
193
194#[doc(hidden)]
196pub fn count_zero_marked_suffixes(
197 sa: &[SaSint],
198 block_start: FastSint,
199 block_size: FastSint,
200) -> SaSint {
201 block_slice(sa, block_start, block_size)
202 .iter()
203 .map(|&value| SaSint::from(value == 0))
204 .sum()
205}
206
207#[doc(hidden)]
209pub fn place_cached_suffixes(
210 sa: &mut [SaSint],
211 cache: &[ThreadCache],
212 block_start: FastSint,
213 block_size: FastSint,
214) {
215 let start = usize::try_from(block_start).expect("block_start must be non-negative");
216 let len = usize::try_from(block_size).expect("block_size must be non-negative");
217 let entries = if cache.len() >= start + len {
218 &cache[start..start + len]
219 } else {
220 &cache[..len]
221 };
222
223 for entry in entries {
224 let slot = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
225 sa[slot] = entry.index;
226 }
227}
228
229#[doc(hidden)]
231pub fn compact_and_place_cached_suffixes(
232 sa: &mut [SaSint],
233 cache: &mut [ThreadCache],
234 block_start: FastSint,
235 block_size: FastSint,
236) {
237 let start = usize::try_from(block_start).expect("block_start must be non-negative");
238 let len = usize::try_from(block_size).expect("block_size must be non-negative");
239 let read_start = if cache.len() >= start + len { start } else { 0 };
240 let read_end = read_start + len;
241
242 let mut write = read_start;
243 for read in read_start..read_end {
244 let entry = cache[read];
245 if entry.symbol >= 0 {
246 cache[write] = entry;
247 write += 1;
248 }
249 }
250
251 place_cached_suffixes(sa, cache, block_start, (write - read_start) as FastSint);
252}
253
254#[doc(hidden)]
256pub fn flip_suffix_markers_omp(sa: &mut [SaSint], l: SaSint, threads: SaSint) {
257 let len = usize::try_from(l).expect("l must be non-negative");
258 let omp_num_threads = if threads > 1 && l >= 65_536 {
259 usize::try_from(threads).expect("threads must be non-negative")
260 } else {
261 1
262 };
263 if omp_num_threads > 1 {
264 let chunk_size = ((len / omp_num_threads) & !15usize).max(16);
265 run_rayon_with_threads(omp_num_threads, || {
266 sa[..len].par_chunks_mut(chunk_size).for_each(|chunk| {
267 for value in chunk {
268 *value ^= SAINT_MIN;
269 }
270 });
271 });
272 return;
273 }
274
275 let omp_block_stride = (len / omp_num_threads) & !15usize;
276 for omp_thread_num in 0..omp_num_threads {
277 let omp_block_start = omp_thread_num * omp_block_stride;
278 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
279 omp_block_stride
280 } else {
281 len - omp_block_start
282 };
283 for value in &mut sa[omp_block_start..omp_block_start + omp_block_size] {
284 *value ^= SAINT_MIN;
285 }
286 }
287}
288
289#[doc(hidden)]
291pub fn gather_lms_suffixes_8u(
292 t: &[u8],
293 sa: &mut [SaSint],
294 n: SaSint,
295 mut m: FastSint,
296 omp_block_start: FastSint,
297 omp_block_size: FastSint,
298) {
299 if omp_block_size <= 0 {
300 return;
301 }
302
303 let n = usize::try_from(n).expect("n must be non-negative");
304 let block_start =
305 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
306 let block_size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
307
308 let mut j = block_start + block_size;
309 let mut c0 = t[block_start + block_size - 1] as FastSint;
310 let mut c1 = -1;
311 while j < n {
312 c1 = t[j] as FastSint;
313 if c1 != c0 {
314 break;
315 }
316 j += 1;
317 }
318
319 let mut f0 = usize::from(c0 >= c1);
320 let mut f1: usize;
321 let mut i = block_start + block_size - 2;
322 let limit = block_start + 3;
323
324 while i >= limit {
325 c1 = t[i] as FastSint;
326 f1 = usize::from(c1 > (c0 - f0 as FastSint));
327 sa[usize::try_from(m).expect("m must be non-negative")] = (i + 1) as SaSint;
328 m -= (f1 & !f0) as FastSint;
329
330 c0 = t[i - 1] as FastSint;
331 f0 = usize::from(c0 > (c1 - f1 as FastSint));
332 sa[usize::try_from(m).expect("m must be non-negative")] = i as SaSint;
333 m -= (f0 & !f1) as FastSint;
334
335 c1 = t[i - 2] as FastSint;
336 f1 = usize::from(c1 > (c0 - f0 as FastSint));
337 sa[usize::try_from(m).expect("m must be non-negative")] = (i - 1) as SaSint;
338 m -= (f1 & !f0) as FastSint;
339
340 c0 = t[i - 3] as FastSint;
341 f0 = usize::from(c0 > (c1 - f1 as FastSint));
342 sa[usize::try_from(m).expect("m must be non-negative")] = (i - 2) as SaSint;
343 m -= (f0 & !f1) as FastSint;
344
345 if i < 4 {
346 break;
347 }
348 i -= 4;
349 }
350
351 let tail_limit = limit - 3;
352 while i >= tail_limit {
353 c1 = c0;
354 c0 = t[i] as FastSint;
355 f1 = f0;
356 f0 = usize::from(c0 > (c1 - f1 as FastSint));
357 sa[usize::try_from(m).expect("m must be non-negative")] = (i + 1) as SaSint;
358 m -= (f0 & !f1) as FastSint;
359 if i == 0 {
360 break;
361 }
362 i -= 1;
363 }
364
365 sa[usize::try_from(m).expect("m must be non-negative")] = (i + 1) as SaSint;
366}
367
368#[doc(hidden)]
370pub fn gather_lms_suffixes_8u_omp(
371 t: &[u8],
372 sa: &mut [SaSint],
373 n: SaSint,
374 threads: SaSint,
375 thread_state: &mut [ThreadState],
376) {
377 let n_usize = usize::try_from(n).expect("n must be non-negative");
378 let omp_num_threads = if threads > 1 && n >= 65_536 {
379 usize::try_from(threads)
380 .expect("threads must be non-negative")
381 .min(thread_state.len())
382 .max(1)
383 } else {
384 1
385 };
386 if omp_num_threads == 1 {
387 gather_lms_suffixes_8u(t, sa, n, n as FastSint - 1, 0, n as FastSint);
388 return;
389 }
390
391 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
392 let mut suffix_counts_after = vec![0 as FastSint; omp_num_threads];
393 let mut m = 0 as FastSint;
394 for omp_thread_num in (0..omp_num_threads).rev() {
395 suffix_counts_after[omp_thread_num] = m;
396 m += thread_state[omp_thread_num].m;
397 }
398
399 for omp_thread_num in 0..omp_num_threads {
400 let omp_block_start = omp_thread_num * omp_block_stride;
401 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
402 omp_block_stride
403 } else {
404 n_usize - omp_block_start
405 };
406 gather_lms_suffixes_8u(
407 t,
408 sa,
409 n,
410 n as FastSint - 1 - suffix_counts_after[omp_thread_num],
411 omp_block_start as FastSint,
412 omp_block_size as FastSint,
413 );
414 }
415
416 for omp_thread_num in 0..omp_num_threads {
417 if thread_state[omp_thread_num].m > 0 {
418 let dst = usize::try_from(n as FastSint - 1 - suffix_counts_after[omp_thread_num])
419 .expect("destination must be non-negative");
420 sa[dst] = thread_state[omp_thread_num].last_lms_suffix as SaSint;
421 }
422 }
423}
424
425#[doc(hidden)]
427pub fn gather_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
428 let n_usize = usize::try_from(n).expect("n must be non-negative");
429 let mut i = n as FastSint - 2;
430 let mut m = n_usize - 1;
431 let mut f0 = 1usize;
432 let mut f1: usize;
433 let mut c0 = t[n_usize - 1] as FastSint;
434 let mut c1: FastSint;
435
436 while i >= 3 {
437 c1 = t[i as usize] as FastSint;
438 f1 = usize::from(c1 > (c0 - f0 as FastSint));
439 sa[m] = (i + 1) as SaSint;
440 m -= f1 & !f0;
441
442 c0 = t[(i - 1) as usize] as FastSint;
443 f0 = usize::from(c0 > (c1 - f1 as FastSint));
444 sa[m] = i as SaSint;
445 m -= f0 & !f1;
446
447 c1 = t[(i - 2) as usize] as FastSint;
448 f1 = usize::from(c1 > (c0 - f0 as FastSint));
449 sa[m] = (i - 1) as SaSint;
450 m -= f1 & !f0;
451
452 c0 = t[(i - 3) as usize] as FastSint;
453 f0 = usize::from(c0 > (c1 - f1 as FastSint));
454 sa[m] = (i - 2) as SaSint;
455 m -= f0 & !f1;
456
457 i -= 4;
458 }
459
460 while i >= 0 {
461 c1 = c0;
462 c0 = t[i as usize] as FastSint;
463 f1 = f0;
464 f0 = usize::from(c0 > (c1 - f1 as FastSint));
465 sa[m] = (i + 1) as SaSint;
466 m -= f0 & !f1;
467 i -= 1;
468 }
469
470 (n_usize - 1 - m) as SaSint
471}
472
473#[doc(hidden)]
475pub fn gather_compacted_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
476 let n_usize = usize::try_from(n).expect("n must be non-negative");
477 let mut i = n as FastSint - 2;
478 let mut m = n_usize - 1;
479 let mut f0 = 1usize;
480 let mut f1: usize;
481 let mut c0 = t[n_usize - 1] as FastSint;
482 let mut c1: FastSint;
483
484 while i >= 3 {
485 c1 = t[i as usize] as FastSint;
486 f1 = usize::from(c1 > (c0 - f0 as FastSint));
487 sa[m] = (i + 1) as SaSint;
488 m -= f1 & !f0 & usize::from(c0 >= 0);
489
490 c0 = t[(i - 1) as usize] as FastSint;
491 f0 = usize::from(c0 > (c1 - f1 as FastSint));
492 sa[m] = i as SaSint;
493 m -= f0 & !f1 & usize::from(c1 >= 0);
494
495 c1 = t[(i - 2) as usize] as FastSint;
496 f1 = usize::from(c1 > (c0 - f0 as FastSint));
497 sa[m] = (i - 1) as SaSint;
498 m -= f1 & !f0 & usize::from(c0 >= 0);
499
500 c0 = t[(i - 3) as usize] as FastSint;
501 f0 = usize::from(c0 > (c1 - f1 as FastSint));
502 sa[m] = (i - 2) as SaSint;
503 m -= f0 & !f1 & usize::from(c1 >= 0);
504
505 i -= 4;
506 }
507
508 while i >= 0 {
509 c1 = c0;
510 c0 = t[i as usize] as FastSint;
511 f1 = f0;
512 f0 = usize::from(c0 > (c1 - f1 as FastSint));
513 sa[m] = (i + 1) as SaSint;
514 m -= f0 & !f1 & usize::from(c1 >= 0);
515 i -= 1;
516 }
517
518 (n_usize - 1 - m) as SaSint
519}
520
521#[doc(hidden)]
523pub fn count_lms_suffixes_32s_4k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
524 buckets.fill(0);
525 let n_usize = usize::try_from(n).expect("n must be non-negative");
526 let _k_usize = usize::try_from(k).expect("k must be non-negative");
527 let mut i = n as FastSint - 2;
528 let mut f0 = 1usize;
529 let mut f1: usize;
530 let mut c0 = t[n_usize - 1] as FastSint;
531 let mut c1: FastSint;
532
533 while i >= 3 {
534 c1 = t[i as usize] as FastSint;
535 f1 = usize::from(c1 > (c0 - f0 as FastSint));
536 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
537
538 c0 = t[(i - 1) as usize] as FastSint;
539 f0 = usize::from(c0 > (c1 - f1 as FastSint));
540 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
541
542 c1 = t[(i - 2) as usize] as FastSint;
543 f1 = usize::from(c1 > (c0 - f0 as FastSint));
544 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
545
546 c0 = t[(i - 3) as usize] as FastSint;
547 f0 = usize::from(c0 > (c1 - f1 as FastSint));
548 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
549
550 i -= 4;
551 }
552
553 while i >= 0 {
554 c1 = c0;
555 c0 = t[i as usize] as FastSint;
556 f1 = f0;
557 f0 = usize::from(c0 > (c1 - f1 as FastSint));
558 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
559 i -= 1;
560 }
561
562 buckets[buckets_index4(c0 as usize, f0 + f0)] += 1;
563}
564
565#[doc(hidden)]
567pub fn count_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
568 buckets.fill(0);
569 let n_usize = usize::try_from(n).expect("n must be non-negative");
570 let _k_usize = usize::try_from(k).expect("k must be non-negative");
571 let mut i = n as FastSint - 2;
572 let mut f0 = 1usize;
573 let mut f1: usize;
574 let mut c0 = t[n_usize - 1] as FastSint;
575 let mut c1: FastSint;
576
577 while i >= 3 {
578 c1 = t[i as usize] as FastSint;
579 f1 = usize::from(c1 > (c0 - f0 as FastSint));
580 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
581
582 c0 = t[(i - 1) as usize] as FastSint;
583 f0 = usize::from(c0 > (c1 - f1 as FastSint));
584 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
585
586 c1 = t[(i - 2) as usize] as FastSint;
587 f1 = usize::from(c1 > (c0 - f0 as FastSint));
588 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
589
590 c0 = t[(i - 3) as usize] as FastSint;
591 f0 = usize::from(c0 > (c1 - f1 as FastSint));
592 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
593
594 i -= 4;
595 }
596
597 while i >= 0 {
598 c1 = c0;
599 c0 = t[i as usize] as FastSint;
600 f1 = f0;
601 f0 = usize::from(c0 > (c1 - f1 as FastSint));
602 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
603 i -= 1;
604 }
605
606 buckets[buckets_index2(c0 as usize, 0)] += 1;
607}
608
609#[doc(hidden)]
611pub fn count_compacted_lms_suffixes_32s_2k(
612 t: &[SaSint],
613 n: SaSint,
614 k: SaSint,
615 buckets: &mut [SaSint],
616) {
617 buckets.fill(0);
618 let n_usize = usize::try_from(n).expect("n must be non-negative");
619 let _k_usize = usize::try_from(k).expect("k must be non-negative");
620 let mut i = n as FastSint - 2;
621 let mut f0 = 1usize;
622 let mut f1: usize;
623 let mut c0 = t[n_usize - 1] as FastSint;
624 let mut c1: FastSint;
625
626 while i >= 3 {
627 c1 = t[i as usize] as FastSint;
628 f1 = usize::from(c1 > (c0 - f0 as FastSint));
629 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
630
631 c0 = t[(i - 1) as usize] as FastSint;
632 f0 = usize::from(c0 > (c1 - f1 as FastSint));
633 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
634
635 c1 = t[(i - 2) as usize] as FastSint;
636 f1 = usize::from(c1 > (c0 - f0 as FastSint));
637 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
638
639 c0 = t[(i - 3) as usize] as FastSint;
640 f0 = usize::from(c0 > (c1 - f1 as FastSint));
641 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
642
643 i -= 4;
644 }
645
646 while i >= 0 {
647 c1 = c0;
648 c0 = t[i as usize] as FastSint;
649 f1 = f0;
650 f0 = usize::from(c0 > (c1 - f1 as FastSint));
651 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
652 i -= 1;
653 }
654
655 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, 0)] += 1;
656}
657
658#[doc(hidden)]
660pub fn count_and_gather_lms_suffixes_8u(
661 t: &[u8],
662 sa: &mut [SaSint],
663 n: SaSint,
664 buckets: &mut [SaSint],
665 omp_block_start: FastSint,
666 omp_block_size: FastSint,
667) -> SaSint {
668 buckets.fill(0);
669 let n = n as FastSint;
670 let mut m = omp_block_start + omp_block_size - 1;
671
672 if omp_block_size > 0 {
673 let prefetch_distance = 256 as FastSint;
674 let mut j = m + 1;
675 let mut c0 = t[m as usize] as FastSint;
676 let mut c1 = -1;
677 while j < n {
678 c1 = t[j as usize] as FastSint;
679 if c1 != c0 {
680 break;
681 }
682 j += 1;
683 }
684
685 let mut f0 = usize::from(c0 >= c1);
686 let mut f1: usize;
687 let mut i = m - 1;
688 let limit = omp_block_start + 3;
689
690 while i >= limit {
691 let _prefetch_index = i - prefetch_distance;
692 c1 = t[i as usize] as FastSint;
693 f1 = usize::from(c1 > (c0 - f0 as FastSint));
694 sa[m as usize] = (i + 1) as SaSint;
695 m -= (f1 & !f0) as FastSint;
696 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
697
698 c0 = t[(i - 1) as usize] as FastSint;
699 f0 = usize::from(c0 > (c1 - f1 as FastSint));
700 sa[m as usize] = i as SaSint;
701 m -= (f0 & !f1) as FastSint;
702 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
703
704 c1 = t[(i - 2) as usize] as FastSint;
705 f1 = usize::from(c1 > (c0 - f0 as FastSint));
706 sa[m as usize] = (i - 1) as SaSint;
707 m -= (f1 & !f0) as FastSint;
708 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
709
710 c0 = t[(i - 3) as usize] as FastSint;
711 f0 = usize::from(c0 > (c1 - f1 as FastSint));
712 sa[m as usize] = (i - 2) as SaSint;
713 m -= (f0 & !f1) as FastSint;
714 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
715
716 i -= 4;
717 }
718
719 let tail_limit = limit - 3;
720 while i >= tail_limit {
721 c1 = c0;
722 c0 = t[i as usize] as FastSint;
723 f1 = f0;
724 f0 = usize::from(c0 > (c1 - f1 as FastSint));
725 sa[m as usize] = (i + 1) as SaSint;
726 m -= (f0 & !f1) as FastSint;
727 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
728 i -= 1;
729 }
730
731 c1 = if i >= 0 {
732 t[i as usize] as FastSint
733 } else {
734 -1
735 };
736 f1 = usize::from(c1 > (c0 - f0 as FastSint));
737 sa[m as usize] = (i + 1) as SaSint;
738 m -= (f1 & !f0) as FastSint;
739 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
740 }
741
742 (omp_block_start + omp_block_size - 1 - m) as SaSint
743}
744
745#[doc(hidden)]
747pub fn count_and_gather_lms_suffixes_8u_omp(
748 t: &[u8],
749 sa: &mut [SaSint],
750 n: SaSint,
751 buckets: &mut [SaSint],
752 threads: SaSint,
753 thread_state: &mut [ThreadState],
754) -> SaSint {
755 let mut m = 0;
756 let n_usize = usize::try_from(n).expect("n must be non-negative");
757 let omp_num_threads = if threads > 1 && n >= 65_536 {
758 usize::try_from(threads)
759 .expect("threads must be non-negative")
760 .min(thread_state.len())
761 .max(1)
762 } else {
763 1
764 };
765 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
766
767 if omp_num_threads == 1 {
768 return count_and_gather_lms_suffixes_8u(t, sa, n, buckets, 0, n as FastSint);
769 }
770
771 for omp_thread_num in 0..omp_num_threads {
772 let omp_block_start = omp_thread_num * omp_block_stride;
773 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
774 omp_block_stride
775 } else {
776 n_usize - omp_block_start
777 };
778
779 let state = &mut thread_state[omp_thread_num];
780 state.position = FastSint::try_from(omp_block_start + omp_block_size)
781 .expect("position must fit FastSint");
782 state.m = FastSint::try_from(count_and_gather_lms_suffixes_8u(
783 t,
784 sa,
785 n,
786 &mut state.buckets,
787 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
788 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
789 ))
790 .expect("m must fit FastSint");
791
792 if state.m > 0 {
793 let position = usize::try_from(state.position).expect("position must be non-negative");
794 state.last_lms_suffix =
795 FastSint::try_from(sa[position - 1]).expect("suffix must fit FastSint");
796 }
797 }
798
799 buckets.fill(0);
800
801 for tnum in (0..omp_num_threads).rev() {
802 let state = &mut thread_state[tnum];
803 m += SaSint::try_from(state.m).expect("m must fit SaSint");
804
805 if tnum + 1 < omp_num_threads && state.m > 0 {
806 let position = usize::try_from(state.position).expect("position must be non-negative");
807 let count = usize::try_from(state.m).expect("m must be non-negative");
808 let dst = n_usize - usize::try_from(m).expect("m must be non-negative");
809 sa.copy_within(position - count..position, dst);
810 }
811
812 for s in 0..4 * ALPHABET_SIZE {
813 let a = buckets[s];
814 let b = state.buckets[s];
815 buckets[s] = a + b;
816 state.buckets[s] = a;
817 }
818 }
819
820 m
821}
822
823#[doc(hidden)]
825pub fn count_and_gather_lms_suffixes_32s_4k(
826 t: &[SaSint],
827 sa: &mut [SaSint],
828 n: SaSint,
829 k: SaSint,
830 buckets: &mut [SaSint],
831 omp_block_start: FastSint,
832 omp_block_size: FastSint,
833) -> SaSint {
834 buckets.fill(0);
835 let n = n as FastSint;
836 let _k = k as FastSint;
837 let mut m = omp_block_start + omp_block_size - 1;
838
839 if omp_block_size > 0 {
840 let prefetch_distance = 64 as FastSint;
841 let mut j = m + 1;
842 let mut c0 = t[m as usize] as FastSint;
843 let mut c1 = -1;
844
845 while j < n {
846 c1 = t[j as usize] as FastSint;
847 if c1 != c0 {
848 break;
849 }
850 j += 1;
851 }
852
853 let mut f0 = usize::from(c0 >= c1);
854 let mut f1: usize;
855 let mut i = m - 1;
856 let limit = omp_block_start + prefetch_distance + 3;
857
858 while i >= limit {
859 let _prefetch_index = i - 2 * prefetch_distance;
860 c1 = t[i as usize] as FastSint;
861 f1 = usize::from(c1 > (c0 - f0 as FastSint));
862 sa[m as usize] = (i + 1) as SaSint;
863 m -= (f1 & !f0) as FastSint;
864 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
865
866 c0 = t[(i - 1) as usize] as FastSint;
867 f0 = usize::from(c0 > (c1 - f1 as FastSint));
868 sa[m as usize] = i as SaSint;
869 m -= (f0 & !f1) as FastSint;
870 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
871
872 c1 = t[(i - 2) as usize] as FastSint;
873 f1 = usize::from(c1 > (c0 - f0 as FastSint));
874 sa[m as usize] = (i - 1) as SaSint;
875 m -= (f1 & !f0) as FastSint;
876 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
877
878 c0 = t[(i - 3) as usize] as FastSint;
879 f0 = usize::from(c0 > (c1 - f1 as FastSint));
880 sa[m as usize] = (i - 2) as SaSint;
881 m -= (f0 & !f1) as FastSint;
882 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
883
884 i -= 4;
885 }
886
887 let tail_limit = omp_block_start;
888 while i >= tail_limit {
889 c1 = c0;
890 c0 = t[i as usize] as FastSint;
891 f1 = f0;
892 f0 = usize::from(c0 > (c1 - f1 as FastSint));
893 sa[m as usize] = (i + 1) as SaSint;
894 m -= (f0 & !f1) as FastSint;
895 buckets[buckets_index4(c1 as usize, f1 + f1 + f0)] += 1;
896 i -= 1;
897 }
898
899 c1 = if i >= 0 {
900 t[i as usize] as FastSint
901 } else {
902 -1
903 };
904 f1 = usize::from(c1 > (c0 - f0 as FastSint));
905 sa[m as usize] = (i + 1) as SaSint;
906 m -= (f1 & !f0) as FastSint;
907 buckets[buckets_index4(c0 as usize, f0 + f0 + f1)] += 1;
908 }
909
910 (omp_block_start + omp_block_size - 1 - m) as SaSint
911}
912
913#[doc(hidden)]
915pub fn count_and_gather_lms_suffixes_32s_2k(
916 t: &[SaSint],
917 sa: &mut [SaSint],
918 n: SaSint,
919 k: SaSint,
920 buckets: &mut [SaSint],
921 omp_block_start: FastSint,
922 omp_block_size: FastSint,
923) -> SaSint {
924 buckets.fill(0);
925 let n = n as FastSint;
926 let _k = k as FastSint;
927 let mut m = omp_block_start + omp_block_size - 1;
928
929 if omp_block_size > 0 {
930 let prefetch_distance = 64 as FastSint;
931 let mut j = m + 1;
932 let mut c0 = t[m as usize] as FastSint;
933 let mut c1 = -1;
934
935 while j < n {
936 c1 = t[j as usize] as FastSint;
937 if c1 != c0 {
938 break;
939 }
940 j += 1;
941 }
942
943 let mut f0 = usize::from(c0 >= c1);
944 let mut f1: usize;
945 let mut i = m - 1;
946 let limit = omp_block_start + prefetch_distance + 3;
947
948 while i >= limit {
949 let _prefetch_index = i - 2 * prefetch_distance;
950 c1 = t[i as usize] as FastSint;
951 f1 = usize::from(c1 > (c0 - f0 as FastSint));
952 sa[m as usize] = (i + 1) as SaSint;
953 m -= (f1 & !f0) as FastSint;
954 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
955
956 c0 = t[(i - 1) as usize] as FastSint;
957 f0 = usize::from(c0 > (c1 - f1 as FastSint));
958 sa[m as usize] = i as SaSint;
959 m -= (f0 & !f1) as FastSint;
960 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
961
962 c1 = t[(i - 2) as usize] as FastSint;
963 f1 = usize::from(c1 > (c0 - f0 as FastSint));
964 sa[m as usize] = (i - 1) as SaSint;
965 m -= (f1 & !f0) as FastSint;
966 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
967
968 c0 = t[(i - 3) as usize] as FastSint;
969 f0 = usize::from(c0 > (c1 - f1 as FastSint));
970 sa[m as usize] = (i - 2) as SaSint;
971 m -= (f0 & !f1) as FastSint;
972 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
973
974 i -= 4;
975 }
976
977 let tail_limit = omp_block_start;
978 while i >= tail_limit {
979 c1 = c0;
980 c0 = t[i as usize] as FastSint;
981 f1 = f0;
982 f0 = usize::from(c0 > (c1 - f1 as FastSint));
983 sa[m as usize] = (i + 1) as SaSint;
984 m -= (f0 & !f1) as FastSint;
985 buckets[buckets_index2(c1 as usize, f0 & !f1)] += 1;
986 i -= 1;
987 }
988
989 c1 = if i >= 0 {
990 t[i as usize] as FastSint
991 } else {
992 -1
993 };
994 f1 = usize::from(c1 > (c0 - f0 as FastSint));
995 sa[m as usize] = (i + 1) as SaSint;
996 m -= (f1 & !f0) as FastSint;
997 buckets[buckets_index2(c0 as usize, f1 & !f0)] += 1;
998 }
999
1000 (omp_block_start + omp_block_size - 1 - m) as SaSint
1001}
1002
1003#[doc(hidden)]
1005pub fn count_and_gather_compacted_lms_suffixes_32s_2k(
1006 t: &[SaSint],
1007 sa: &mut [SaSint],
1008 n: SaSint,
1009 k: SaSint,
1010 buckets: &mut [SaSint],
1011 omp_block_start: FastSint,
1012 omp_block_size: FastSint,
1013) -> SaSint {
1014 buckets.fill(0);
1015 let n_usize = usize::try_from(n).expect("n must be non-negative");
1016 let _k_usize = usize::try_from(k).expect("k must be non-negative");
1017 let block_start =
1018 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
1019 let block_size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
1020 let mut m = block_start + block_size - 1;
1021
1022 if omp_block_size > 0 {
1023 let mut j = m + 1;
1024 let mut c0 = t[m] as FastSint;
1025 let mut c1 = -1;
1026
1027 while j < n_usize {
1028 c1 = t[j] as FastSint;
1029 if c1 != c0 {
1030 break;
1031 }
1032 j += 1;
1033 }
1034
1035 let mut f0 = usize::from(c0 >= c1);
1036 let mut f1: usize;
1037 let mut i = m as FastSint - 1;
1038 let limit = block_start as FastSint + 3;
1039
1040 while i >= limit {
1041 c1 = t[i as usize] as FastSint;
1042 f1 = usize::from(c1 > (c0 - f0 as FastSint));
1043 sa[m] = (i + 1) as SaSint;
1044 m -= f1 & !f0 & usize::from(c0 >= 0);
1045 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
1046
1047 c0 = t[(i - 1) as usize] as FastSint;
1048 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1049 sa[m] = i as SaSint;
1050 m -= f0 & !f1 & usize::from(c1 >= 0);
1051 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
1052
1053 c1 = t[(i - 2) as usize] as FastSint;
1054 f1 = usize::from(c1 > (c0 - f0 as FastSint));
1055 sa[m] = (i - 1) as SaSint;
1056 m -= f1 & !f0 & usize::from(c0 >= 0);
1057 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
1058
1059 c0 = t[(i - 3) as usize] as FastSint;
1060 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1061 sa[m] = (i - 2) as SaSint;
1062 m -= f0 & !f1 & usize::from(c1 >= 0);
1063 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
1064
1065 i -= 4;
1066 }
1067
1068 let tail_limit = block_start as FastSint;
1069 while i >= tail_limit {
1070 c1 = c0;
1071 c0 = t[i as usize] as FastSint;
1072 f1 = f0;
1073 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1074 sa[m] = (i + 1) as SaSint;
1075 m -= f0 & !f1 & usize::from(c1 >= 0);
1076 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
1077 i -= 1;
1078 }
1079
1080 c1 = if i >= 0 {
1081 t[i as usize] as FastSint
1082 } else {
1083 -1
1084 };
1085 f1 = usize::from(c1 > (c0 - f0 as FastSint));
1086 sa[m] = (i + 1) as SaSint;
1087 m -= f1 & !f0 & usize::from(c0 >= 0);
1088 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
1089 }
1090
1091 (block_start + block_size - 1 - m) as SaSint
1092}
1093
1094#[doc(hidden)]
1096pub fn get_bucket_stride(
1097 free_space: FastSint,
1098 bucket_size: FastSint,
1099 num_buckets: FastSint,
1100) -> FastSint {
1101 let bucket_size_1024 = (bucket_size + 1023) & (-1024);
1102 if free_space / (num_buckets - 1) >= bucket_size_1024 {
1103 return bucket_size_1024;
1104 }
1105 let bucket_size_16 = (bucket_size + 15) & (-16);
1106 if free_space / (num_buckets - 1) >= bucket_size_16 {
1107 return bucket_size_16;
1108 }
1109 bucket_size
1110}
1111
1112#[doc(hidden)]
1114pub fn count_and_gather_lms_suffixes_32s_4k_nofs_omp(
1115 t: &[SaSint],
1116 sa: &mut [SaSint],
1117 n: SaSint,
1118 k: SaSint,
1119 buckets: &mut [SaSint],
1120 threads: SaSint,
1121) -> SaSint {
1122 let m;
1123 let omp_num_threads = if threads > 1 && n >= 65_536 { 2 } else { 1 };
1124
1125 if omp_num_threads == 1 {
1126 m = count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as FastSint);
1127 } else {
1128 count_lms_suffixes_32s_4k(t, n, k, buckets);
1129 m = gather_lms_suffixes_32s(t, sa, n);
1130 }
1131
1132 m
1133}
1134
1135#[doc(hidden)]
1137pub fn count_and_gather_lms_suffixes_32s_2k_nofs_omp(
1138 t: &[SaSint],
1139 sa: &mut [SaSint],
1140 n: SaSint,
1141 k: SaSint,
1142 buckets: &mut [SaSint],
1143 threads: SaSint,
1144) -> SaSint {
1145 let m;
1146 let omp_num_threads = if threads > 1 && n >= 65_536 { 2 } else { 1 };
1147
1148 if omp_num_threads == 1 {
1149 m = count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1150 } else {
1151 count_lms_suffixes_32s_2k(t, n, k, buckets);
1152 m = gather_lms_suffixes_32s(t, sa, n);
1153 }
1154
1155 m
1156}
1157
1158#[doc(hidden)]
1160pub fn count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
1161 t: &[SaSint],
1162 sa: &mut [SaSint],
1163 n: SaSint,
1164 k: SaSint,
1165 buckets: &mut [SaSint],
1166 threads: SaSint,
1167) -> SaSint {
1168 let m;
1169 let omp_num_threads = if threads > 1 && n >= 65_536 { 2 } else { 1 };
1170
1171 if omp_num_threads == 1 {
1172 m = count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1173 } else {
1174 count_compacted_lms_suffixes_32s_2k(t, n, k, buckets);
1175 m = gather_compacted_lms_suffixes_32s(t, sa, n);
1176 }
1177
1178 m
1179}
1180
1181#[doc(hidden)]
1183pub fn count_and_gather_lms_suffixes_32s_4k_fs_omp(
1184 t: &[SaSint],
1185 sa: &mut [SaSint],
1186 n: SaSint,
1187 k: SaSint,
1188 buckets: &mut [SaSint],
1189 local_buckets: SaSint,
1190 threads: SaSint,
1191 thread_state: &mut [ThreadState],
1192) -> SaSint {
1193 let n_usize = usize::try_from(n).expect("n must be non-negative");
1194 let k_usize = usize::try_from(k).expect("k must be non-negative");
1195 let omp_num_threads = usize::try_from(threads).expect("threads must be non-negative");
1196 let bucket_size = FastSint::try_from(4 * k_usize).expect("bucket size must fit FastSint");
1197
1198 if omp_num_threads <= 1 || n < 65_536 {
1199 return count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as FastSint);
1200 }
1201
1202 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
1203 let free_space = if local_buckets == 1 {
1204 FastSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("free space must fit FastSint")
1205 } else if local_buckets > 1 {
1206 FastSint::try_from(local_buckets).expect("free space must fit FastSint")
1207 } else {
1208 FastSint::try_from(buckets.len()).expect("free space must fit FastSint")
1209 };
1210 let bucket_stride = get_bucket_stride(
1211 free_space,
1212 bucket_size,
1213 FastSint::try_from(omp_num_threads).expect("thread count must fit FastSint"),
1214 );
1215 let bucket_size_usize = usize::try_from(bucket_size).expect("bucket size must be non-negative");
1216 let bucket_stride_usize =
1217 usize::try_from(bucket_stride).expect("bucket stride must be non-negative");
1218 let workspace_len =
1219 bucket_size_usize + bucket_stride_usize.saturating_mul(omp_num_threads.saturating_sub(1));
1220 let mut workspace = vec![0; workspace_len];
1221
1222 for omp_thread_num in 0..omp_num_threads {
1223 let omp_block_start = omp_thread_num * omp_block_stride;
1224 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1225 omp_block_stride
1226 } else {
1227 n_usize - omp_block_start
1228 };
1229 let workspace_end = workspace_len - omp_thread_num * bucket_stride_usize;
1230 let workspace_start = workspace_end - bucket_size_usize;
1231 let count = count_and_gather_lms_suffixes_32s_4k(
1232 t,
1233 sa,
1234 n,
1235 k,
1236 &mut workspace[workspace_start..workspace_end],
1237 omp_block_start as FastSint,
1238 omp_block_size as FastSint,
1239 );
1240
1241 thread_state[omp_thread_num].position = (omp_block_start + omp_block_size) as FastSint;
1242 thread_state[omp_thread_num].count = count as FastSint;
1243 }
1244
1245 let mut m = 0;
1246 for t in (0..omp_num_threads).rev() {
1247 m += thread_state[t].count as SaSint;
1248
1249 if t + 1 != omp_num_threads && thread_state[t].count > 0 {
1250 let src_end =
1251 usize::try_from(thread_state[t].position).expect("position must be non-negative");
1252 let src_start = src_end
1253 - usize::try_from(thread_state[t].count).expect("count must be non-negative");
1254 let dst_start = usize::try_from(n - m).expect("destination must be non-negative");
1255 sa.copy_within(src_start..src_end, dst_start);
1256 }
1257 }
1258
1259 let omp_num_threads = omp_num_threads - 1;
1260 let omp_block_stride = (bucket_size_usize / omp_num_threads) & !15usize;
1261 for omp_thread_num in 0..omp_num_threads {
1262 let omp_block_start = omp_thread_num * omp_block_stride;
1263 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1264 omp_block_stride
1265 } else {
1266 bucket_size_usize - omp_block_start
1267 };
1268 accumulate_counts_s32(
1269 &mut workspace[omp_block_start..],
1270 omp_block_size as FastSint,
1271 bucket_stride,
1272 FastSint::try_from(omp_num_threads + 1).expect("thread count must fit FastSint"),
1273 );
1274 }
1275
1276 let accumulated_start = omp_num_threads * bucket_stride_usize;
1277 buckets[..bucket_size_usize]
1278 .copy_from_slice(&workspace[accumulated_start..accumulated_start + bucket_size_usize]);
1279 m
1280}
1281
1282#[doc(hidden)]
1284pub fn count_and_gather_lms_suffixes_32s_2k_fs_omp(
1285 t: &[SaSint],
1286 sa: &mut [SaSint],
1287 n: SaSint,
1288 k: SaSint,
1289 buckets: &mut [SaSint],
1290 local_buckets: SaSint,
1291 threads: SaSint,
1292 thread_state: &mut [ThreadState],
1293) -> SaSint {
1294 let n_usize = usize::try_from(n).expect("n must be non-negative");
1295 let k_usize = usize::try_from(k).expect("k must be non-negative");
1296 let omp_num_threads = usize::try_from(threads).expect("threads must be non-negative");
1297 let bucket_size = FastSint::try_from(2 * k_usize).expect("bucket size must fit FastSint");
1298
1299 if omp_num_threads <= 1 || n < 65_536 {
1300 return count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1301 }
1302
1303 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
1304 let free_space = if local_buckets == 1 {
1305 FastSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("free space must fit FastSint")
1306 } else if local_buckets > 1 {
1307 FastSint::try_from(local_buckets).expect("free space must fit FastSint")
1308 } else {
1309 FastSint::try_from(buckets.len()).expect("free space must fit FastSint")
1310 };
1311 let bucket_stride = get_bucket_stride(
1312 free_space,
1313 bucket_size,
1314 FastSint::try_from(omp_num_threads).expect("thread count must fit FastSint"),
1315 );
1316 let bucket_size_usize = usize::try_from(bucket_size).expect("bucket size must be non-negative");
1317 let bucket_stride_usize =
1318 usize::try_from(bucket_stride).expect("bucket stride must be non-negative");
1319 let workspace_len =
1320 bucket_size_usize + bucket_stride_usize.saturating_mul(omp_num_threads.saturating_sub(1));
1321 let mut workspace = vec![0; workspace_len];
1322
1323 for omp_thread_num in 0..omp_num_threads {
1324 let omp_block_start = omp_thread_num * omp_block_stride;
1325 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1326 omp_block_stride
1327 } else {
1328 n_usize - omp_block_start
1329 };
1330 let workspace_end = workspace_len - omp_thread_num * bucket_stride_usize;
1331 let workspace_start = workspace_end - bucket_size_usize;
1332 let count = count_and_gather_lms_suffixes_32s_2k(
1333 t,
1334 sa,
1335 n,
1336 k,
1337 &mut workspace[workspace_start..workspace_end],
1338 omp_block_start as FastSint,
1339 omp_block_size as FastSint,
1340 );
1341
1342 thread_state[omp_thread_num].position = (omp_block_start + omp_block_size) as FastSint;
1343 thread_state[omp_thread_num].count = count as FastSint;
1344 }
1345
1346 let mut m = 0;
1347 for t in (0..omp_num_threads).rev() {
1348 m += thread_state[t].count as SaSint;
1349 if t + 1 != omp_num_threads && thread_state[t].count > 0 {
1350 let src_end =
1351 usize::try_from(thread_state[t].position).expect("position must be non-negative");
1352 let src_start = src_end
1353 - usize::try_from(thread_state[t].count).expect("count must be non-negative");
1354 let dst_start = usize::try_from(n - m).expect("destination must be non-negative");
1355 sa.copy_within(src_start..src_end, dst_start);
1356 }
1357 }
1358
1359 let omp_num_threads = omp_num_threads - 1;
1360 let omp_block_stride = (bucket_size_usize / omp_num_threads) & !15usize;
1361 for omp_thread_num in 0..omp_num_threads {
1362 let omp_block_start = omp_thread_num * omp_block_stride;
1363 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1364 omp_block_stride
1365 } else {
1366 bucket_size_usize - omp_block_start
1367 };
1368 accumulate_counts_s32(
1369 &mut workspace[omp_block_start..],
1370 omp_block_size as FastSint,
1371 bucket_stride,
1372 FastSint::try_from(omp_num_threads + 1).expect("thread count must fit FastSint"),
1373 );
1374 }
1375
1376 let accumulated_start = omp_num_threads * bucket_stride_usize;
1377 buckets[..bucket_size_usize]
1378 .copy_from_slice(&workspace[accumulated_start..accumulated_start + bucket_size_usize]);
1379 m
1380}
1381
1382#[doc(hidden)]
1384pub fn count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1385 t: &[SaSint],
1386 sa: &mut [SaSint],
1387 n: SaSint,
1388 k: SaSint,
1389 buckets: &mut [SaSint],
1390 _local_buckets: SaSint,
1391 threads: SaSint,
1392 thread_state: &mut [ThreadState],
1393) {
1394 let n_usize = usize::try_from(n).expect("n must be non-negative");
1395 let k_usize = usize::try_from(k).expect("k must be non-negative");
1396 let thread_count = usize::try_from(threads).expect("threads must be non-negative");
1397 let bucket_size = 2 * k_usize;
1398
1399 if thread_count <= 1 || n < 65_536 {
1400 let _ =
1401 count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1402 return;
1403 }
1404
1405 if thread_state.len() < thread_count || sa.len() < 2 * n_usize {
1406 let _ =
1407 count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1408 return;
1409 }
1410
1411 let omp_block_stride = (n_usize / thread_count) & !15usize;
1412 let free_space = if _local_buckets != 0 {
1413 FastSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("free space must fit FastSint")
1414 } else {
1415 FastSint::try_from(buckets.len()).expect("free space must fit FastSint")
1416 };
1417 let bucket_stride = get_bucket_stride(
1418 free_space,
1419 FastSint::try_from(bucket_size).expect("bucket size must fit FastSint"),
1420 FastSint::try_from(thread_count).expect("thread count must fit FastSint"),
1421 );
1422 let bucket_stride_usize =
1423 usize::try_from(bucket_stride).expect("bucket stride must be non-negative");
1424 let workspace_len =
1425 bucket_size + bucket_stride_usize.saturating_mul(thread_count.saturating_sub(1));
1426 let mut workspace = vec![0; workspace_len];
1427
1428 for omp_thread_num in 0..thread_count {
1429 let omp_block_start = omp_thread_num * omp_block_stride;
1430 let omp_block_size = if omp_thread_num + 1 < thread_count {
1431 omp_block_stride
1432 } else {
1433 n_usize - omp_block_start
1434 };
1435
1436 let workspace_end = workspace_len - omp_thread_num * bucket_stride_usize;
1437 let workspace_start = workspace_end - bucket_size;
1438 let count = count_and_gather_compacted_lms_suffixes_32s_2k(
1439 t,
1440 &mut sa[n_usize..],
1441 n,
1442 k,
1443 &mut workspace[workspace_start..workspace_end],
1444 omp_block_start as FastSint,
1445 omp_block_size as FastSint,
1446 );
1447
1448 if omp_thread_num < thread_state.len() {
1449 thread_state[omp_thread_num].position = (omp_block_start + omp_block_size) as FastSint;
1450 thread_state[omp_thread_num].count = count as FastSint;
1451 }
1452 }
1453
1454 let mut m = 0usize;
1455 for omp_thread_num in (0..thread_count).rev() {
1456 let count = usize::try_from(thread_state[omp_thread_num].count)
1457 .expect("count must be non-negative");
1458 m += count;
1459 if count > 0 {
1460 let position = usize::try_from(thread_state[omp_thread_num].position)
1461 .expect("position must be non-negative");
1462 let src_start = n_usize + position - count;
1463 let src_end = n_usize + position;
1464 let dst_start = n_usize - m;
1465 sa.copy_within(src_start..src_end, dst_start);
1466 }
1467 }
1468
1469 let accumulation_threads = thread_count;
1470 let omp_block_stride = (bucket_size / accumulation_threads) & !15usize;
1471 for omp_thread_num in 0..accumulation_threads {
1472 let omp_block_start = omp_thread_num * omp_block_stride;
1473 let omp_block_size = if omp_thread_num + 1 < accumulation_threads {
1474 omp_block_stride
1475 } else {
1476 bucket_size - omp_block_start
1477 };
1478 accumulate_counts_s32(
1479 &mut workspace[omp_block_start..],
1480 omp_block_size as FastSint,
1481 bucket_stride,
1482 FastSint::try_from(thread_count).expect("thread count must fit FastSint"),
1483 );
1484 }
1485 let accumulated_start = (accumulation_threads - 1) * bucket_stride_usize;
1486 buckets[..bucket_size]
1487 .copy_from_slice(&workspace[accumulated_start..accumulated_start + bucket_size]);
1488}
1489
1490#[doc(hidden)]
1492pub fn count_and_gather_lms_suffixes_32s_4k_omp(
1493 t: &[SaSint],
1494 sa: &mut [SaSint],
1495 n: SaSint,
1496 k: SaSint,
1497 buckets: &mut [SaSint],
1498 local_buckets: SaSint,
1499 threads: SaSint,
1500 thread_state: &mut [ThreadState],
1501) -> SaSint {
1502 let free_space = if local_buckets != 0 {
1503 LIBSAIS_LOCAL_BUFFER_SIZE as FastSint
1504 } else {
1505 FastSint::try_from(buckets.len()).expect("bucket length must fit FastSint")
1506 };
1507 let threads_fast = threads as FastSint;
1508 let mut max_threads = (free_space / (((4 * k as FastSint) + 15) & -16)).min(threads_fast);
1509
1510 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1511 let thread_cap = (n / (16 * k)) as FastSint;
1512 if max_threads > thread_cap {
1513 max_threads = thread_cap;
1514 }
1515 return count_and_gather_lms_suffixes_32s_4k_fs_omp(
1516 t,
1517 sa,
1518 n,
1519 k,
1520 buckets,
1521 local_buckets,
1522 max_threads.max(2) as SaSint,
1523 thread_state,
1524 );
1525 }
1526
1527 if threads > 1 && n >= 65_536 {
1528 count_lms_suffixes_32s_4k(t, n, k, buckets);
1529 gather_lms_suffixes_32s(t, sa, n)
1530 } else {
1531 count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as FastSint)
1532 }
1533}
1534
1535#[doc(hidden)]
1537pub fn count_and_gather_lms_suffixes_32s_2k_omp(
1538 t: &[SaSint],
1539 sa: &mut [SaSint],
1540 n: SaSint,
1541 k: SaSint,
1542 buckets: &mut [SaSint],
1543 local_buckets: SaSint,
1544 threads: SaSint,
1545 thread_state: &mut [ThreadState],
1546) -> SaSint {
1547 let free_space = if local_buckets != 0 {
1548 LIBSAIS_LOCAL_BUFFER_SIZE as FastSint
1549 } else {
1550 FastSint::try_from(buckets.len()).expect("bucket length must fit FastSint")
1551 };
1552 let threads_fast = threads as FastSint;
1553 let mut max_threads = (free_space / (((2 * k as FastSint) + 15) & -16)).min(threads_fast);
1554
1555 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1556 let thread_cap = (n / (8 * k)) as FastSint;
1557 if max_threads > thread_cap {
1558 max_threads = thread_cap;
1559 }
1560 return count_and_gather_lms_suffixes_32s_2k_fs_omp(
1561 t,
1562 sa,
1563 n,
1564 k,
1565 buckets,
1566 local_buckets,
1567 max_threads.max(2) as SaSint,
1568 thread_state,
1569 );
1570 }
1571
1572 if threads > 1 && n >= 65_536 {
1573 count_lms_suffixes_32s_2k(t, n, k, buckets);
1574 gather_lms_suffixes_32s(t, sa, n)
1575 } else {
1576 count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint)
1577 }
1578}
1579
1580#[doc(hidden)]
1582pub fn count_and_gather_compacted_lms_suffixes_32s_2k_omp(
1583 t: &[SaSint],
1584 sa: &mut [SaSint],
1585 n: SaSint,
1586 k: SaSint,
1587 buckets: &mut [SaSint],
1588 local_buckets: SaSint,
1589 threads: SaSint,
1590 thread_state: &mut [ThreadState],
1591) {
1592 let free_space = if local_buckets != 0 {
1593 LIBSAIS_LOCAL_BUFFER_SIZE as FastSint
1594 } else {
1595 FastSint::try_from(buckets.len()).expect("bucket length must fit FastSint")
1596 };
1597 let threads_fast = threads as FastSint;
1598 let mut max_threads = (free_space / (((2 * k as FastSint) + 15) & -16)).min(threads_fast);
1599
1600 if local_buckets == 0 && max_threads > 1 && n >= 65_536 && n / k >= 2 {
1601 let thread_cap = (n / (8 * k)) as FastSint;
1602 if max_threads > thread_cap {
1603 max_threads = thread_cap;
1604 }
1605 count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1606 t,
1607 sa,
1608 n,
1609 k,
1610 buckets,
1611 local_buckets,
1612 max_threads.max(2) as SaSint,
1613 thread_state,
1614 );
1615 return;
1616 }
1617
1618 let _ = count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1619}
1620
1621#[doc(hidden)]
1623pub fn count_suffixes_32s(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
1624 let n_usize = usize::try_from(n).expect("n must be non-negative");
1625 let k_usize = usize::try_from(k).expect("k must be non-negative");
1626 buckets[..k_usize].fill(0);
1627
1628 let mut i = 0usize;
1629 let mut j = n_usize.saturating_sub(7);
1630 while i < j {
1631 buckets[t[i] as usize] += 1;
1632 buckets[t[i + 1] as usize] += 1;
1633 buckets[t[i + 2] as usize] += 1;
1634 buckets[t[i + 3] as usize] += 1;
1635 buckets[t[i + 4] as usize] += 1;
1636 buckets[t[i + 5] as usize] += 1;
1637 buckets[t[i + 6] as usize] += 1;
1638 buckets[t[i + 7] as usize] += 1;
1639 i += 8;
1640 }
1641
1642 j += 7;
1643 while i < j {
1644 buckets[t[i] as usize] += 1;
1645 i += 1;
1646 }
1647}
1648
1649#[doc(hidden)]
1651pub fn initialize_buckets_start_and_end_8u(
1652 buckets: &mut [SaSint],
1653 freq: Option<&mut [SaSint]>,
1654) -> SaSint {
1655 let start_offset = 6 * ALPHABET_SIZE;
1656 let end_offset = 7 * ALPHABET_SIZE;
1657 let mut k = -1isize;
1658 let mut sum = 0;
1659
1660 match freq {
1661 Some(freq) => {
1662 for j in 0..ALPHABET_SIZE {
1663 let i = buckets_index4(j, 0);
1664 let total = buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1665 buckets[start_offset + j] = sum;
1666 sum += total;
1667 buckets[end_offset + j] = sum;
1668 if total > 0 {
1669 k = j as isize;
1670 }
1671 freq[j] = total;
1672 }
1673 }
1674 None => {
1675 for j in 0..ALPHABET_SIZE {
1676 let i = buckets_index4(j, 0);
1677 let total = buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1678 buckets[start_offset + j] = sum;
1679 sum += total;
1680 buckets[end_offset + j] = sum;
1681 if total > 0 {
1682 k = j as isize;
1683 }
1684 }
1685 }
1686 }
1687
1688 (k + 1) as SaSint
1689}
1690
1691#[doc(hidden)]
1693pub fn initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
1694 let k_usize = usize::try_from(k).expect("k must be non-negative");
1695 let start_offset = 4 * k_usize;
1696 let end_offset = 5 * k_usize;
1697 let mut sum = 0;
1698 for j in 0..k_usize {
1699 let i = buckets_index4(j, 0);
1700 buckets[start_offset + j] = sum;
1701 sum += buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1702 buckets[end_offset + j] = sum;
1703 }
1704}
1705
1706#[doc(hidden)]
1708pub fn initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: &mut [SaSint]) {
1709 let k_usize = usize::try_from(k).expect("k must be non-negative");
1710 let start_offset = 2 * k_usize;
1711 let end_offset = 3 * k_usize;
1712 let mut sum = 0;
1713 for j in 0..k_usize {
1714 let i = buckets_index2(j, 0);
1715 buckets[start_offset + j] = sum;
1716 sum += buckets[i] + buckets[i + 1];
1717 buckets[end_offset + j] = sum;
1718 }
1719}
1720
1721#[doc(hidden)]
1723pub fn initialize_buckets_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1724 let k_usize = usize::try_from(k).expect("k must be non-negative");
1725 let mut sum0 = 0;
1726 for j in 0..k_usize {
1727 let i = buckets_index2(j, 0);
1728 sum0 += buckets[i] + buckets[i + 1];
1729 buckets[i] = sum0;
1730 }
1731}
1732
1733#[doc(hidden)]
1735pub fn initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1736 let k_usize = usize::try_from(k).expect("k must be non-negative");
1737 for j in 0..k_usize {
1738 let i = buckets_index2(j, 0);
1739 buckets[j] = buckets[i];
1740 }
1741 buckets[k_usize] = 0;
1742 for j in 1..k_usize {
1743 buckets[k_usize + j] = buckets[j - 1];
1744 }
1745}
1746
1747#[doc(hidden)]
1749pub fn initialize_buckets_start_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1750 let k_usize = usize::try_from(k).expect("k must be non-negative");
1751 let mut sum = 0;
1752 for bucket in buckets.iter_mut().take(k_usize) {
1753 let tmp = *bucket;
1754 *bucket = sum;
1755 sum += tmp;
1756 }
1757}
1758
1759#[doc(hidden)]
1761pub fn initialize_buckets_end_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1762 let k_usize = usize::try_from(k).expect("k must be non-negative");
1763 let mut sum = 0;
1764 for bucket in buckets.iter_mut().take(k_usize) {
1765 sum += *bucket;
1766 *bucket = sum;
1767 }
1768}
1769
1770#[doc(hidden)]
1772pub fn initialize_buckets_for_lms_suffixes_radix_sort_8u(
1773 t: &[u8],
1774 buckets: &mut [SaSint],
1775 mut first_lms_suffix: SaSint,
1776) -> SaSint {
1777 let mut f0 = 0usize;
1778 let mut f1: usize;
1779 let mut c0 = t[first_lms_suffix as usize] as FastSint;
1780 let mut c1: FastSint;
1781
1782 while {
1783 first_lms_suffix -= 1;
1784 first_lms_suffix >= 0
1785 } {
1786 c1 = c0;
1787 c0 = t[first_lms_suffix as usize] as FastSint;
1788 f1 = f0;
1789 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1790 let idx = 4 * c1 as usize + (f1 + f1 + f0);
1791 buckets[idx] -= 1;
1792 }
1793 buckets[4 * c0 as usize + (f0 + f0)] -= 1;
1794
1795 let temp_offset = 4 * ALPHABET_SIZE;
1796 let mut sum = 0;
1797 for j in 0..ALPHABET_SIZE {
1798 let i = 4 * j;
1799 let tj = 2 * j;
1800 buckets[temp_offset + tj + 1] = sum;
1801 sum += buckets[i + 1] + buckets[i + 3];
1802 buckets[temp_offset + tj] = sum;
1803 }
1804 sum
1805}
1806
1807#[doc(hidden)]
1809pub fn initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
1810 t: &[SaSint],
1811 k: SaSint,
1812 buckets: &mut [SaSint],
1813 first_lms_suffix: SaSint,
1814) {
1815 let _k_usize = usize::try_from(k).expect("k must be non-negative");
1816 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1817 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1818
1819 let mut sum0 = 0;
1820 let mut sum1 = 0;
1821 for j in 0..usize::try_from(k).unwrap() {
1822 let i = buckets_index2(j, 0);
1823 sum0 += buckets[i] + buckets[i + 1];
1824 sum1 += buckets[i + 1];
1825 buckets[i] = sum0;
1826 buckets[i + 1] = sum1;
1827 }
1828}
1829
1830#[doc(hidden)]
1832pub fn initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
1833 t: &[SaSint],
1834 k: SaSint,
1835 buckets: &mut [SaSint],
1836 mut first_lms_suffix: SaSint,
1837) -> SaSint {
1838 let mut f0 = 0usize;
1839 let mut f1: usize;
1840 let mut c0 = t[first_lms_suffix as usize] as FastSint;
1841 let mut c1: FastSint;
1842
1843 while {
1844 first_lms_suffix -= 1;
1845 first_lms_suffix >= 0
1846 } {
1847 c1 = c0;
1848 c0 = t[first_lms_suffix as usize] as FastSint;
1849 f1 = f0;
1850 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1851 buckets[4 * c1 as usize + (f1 + f1 + f0)] -= 1;
1852 }
1853 buckets[4 * c0 as usize + (f0 + f0)] -= 1;
1854
1855 let temp_offset = 4 * usize::try_from(k).unwrap();
1856 let mut sum = 0;
1857 for j in 0..usize::try_from(k).unwrap() {
1858 let i = 4 * j;
1859 sum += buckets[i + 1] + buckets[i + 3];
1860 buckets[temp_offset + j] = sum;
1861 }
1862 sum
1863}
1864
1865#[doc(hidden)]
1867pub fn initialize_buckets_for_radix_and_partial_sorting_32s_4k(
1868 t: &[SaSint],
1869 k: SaSint,
1870 buckets: &mut [SaSint],
1871 first_lms_suffix: SaSint,
1872) {
1873 let k_usize = usize::try_from(k).expect("k must be non-negative");
1874 let start_offset = 2 * k_usize;
1875 let end_offset = 3 * k_usize;
1876
1877 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1878 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1879
1880 let mut sum0 = 0;
1881 let mut sum1 = 0;
1882 for j in 0..k_usize {
1883 let i = buckets_index2(j, 0);
1884 buckets[start_offset + j] = sum1;
1885 sum0 += buckets[i + 1];
1886 sum1 += buckets[i] + buckets[i + 1];
1887 buckets[i + 1] = sum0;
1888 buckets[end_offset + j] = sum1;
1889 }
1890}
1891
1892#[doc(hidden)]
1894pub fn radix_sort_lms_suffixes_8u(
1895 t: &[u8],
1896 sa: &mut [SaSint],
1897 induction_bucket: &mut [SaSint],
1898 omp_block_start: FastSint,
1899 omp_block_size: FastSint,
1900) {
1901 let prefetch_distance = 64 as FastSint;
1902 let mut i = omp_block_start + omp_block_size - 1;
1903 let mut j = omp_block_start + prefetch_distance + 3;
1904
1905 while i >= j {
1906 let p0 = sa[i as usize];
1907 let idx0 = buckets_index2(t[p0 as usize] as usize, 0);
1908 induction_bucket[idx0] -= 1;
1909 sa[induction_bucket[idx0] as usize] = p0;
1910
1911 let p1 = sa[(i - 1) as usize];
1912 let idx1 = buckets_index2(t[p1 as usize] as usize, 0);
1913 induction_bucket[idx1] -= 1;
1914 sa[induction_bucket[idx1] as usize] = p1;
1915
1916 let p2 = sa[(i - 2) as usize];
1917 let idx2 = buckets_index2(t[p2 as usize] as usize, 0);
1918 induction_bucket[idx2] -= 1;
1919 sa[induction_bucket[idx2] as usize] = p2;
1920
1921 let p3 = sa[(i - 3) as usize];
1922 let idx3 = buckets_index2(t[p3 as usize] as usize, 0);
1923 induction_bucket[idx3] -= 1;
1924 sa[induction_bucket[idx3] as usize] = p3;
1925
1926 i -= 4;
1927 }
1928
1929 j -= prefetch_distance + 3;
1930 while i >= j {
1931 let p = sa[i as usize];
1932 let idx = buckets_index2(t[p as usize] as usize, 0);
1933 induction_bucket[idx] -= 1;
1934 sa[induction_bucket[idx] as usize] = p;
1935 i -= 1;
1936 }
1937}
1938
1939#[doc(hidden)]
1941pub fn radix_sort_lms_suffixes_8u_omp(
1942 t: &[u8],
1943 sa: &mut [SaSint],
1944 n: SaSint,
1945 m: SaSint,
1946 flags: SaSint,
1947 buckets: &mut [SaSint],
1948 threads: SaSint,
1949 thread_state: &mut [ThreadState],
1950) {
1951 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
1952 buckets[4 * ALPHABET_SIZE] -= 1;
1953 }
1954
1955 let omp_num_threads = if threads > 1 && n >= 65_536 && m >= 65_536 {
1956 usize::try_from(threads)
1957 .expect("threads must be non-negative")
1958 .min(thread_state.len())
1959 .max(1)
1960 } else {
1961 1
1962 };
1963
1964 if omp_num_threads == 1 {
1965 radix_sort_lms_suffixes_8u(
1966 t,
1967 sa,
1968 &mut buckets[4 * ALPHABET_SIZE..],
1969 n as FastSint - m as FastSint + 1,
1970 m as FastSint - 1,
1971 );
1972 return;
1973 }
1974
1975 let (_, src_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
1976
1977 for state in thread_state.iter_mut().take(omp_num_threads) {
1978 for (i, j) in (0..=buckets_index2(ALPHABET_SIZE - 1, 0))
1979 .step_by(buckets_index2(1, 0))
1980 .zip((buckets_index4(0, 1)..).step_by(buckets_index4(1, 0)))
1981 {
1982 state.buckets[i] = src_bucket[i] - state.buckets[j];
1983 }
1984 }
1985
1986 for thread_num in 0..omp_num_threads {
1987 let mut omp_block_start = 0;
1988 for state in thread_state
1989 .iter()
1990 .take(omp_num_threads)
1991 .skip(thread_num)
1992 .rev()
1993 {
1994 omp_block_start += state.m;
1995 }
1996
1997 let mut omp_block_size = thread_state[thread_num].m;
1998 if omp_block_start == m as FastSint && omp_block_size > 0 {
1999 omp_block_start -= 1;
2000 omp_block_size -= 1;
2001 }
2002
2003 radix_sort_lms_suffixes_8u(
2004 t,
2005 sa,
2006 &mut thread_state[thread_num].buckets,
2007 n as FastSint - omp_block_start,
2008 omp_block_size,
2009 );
2010 }
2011}
2012
2013#[doc(hidden)]
2015pub fn radix_sort_lms_suffixes_32s_6k(
2016 t: &[SaSint],
2017 sa: &mut [SaSint],
2018 induction_bucket: &mut [SaSint],
2019 omp_block_start: FastSint,
2020 omp_block_size: FastSint,
2021) {
2022 let prefetch_distance = 64 as FastSint;
2023 let mut i = omp_block_start + omp_block_size - 1;
2024 let mut j = omp_block_start + 2 * prefetch_distance + 3;
2025
2026 while i >= j {
2027 let p0 = sa[i as usize];
2028 let idx0 = t[p0 as usize] as usize;
2029 induction_bucket[idx0] -= 1;
2030 sa[induction_bucket[idx0] as usize] = p0;
2031
2032 let p1 = sa[(i - 1) as usize];
2033 let idx1 = t[p1 as usize] as usize;
2034 induction_bucket[idx1] -= 1;
2035 sa[induction_bucket[idx1] as usize] = p1;
2036
2037 let p2 = sa[(i - 2) as usize];
2038 let idx2 = t[p2 as usize] as usize;
2039 induction_bucket[idx2] -= 1;
2040 sa[induction_bucket[idx2] as usize] = p2;
2041
2042 let p3 = sa[(i - 3) as usize];
2043 let idx3 = t[p3 as usize] as usize;
2044 induction_bucket[idx3] -= 1;
2045 sa[induction_bucket[idx3] as usize] = p3;
2046
2047 i -= 4;
2048 }
2049
2050 j -= 2 * prefetch_distance + 3;
2051 while i >= j {
2052 let p = sa[i as usize];
2053 let idx = t[p as usize] as usize;
2054 induction_bucket[idx] -= 1;
2055 sa[induction_bucket[idx] as usize] = p;
2056 i -= 1;
2057 }
2058}
2059
2060#[doc(hidden)]
2062pub fn radix_sort_lms_suffixes_32s_2k(
2063 t: &[SaSint],
2064 sa: &mut [SaSint],
2065 induction_bucket: &mut [SaSint],
2066 omp_block_start: FastSint,
2067 omp_block_size: FastSint,
2068) {
2069 let prefetch_distance = 64 as FastSint;
2070 let mut i = omp_block_start + omp_block_size - 1;
2071 let mut j = omp_block_start + 2 * prefetch_distance + 3;
2072
2073 while i >= j {
2074 let p0 = sa[i as usize];
2075 let idx0 = buckets_index2(t[p0 as usize] as usize, 0);
2076 induction_bucket[idx0] -= 1;
2077 sa[induction_bucket[idx0] as usize] = p0;
2078
2079 let p1 = sa[(i - 1) as usize];
2080 let idx1 = buckets_index2(t[p1 as usize] as usize, 0);
2081 induction_bucket[idx1] -= 1;
2082 sa[induction_bucket[idx1] as usize] = p1;
2083
2084 let p2 = sa[(i - 2) as usize];
2085 let idx2 = buckets_index2(t[p2 as usize] as usize, 0);
2086 induction_bucket[idx2] -= 1;
2087 sa[induction_bucket[idx2] as usize] = p2;
2088
2089 let p3 = sa[(i - 3) as usize];
2090 let idx3 = buckets_index2(t[p3 as usize] as usize, 0);
2091 induction_bucket[idx3] -= 1;
2092 sa[induction_bucket[idx3] as usize] = p3;
2093
2094 i -= 4;
2095 }
2096
2097 j -= 2 * prefetch_distance + 3;
2098 while i >= j {
2099 let p = sa[i as usize];
2100 let idx = buckets_index2(t[p as usize] as usize, 0);
2101 induction_bucket[idx] -= 1;
2102 sa[induction_bucket[idx] as usize] = p;
2103 i -= 1;
2104 }
2105}
2106
2107#[doc(hidden)]
2109pub fn radix_sort_lms_suffixes_32s_block_gather(
2110 t: &[SaSint],
2111 sa: &[SaSint],
2112 cache: &mut [ThreadCache],
2113 omp_block_start: FastSint,
2114 omp_block_size: FastSint,
2115) {
2116 if omp_block_size <= 0 {
2117 return;
2118 }
2119
2120 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2121 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2122 let mut i = start;
2123 let mut j = if size > 67 { start + size - 67 } else { start };
2124
2125 while i < j {
2126 for current in [i, i + 1, i + 2, i + 3] {
2127 let ci = current - start;
2128 let index = sa[current];
2129 cache[ci].index = index;
2130 cache[ci].symbol = t[index as usize];
2131 }
2132 i += 4;
2133 }
2134
2135 j = if size > 67 { j + 67 } else { start + size };
2136 while i < j {
2137 let ci = i - start;
2138 let index = sa[i];
2139 cache[ci].index = index;
2140 cache[ci].symbol = t[index as usize];
2141 i += 1;
2142 }
2143}
2144
2145#[doc(hidden)]
2147pub fn radix_sort_lms_suffixes_32s_6k_block_sort(
2148 induction_bucket: &mut [SaSint],
2149 cache: &mut [ThreadCache],
2150 omp_block_start: FastSint,
2151 omp_block_size: FastSint,
2152) {
2153 if omp_block_size <= 0 {
2154 return;
2155 }
2156
2157 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2158 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2159 let mut i = start + size - 1;
2160 let mut j = start + 64 + 3;
2161
2162 while i >= j {
2163 for current in [i, i - 1, i - 2, i - 3] {
2164 let ci = current - start;
2165 let v = cache[ci].symbol as usize;
2166 induction_bucket[v] -= 1;
2167 cache[ci].symbol = induction_bucket[v];
2168 }
2169 i -= 4;
2170 }
2171
2172 j -= 64 + 3;
2173 while i >= j {
2174 let ci = i - start;
2175 let v = cache[ci].symbol as usize;
2176 induction_bucket[v] -= 1;
2177 cache[ci].symbol = induction_bucket[v];
2178 if i == 0 {
2179 break;
2180 }
2181 i -= 1;
2182 }
2183}
2184
2185#[doc(hidden)]
2187pub fn radix_sort_lms_suffixes_32s_2k_block_sort(
2188 induction_bucket: &mut [SaSint],
2189 cache: &mut [ThreadCache],
2190 omp_block_start: FastSint,
2191 omp_block_size: FastSint,
2192) {
2193 if omp_block_size <= 0 {
2194 return;
2195 }
2196
2197 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
2198 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
2199 let mut i = start + size - 1;
2200 let mut j = start + 64 + 3;
2201
2202 while i >= j {
2203 for current in [i, i - 1, i - 2, i - 3] {
2204 let ci = current - start;
2205 let v = buckets_index2(cache[ci].symbol as usize, 0);
2206 induction_bucket[v] -= 1;
2207 cache[ci].symbol = induction_bucket[v];
2208 }
2209 i -= 4;
2210 }
2211
2212 j -= 64 + 3;
2213 while i >= j {
2214 let ci = i - start;
2215 let v = buckets_index2(cache[ci].symbol as usize, 0);
2216 induction_bucket[v] -= 1;
2217 cache[ci].symbol = induction_bucket[v];
2218 if i == 0 {
2219 break;
2220 }
2221 i -= 1;
2222 }
2223}
2224
2225#[doc(hidden)]
2227pub fn radix_sort_lms_suffixes_32s_6k_block_omp(
2228 t: &[SaSint],
2229 sa: &mut [SaSint],
2230 induction_bucket: &mut [SaSint],
2231 cache: &mut [ThreadCache],
2232 block_start: FastSint,
2233 block_size: FastSint,
2234 threads: SaSint,
2235) {
2236 if threads <= 1 || block_size < 16_384 {
2237 radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, block_start, block_size);
2238 return;
2239 }
2240
2241 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
2242 let threads_usize = usize::try_from(threads)
2243 .expect("threads must be positive")
2244 .min(block_size_usize.max(1));
2245 let omp_block_stride = (block_size_usize / threads_usize) & !15usize;
2246
2247 for omp_thread_num in 0..threads_usize {
2248 let omp_block_start = omp_thread_num * omp_block_stride;
2249 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2250 omp_block_stride
2251 } else {
2252 block_size_usize - omp_block_start
2253 };
2254 if omp_block_size > 0 {
2255 radix_sort_lms_suffixes_32s_block_gather(
2256 t,
2257 sa,
2258 &mut cache[omp_block_start..],
2259 block_start + omp_block_start as FastSint,
2260 omp_block_size as FastSint,
2261 );
2262 }
2263 }
2264
2265 radix_sort_lms_suffixes_32s_6k_block_sort(induction_bucket, cache, block_start, block_size);
2266
2267 for omp_thread_num in 0..threads_usize {
2268 let omp_block_start = omp_thread_num * omp_block_stride;
2269 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2270 omp_block_stride
2271 } else {
2272 block_size_usize - omp_block_start
2273 };
2274 if omp_block_size > 0 {
2275 place_cached_suffixes(sa, &cache[omp_block_start..], 0, omp_block_size as FastSint);
2276 }
2277 }
2278}
2279
2280#[doc(hidden)]
2282pub fn radix_sort_lms_suffixes_32s_2k_block_omp(
2283 t: &[SaSint],
2284 sa: &mut [SaSint],
2285 induction_bucket: &mut [SaSint],
2286 cache: &mut [ThreadCache],
2287 block_start: FastSint,
2288 block_size: FastSint,
2289 threads: SaSint,
2290) {
2291 if threads <= 1 || block_size < 16_384 {
2292 radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, block_start, block_size);
2293 return;
2294 }
2295
2296 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
2297 let threads_usize = usize::try_from(threads)
2298 .expect("threads must be positive")
2299 .min(block_size_usize.max(1));
2300 let omp_block_stride = (block_size_usize / threads_usize) & !15usize;
2301
2302 for omp_thread_num in 0..threads_usize {
2303 let omp_block_start = omp_thread_num * omp_block_stride;
2304 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2305 omp_block_stride
2306 } else {
2307 block_size_usize - omp_block_start
2308 };
2309 if omp_block_size > 0 {
2310 radix_sort_lms_suffixes_32s_block_gather(
2311 t,
2312 sa,
2313 &mut cache[omp_block_start..],
2314 block_start + omp_block_start as FastSint,
2315 omp_block_size as FastSint,
2316 );
2317 }
2318 }
2319
2320 radix_sort_lms_suffixes_32s_2k_block_sort(induction_bucket, cache, block_start, block_size);
2321
2322 for omp_thread_num in 0..threads_usize {
2323 let omp_block_start = omp_thread_num * omp_block_stride;
2324 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2325 omp_block_stride
2326 } else {
2327 block_size_usize - omp_block_start
2328 };
2329 if omp_block_size > 0 {
2330 place_cached_suffixes(sa, &cache[omp_block_start..], 0, omp_block_size as FastSint);
2331 }
2332 }
2333}
2334
2335#[doc(hidden)]
2337pub fn radix_sort_lms_suffixes_32s_6k_omp(
2338 t: &[SaSint],
2339 sa: &mut [SaSint],
2340 n: SaSint,
2341 m: SaSint,
2342 induction_bucket: &mut [SaSint],
2343 threads: SaSint,
2344 _thread_state: &mut [ThreadState],
2345) {
2346 if threads <= 1 || m < 65_536 {
2347 radix_sort_lms_suffixes_32s_6k(
2348 t,
2349 sa,
2350 induction_bucket,
2351 n as FastSint - m as FastSint + 1,
2352 m as FastSint - 1,
2353 );
2354 return;
2355 }
2356
2357 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2358 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
2359 let mut block_start = 0usize;
2360 let m_usize = usize::try_from(m).expect("m must be non-negative");
2361 let n_usize = usize::try_from(n).expect("n must be non-negative");
2362 let last = m_usize - 1;
2363
2364 while block_start < last {
2365 let block_end = (block_start + threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE).min(last);
2366 radix_sort_lms_suffixes_32s_6k_block_omp(
2367 t,
2368 sa,
2369 induction_bucket,
2370 &mut cache,
2371 (n_usize - block_end) as FastSint,
2372 (block_end - block_start) as FastSint,
2373 threads,
2374 );
2375 block_start = block_end;
2376 }
2377}
2378
2379#[doc(hidden)]
2381pub fn radix_sort_lms_suffixes_32s_2k_omp(
2382 t: &[SaSint],
2383 sa: &mut [SaSint],
2384 n: SaSint,
2385 m: SaSint,
2386 induction_bucket: &mut [SaSint],
2387 threads: SaSint,
2388 _thread_state: &mut [ThreadState],
2389) {
2390 if threads <= 1 || m < 65_536 {
2391 radix_sort_lms_suffixes_32s_2k(
2392 t,
2393 sa,
2394 induction_bucket,
2395 n as FastSint - m as FastSint + 1,
2396 m as FastSint - 1,
2397 );
2398 return;
2399 }
2400
2401 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2402 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
2403 let mut block_start = 0usize;
2404 let m_usize = usize::try_from(m).expect("m must be non-negative");
2405 let n_usize = usize::try_from(n).expect("n must be non-negative");
2406 let last = m_usize - 1;
2407
2408 while block_start < last {
2409 let block_end = (block_start + threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE).min(last);
2410 radix_sort_lms_suffixes_32s_2k_block_omp(
2411 t,
2412 sa,
2413 induction_bucket,
2414 &mut cache,
2415 (n_usize - block_end) as FastSint,
2416 (block_end - block_start) as FastSint,
2417 threads,
2418 );
2419 block_start = block_end;
2420 }
2421}
2422
2423#[doc(hidden)]
2425pub fn radix_sort_lms_suffixes_32s_1k(
2426 t: &[SaSint],
2427 sa: &mut [SaSint],
2428 n: SaSint,
2429 buckets: &mut [SaSint],
2430) -> SaSint {
2431 let n_usize = usize::try_from(n).expect("n must be non-negative");
2432 let mut i = n as FastSint - 2;
2433 let mut m = 0;
2434 let mut f0 = 1usize;
2435 let mut f1: usize;
2436 let mut c0 = t[n_usize - 1] as FastSint;
2437 let mut c1: FastSint;
2438 let mut c2 = 0 as FastSint;
2439
2440 while i >= 67 {
2441 c1 = t[i as usize] as FastSint;
2442 f1 = usize::from(c1 > (c0 - f0 as FastSint));
2443 if (f1 & !f0) != 0 {
2444 c2 = c0;
2445 buckets[c2 as usize] -= 1;
2446 sa[buckets[c2 as usize] as usize] = (i + 1) as SaSint;
2447 m += 1;
2448 }
2449
2450 c0 = t[(i - 1) as usize] as FastSint;
2451 f0 = usize::from(c0 > (c1 - f1 as FastSint));
2452 if (f0 & !f1) != 0 {
2453 c2 = c1;
2454 buckets[c2 as usize] -= 1;
2455 sa[buckets[c2 as usize] as usize] = i as SaSint;
2456 m += 1;
2457 }
2458
2459 c1 = t[(i - 2) as usize] as FastSint;
2460 f1 = usize::from(c1 > (c0 - f0 as FastSint));
2461 if (f1 & !f0) != 0 {
2462 c2 = c0;
2463 buckets[c2 as usize] -= 1;
2464 sa[buckets[c2 as usize] as usize] = (i - 1) as SaSint;
2465 m += 1;
2466 }
2467
2468 c0 = t[(i - 3) as usize] as FastSint;
2469 f0 = usize::from(c0 > (c1 - f1 as FastSint));
2470 if (f0 & !f1) != 0 {
2471 c2 = c1;
2472 buckets[c2 as usize] -= 1;
2473 sa[buckets[c2 as usize] as usize] = (i - 2) as SaSint;
2474 m += 1;
2475 }
2476
2477 i -= 4;
2478 }
2479
2480 while i >= 0 {
2481 c1 = c0;
2482 c0 = t[i as usize] as FastSint;
2483 f1 = f0;
2484 f0 = usize::from(c0 > (c1 - f1 as FastSint));
2485 if (f0 & !f1) != 0 {
2486 c2 = c1;
2487 buckets[c2 as usize] -= 1;
2488 sa[buckets[c2 as usize] as usize] = (i + 1) as SaSint;
2489 m += 1;
2490 }
2491 i -= 1;
2492 }
2493
2494 if m > 1 {
2495 sa[buckets[c2 as usize] as usize] = 0;
2496 }
2497
2498 m
2499}
2500
2501#[doc(hidden)]
2503pub fn radix_sort_set_markers_32s_6k(
2504 sa: &mut [SaSint],
2505 induction_bucket: &[SaSint],
2506 omp_block_start: FastSint,
2507 omp_block_size: FastSint,
2508) {
2509 let mut i = omp_block_start;
2510 let mut j = omp_block_start + omp_block_size - 67;
2511
2512 while i < j {
2513 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2514 sa[induction_bucket[(i + 1) as usize] as usize] |= SAINT_MIN;
2515 sa[induction_bucket[(i + 2) as usize] as usize] |= SAINT_MIN;
2516 sa[induction_bucket[(i + 3) as usize] as usize] |= SAINT_MIN;
2517 i += 4;
2518 }
2519
2520 j += 67;
2521 while i < j {
2522 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2523 i += 1;
2524 }
2525}
2526
2527#[doc(hidden)]
2529pub fn radix_sort_set_markers_32s_4k(
2530 sa: &mut [SaSint],
2531 induction_bucket: &[SaSint],
2532 omp_block_start: FastSint,
2533 omp_block_size: FastSint,
2534) {
2535 let mut i = omp_block_start;
2536 let mut j = omp_block_start + omp_block_size - 67;
2537
2538 while i < j {
2539 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2540 sa[induction_bucket[buckets_index2((i + 1) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2541 sa[induction_bucket[buckets_index2((i + 2) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2542 sa[induction_bucket[buckets_index2((i + 3) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2543 i += 4;
2544 }
2545
2546 j += 67;
2547 while i < j {
2548 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2549 i += 1;
2550 }
2551}
2552
2553#[doc(hidden)]
2555pub fn radix_sort_set_markers_32s_6k_omp(
2556 sa: &mut [SaSint],
2557 k: SaSint,
2558 induction_bucket: &[SaSint],
2559 threads: SaSint,
2560) {
2561 if k <= 1 {
2562 return;
2563 }
2564
2565 if threads <= 1 || k < 65_536 {
2566 radix_sort_set_markers_32s_6k(sa, induction_bucket, 0, k as FastSint - 1);
2567 return;
2568 }
2569
2570 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2571 let last = usize::try_from(k - 1).expect("k must be positive");
2572 let stride = (last / threads_usize) & !15usize;
2573 let mut start = 0usize;
2574
2575 for thread in 0..threads_usize {
2576 let end = if thread + 1 == threads_usize {
2577 last
2578 } else {
2579 start + stride
2580 };
2581 if end > start {
2582 radix_sort_set_markers_32s_6k(
2583 sa,
2584 induction_bucket,
2585 start as FastSint,
2586 (end - start) as FastSint,
2587 );
2588 }
2589 start = end;
2590 }
2591}
2592
2593#[doc(hidden)]
2595pub fn radix_sort_set_markers_32s_4k_omp(
2596 sa: &mut [SaSint],
2597 k: SaSint,
2598 induction_bucket: &[SaSint],
2599 threads: SaSint,
2600) {
2601 if k <= 1 {
2602 return;
2603 }
2604
2605 if threads <= 1 || k < 65_536 {
2606 radix_sort_set_markers_32s_4k(sa, induction_bucket, 0, k as FastSint - 1);
2607 return;
2608 }
2609
2610 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2611 let last = usize::try_from(k - 1).expect("k must be positive");
2612 let stride = (last / threads_usize) & !15usize;
2613 let mut start = 0usize;
2614
2615 for thread in 0..threads_usize {
2616 let end = if thread + 1 == threads_usize {
2617 last
2618 } else {
2619 start + stride
2620 };
2621 if end > start {
2622 radix_sort_set_markers_32s_4k(
2623 sa,
2624 induction_bucket,
2625 start as FastSint,
2626 (end - start) as FastSint,
2627 );
2628 }
2629 start = end;
2630 }
2631}
2632
2633#[doc(hidden)]
2635pub fn initialize_buckets_for_partial_sorting_8u(
2636 t: &[u8],
2637 buckets: &mut [SaSint],
2638 first_lms_suffix: SaSint,
2639 left_suffixes_count: SaSint,
2640) {
2641 let temp_offset = 4 * ALPHABET_SIZE;
2642 buckets[buckets_index4(t[first_lms_suffix as usize] as usize, 1)] += 1;
2643
2644 let mut sum0 = left_suffixes_count + 1;
2645 let mut sum1 = 0;
2646 for j in 0..ALPHABET_SIZE {
2647 let i = buckets_index4(j, 0);
2648 let tj = buckets_index2(j, 0);
2649 buckets[temp_offset + tj] = sum0;
2650 sum0 += buckets[i] + buckets[i + 2];
2651 sum1 += buckets[i + 1];
2652 buckets[tj] = sum0;
2653 buckets[tj + 1] = sum1;
2654 }
2655}
2656
2657#[doc(hidden)]
2659pub fn initialize_buckets_for_partial_sorting_32s_6k(
2660 t: &[SaSint],
2661 k: SaSint,
2662 buckets: &mut [SaSint],
2663 first_lms_suffix: SaSint,
2664 left_suffixes_count: SaSint,
2665) {
2666 let k_usize = usize::try_from(k).expect("k must be non-negative");
2667 let temp_offset = 4 * k_usize;
2668 let first_symbol = t[first_lms_suffix as usize] as usize;
2669 let mut sum0 = left_suffixes_count + 1;
2670 let mut sum1 = 0;
2671 let mut sum2 = 0;
2672
2673 for j in 0..first_symbol {
2674 let i = buckets_index4(j, 0);
2675 let tj = buckets_index2(j, 0);
2676 let ss = buckets[i];
2677 let ls = buckets[i + 1];
2678 let sl = buckets[i + 2];
2679 let ll = buckets[i + 3];
2680
2681 buckets[i] = sum0;
2682 buckets[i + 1] = sum2;
2683 buckets[i + 2] = 0;
2684 buckets[i + 3] = 0;
2685
2686 sum0 += ss + sl;
2687 sum1 += ls;
2688 sum2 += ls + ll;
2689
2690 buckets[temp_offset + tj] = sum0;
2691 buckets[temp_offset + tj + 1] = sum1;
2692 }
2693
2694 sum1 += 1;
2695 for j in first_symbol..k_usize {
2696 let i = buckets_index4(j, 0);
2697 let tj = buckets_index2(j, 0);
2698 let ss = buckets[i];
2699 let ls = buckets[i + 1];
2700 let sl = buckets[i + 2];
2701 let ll = buckets[i + 3];
2702
2703 buckets[i] = sum0;
2704 buckets[i + 1] = sum2;
2705 buckets[i + 2] = 0;
2706 buckets[i + 3] = 0;
2707
2708 sum0 += ss + sl;
2709 sum1 += ls;
2710 sum2 += ls + ll;
2711
2712 buckets[temp_offset + tj] = sum0;
2713 buckets[temp_offset + tj + 1] = sum1;
2714 }
2715}
2716
2717#[doc(hidden)]
2719pub fn partial_sorting_scan_left_to_right_8u(
2720 t: &[u8],
2721 sa: &mut [SaSint],
2722 buckets: &mut [SaSint],
2723 mut d: SaSint,
2724 omp_block_start: FastSint,
2725 omp_block_size: FastSint,
2726) -> SaSint {
2727 let induction_offset = 4 * ALPHABET_SIZE;
2728 let distinct_offset = 2 * ALPHABET_SIZE;
2729 let prefetch_distance = 64 as FastSint;
2730 let mut i = omp_block_start;
2731 let mut j = if omp_block_size > prefetch_distance + 1 {
2732 omp_block_start + omp_block_size - prefetch_distance - 1
2733 } else {
2734 omp_block_start
2735 };
2736
2737 while i < j {
2738 let mut p0 = sa[i as usize];
2739 d += SaSint::from(p0 < 0);
2740 p0 &= SAINT_MAX;
2741 let v0 = buckets_index2(
2742 t[(p0 - 1) as usize] as usize,
2743 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
2744 );
2745 let pos0 = buckets[induction_offset + v0] as usize;
2746 sa[pos0] = (p0 - 1) | (((buckets[distinct_offset + v0] != d) as SaSint) << (SAINT_BIT - 1));
2747 buckets[induction_offset + v0] += 1;
2748 buckets[distinct_offset + v0] = d;
2749
2750 let mut p1 = sa[(i + 1) as usize];
2751 d += SaSint::from(p1 < 0);
2752 p1 &= SAINT_MAX;
2753 let v1 = buckets_index2(
2754 t[(p1 - 1) as usize] as usize,
2755 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
2756 );
2757 let pos1 = buckets[induction_offset + v1] as usize;
2758 sa[pos1] = (p1 - 1) | (((buckets[distinct_offset + v1] != d) as SaSint) << (SAINT_BIT - 1));
2759 buckets[induction_offset + v1] += 1;
2760 buckets[distinct_offset + v1] = d;
2761
2762 i += 2;
2763 }
2764
2765 j = omp_block_start + omp_block_size;
2766 while i < j {
2767 let mut p = sa[i as usize];
2768 d += SaSint::from(p < 0);
2769 p &= SAINT_MAX;
2770 let v = buckets_index2(
2771 t[(p - 1) as usize] as usize,
2772 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2773 );
2774 let pos = buckets[induction_offset + v] as usize;
2775 sa[pos] = (p - 1) | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
2776 buckets[induction_offset + v] += 1;
2777 buckets[distinct_offset + v] = d;
2778 i += 1;
2779 }
2780
2781 d
2782}
2783
2784#[doc(hidden)]
2786pub fn partial_sorting_scan_left_to_right_8u_omp(
2787 t: &[u8],
2788 sa: &mut [SaSint],
2789 n: SaSint,
2790 k: SaSint,
2791 buckets: &mut [SaSint],
2792 left_suffixes_count: SaSint,
2793 mut d: SaSint,
2794 threads: SaSint,
2795 thread_state: &mut [ThreadState],
2796) -> SaSint {
2797 let v = buckets_index2(
2798 t[(n - 1) as usize] as usize,
2799 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
2800 );
2801 let induction_offset = 4 * ALPHABET_SIZE;
2802 let distinct_offset = 2 * ALPHABET_SIZE;
2803 let pos = buckets[induction_offset + v] as usize;
2804 sa[pos] = (n - 1) | SAINT_MIN;
2805 buckets[induction_offset + v] += 1;
2806 d += 1;
2807 buckets[distinct_offset + v] = d;
2808
2809 if threads == 1 || left_suffixes_count < 65_536 {
2810 return partial_sorting_scan_left_to_right_8u(
2811 t,
2812 sa,
2813 buckets,
2814 d,
2815 0,
2816 left_suffixes_count as FastSint,
2817 );
2818 }
2819
2820 let mut block_start = 0usize;
2821 let left_suffixes_count =
2822 usize::try_from(left_suffixes_count).expect("left_suffixes_count must be non-negative");
2823 let threads_usize = usize::try_from(threads)
2824 .expect("threads must be non-negative")
2825 .min(thread_state.len())
2826 .max(1);
2827 while block_start < left_suffixes_count {
2828 if sa[block_start] == 0 {
2829 block_start += 1;
2830 } else {
2831 let mut block_max_end =
2832 block_start + threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
2833 if block_max_end > left_suffixes_count {
2834 block_max_end = left_suffixes_count;
2835 }
2836 let mut block_end = block_start + 1;
2837 while block_end < block_max_end && sa[block_end] != 0 {
2838 block_end += 1;
2839 }
2840 let block_size = block_end - block_start;
2841
2842 if block_size < 32 {
2843 while block_start < block_end {
2844 let p = sa[block_start];
2845 d += SaSint::from(p < 0);
2846 let p = p & SAINT_MAX;
2847 let v = buckets_index2(
2848 t[(p - 1) as usize] as usize,
2849 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2850 );
2851 let pos = buckets[induction_offset + v] as usize;
2852 sa[pos] = (p - 1)
2853 | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
2854 buckets[induction_offset + v] += 1;
2855 buckets[distinct_offset + v] = d;
2856 block_start += 1;
2857 }
2858 } else {
2859 d = partial_sorting_scan_left_to_right_8u_block_omp(
2860 t,
2861 sa,
2862 k,
2863 buckets,
2864 d,
2865 block_start as FastSint,
2866 block_size as FastSint,
2867 threads,
2868 thread_state,
2869 );
2870 block_start = block_end;
2871 }
2872 }
2873 }
2874
2875 d
2876}
2877
2878#[doc(hidden)]
2880pub fn partial_sorting_scan_left_to_right_32s_6k(
2881 t: &[SaSint],
2882 sa: &mut [SaSint],
2883 buckets: &mut [SaSint],
2884 mut d: SaSint,
2885 omp_block_start: FastSint,
2886 omp_block_size: FastSint,
2887) -> SaSint {
2888 let prefetch_distance: FastSint = 64;
2889
2890 let mut i = omp_block_start;
2891 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
2892 while i < j {
2893 let mut p0 = sa[i as usize];
2894 d += SaSint::from(p0 < 0);
2895 p0 &= SAINT_MAX;
2896 let p0u = p0 as usize;
2897 let v0 = buckets_index4(t[p0u - 1] as usize, usize::from(t[p0u - 2] >= t[p0u - 1]));
2898 let pos0 = buckets[v0] as usize;
2899 sa[pos0] = (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
2900 buckets[v0] += 1;
2901 buckets[2 + v0] = d;
2902
2903 let mut p1 = sa[(i + 1) as usize];
2904 d += SaSint::from(p1 < 0);
2905 p1 &= SAINT_MAX;
2906 let p1u = p1 as usize;
2907 let v1 = buckets_index4(t[p1u - 1] as usize, usize::from(t[p1u - 2] >= t[p1u - 1]));
2908 let pos1 = buckets[v1] as usize;
2909 sa[pos1] = (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
2910 buckets[v1] += 1;
2911 buckets[2 + v1] = d;
2912
2913 i += 2;
2914 }
2915
2916 j += 2 * prefetch_distance + 1;
2917 while i < j {
2918 let mut p = sa[i as usize];
2919 d += SaSint::from(p < 0);
2920 p &= SAINT_MAX;
2921 let pu = p as usize;
2922 let v = buckets_index4(t[pu - 1] as usize, usize::from(t[pu - 2] >= t[pu - 1]));
2923 let pos = buckets[v] as usize;
2924 sa[pos] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
2925 buckets[v] += 1;
2926 buckets[2 + v] = d;
2927 i += 1;
2928 }
2929
2930 d
2931}
2932
2933#[doc(hidden)]
2935pub fn partial_sorting_scan_left_to_right_32s_4k(
2936 t: &[SaSint],
2937 sa: &mut [SaSint],
2938 k: SaSint,
2939 buckets: &mut [SaSint],
2940 mut d: SaSint,
2941 omp_block_start: FastSint,
2942 omp_block_size: FastSint,
2943) -> SaSint {
2944 let k_usize = usize::try_from(k).expect("k must be non-negative");
2945 let prefetch_distance: FastSint = 64;
2946 let induction_offset = 2 * k_usize;
2947 let mut i = omp_block_start;
2948 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
2949
2950 while i < j {
2951 let i0 = i as usize;
2952 let mut p0 = sa[i0];
2953 sa[i0] = p0 & SAINT_MAX;
2954 if p0 > 0 {
2955 sa[i0] = 0;
2956 d += p0 >> (SUFFIX_GROUP_BIT - 1);
2957 p0 &= !SUFFIX_GROUP_MARKER;
2958 let p0u = p0 as usize;
2959 let c0 = t[p0u - 1];
2960 let f0 = usize::from(t[p0u - 2] < c0);
2961 let v0 = buckets_index2(c0 as usize, f0);
2962 let c0u = c0 as usize;
2963 let pos0 = buckets[induction_offset + c0u] as usize;
2964 sa[pos0] = (p0 - 1)
2965 | ((f0 as SaSint) << (SAINT_BIT - 1))
2966 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2967 buckets[induction_offset + c0u] += 1;
2968 buckets[v0] = d;
2969 }
2970
2971 let i1 = (i + 1) as usize;
2972 let mut p1 = sa[i1];
2973 sa[i1] = p1 & SAINT_MAX;
2974 if p1 > 0 {
2975 sa[i1] = 0;
2976 d += p1 >> (SUFFIX_GROUP_BIT - 1);
2977 p1 &= !SUFFIX_GROUP_MARKER;
2978 let p1u = p1 as usize;
2979 let c1 = t[p1u - 1];
2980 let f1 = usize::from(t[p1u - 2] < c1);
2981 let v1 = buckets_index2(c1 as usize, f1);
2982 let c1u = c1 as usize;
2983 let pos1 = buckets[induction_offset + c1u] as usize;
2984 sa[pos1] = (p1 - 1)
2985 | ((f1 as SaSint) << (SAINT_BIT - 1))
2986 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2987 buckets[induction_offset + c1u] += 1;
2988 buckets[v1] = d;
2989 }
2990
2991 i += 2;
2992 }
2993
2994 j += 2 * prefetch_distance + 1;
2995 while i < j {
2996 let iu = i as usize;
2997 let mut p = sa[iu];
2998 sa[iu] = p & SAINT_MAX;
2999 if p > 0 {
3000 sa[iu] = 0;
3001 d += p >> (SUFFIX_GROUP_BIT - 1);
3002 p &= !SUFFIX_GROUP_MARKER;
3003 let pu = p as usize;
3004 let c = t[pu - 1];
3005 let f = usize::from(t[pu - 2] < c);
3006 let v = buckets_index2(c as usize, f);
3007 let cu = c as usize;
3008 let pos = buckets[induction_offset + cu] as usize;
3009 sa[pos] = (p - 1)
3010 | ((f as SaSint) << (SAINT_BIT - 1))
3011 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3012 buckets[induction_offset + cu] += 1;
3013 buckets[v] = d;
3014 }
3015 i += 1;
3016 }
3017
3018 d
3019}
3020
3021#[doc(hidden)]
3023pub fn partial_sorting_scan_left_to_right_32s_1k(
3024 t: &[SaSint],
3025 sa: &mut [SaSint],
3026 induction_bucket: &mut [SaSint],
3027 omp_block_start: FastSint,
3028 omp_block_size: FastSint,
3029) {
3030 let prefetch_distance = 64 as FastSint;
3031 let mut i = omp_block_start;
3032 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
3033
3034 while i < j {
3035 let p0 = sa[i as usize];
3036 sa[i as usize] = p0 & SAINT_MAX;
3037 if p0 > 0 {
3038 sa[i as usize] = 0;
3039 let c0 = t[(p0 - 1) as usize] as usize;
3040 let pos0 = induction_bucket[c0] as usize;
3041 induction_bucket[c0] += 1;
3042 sa[pos0] = (p0 - 1)
3043 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3044 << (SAINT_BIT - 1));
3045 }
3046
3047 let p1 = sa[(i + 1) as usize];
3048 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3049 if p1 > 0 {
3050 sa[(i + 1) as usize] = 0;
3051 let c1 = t[(p1 - 1) as usize] as usize;
3052 let pos1 = induction_bucket[c1] as usize;
3053 induction_bucket[c1] += 1;
3054 sa[pos1] = (p1 - 1)
3055 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3056 << (SAINT_BIT - 1));
3057 }
3058
3059 i += 2;
3060 }
3061
3062 j += 2 * prefetch_distance + 1;
3063 while i < j {
3064 let p = sa[i as usize];
3065 sa[i as usize] = p & SAINT_MAX;
3066 if p > 0 {
3067 sa[i as usize] = 0;
3068 let c = t[(p - 1) as usize] as usize;
3069 let pos = induction_bucket[c] as usize;
3070 induction_bucket[c] += 1;
3071 sa[pos] = (p - 1)
3072 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3073 << (SAINT_BIT - 1));
3074 }
3075 i += 1;
3076 }
3077}
3078
3079#[doc(hidden)]
3081pub fn partial_sorting_scan_left_to_right_32s_6k_omp(
3082 t: &[SaSint],
3083 sa: &mut [SaSint],
3084 n: SaSint,
3085 buckets: &mut [SaSint],
3086 left_suffixes_count: SaSint,
3087 mut d: SaSint,
3088 threads: SaSint,
3089 thread_state: &mut [ThreadState],
3090) -> SaSint {
3091 let v = buckets_index4(
3092 t[(n - 1) as usize] as usize,
3093 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
3094 );
3095 let pos = buckets[v] as usize;
3096 sa[pos] = (n - 1) | SAINT_MIN;
3097 buckets[v] += 1;
3098 d += 1;
3099 buckets[2 + v] = d;
3100 if threads == 1 || left_suffixes_count < 65_536 {
3101 return partial_sorting_scan_left_to_right_32s_6k(
3102 t,
3103 sa,
3104 buckets,
3105 d,
3106 0,
3107 left_suffixes_count as FastSint,
3108 );
3109 }
3110 if thread_state.is_empty() {
3111 return partial_sorting_scan_left_to_right_32s_6k(
3112 t,
3113 sa,
3114 buckets,
3115 d,
3116 0,
3117 left_suffixes_count as FastSint,
3118 );
3119 }
3120
3121 let left_suffixes_count =
3122 usize::try_from(left_suffixes_count).expect("left_suffixes_count must be non-negative");
3123 let threads_usize = usize::try_from(threads)
3124 .expect("threads must be non-negative")
3125 .max(1);
3126 let mut block_start = 0usize;
3127 let block_span = threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE;
3128 let mut cache = vec![ThreadCache::default(); block_span];
3129 while block_start < left_suffixes_count {
3130 let mut block_end = block_start + block_span;
3131 if block_end > left_suffixes_count {
3132 block_end = left_suffixes_count;
3133 }
3134
3135 d = partial_sorting_scan_left_to_right_32s_6k_block_omp(
3136 t,
3137 sa,
3138 buckets,
3139 d,
3140 &mut cache,
3141 block_start as FastSint,
3142 (block_end - block_start) as FastSint,
3143 threads,
3144 );
3145
3146 block_start = block_end;
3147 }
3148
3149 d
3150}
3151
3152#[doc(hidden)]
3154pub fn partial_sorting_scan_left_to_right_32s_4k_omp(
3155 t: &[SaSint],
3156 sa: &mut [SaSint],
3157 n: SaSint,
3158 k: SaSint,
3159 buckets: &mut [SaSint],
3160 mut d: SaSint,
3161 threads: SaSint,
3162 thread_state: &mut [ThreadState],
3163) -> SaSint {
3164 let k_usize = usize::try_from(k).expect("k must be non-negative");
3165 let induction_offset = 2 * k_usize;
3166 let distinct_offset = 0usize;
3167 let symbol = t[(n - 1) as usize] as usize;
3168 let is_s = usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]);
3169 let pos = buckets[induction_offset + symbol] as usize;
3170 sa[pos] = (n - 1) | ((is_s as SaSint) << (SAINT_BIT - 1)) | SUFFIX_GROUP_MARKER;
3171 buckets[induction_offset + symbol] += 1;
3172 d += 1;
3173 buckets[distinct_offset + buckets_index2(symbol, is_s)] = d;
3174
3175 if threads == 1 || n < 65_536 {
3176 d = partial_sorting_scan_left_to_right_32s_4k(t, sa, k, buckets, d, 0, n as FastSint);
3177 } else {
3178 if thread_state.is_empty() {
3179 return partial_sorting_scan_left_to_right_32s_4k(
3180 t,
3181 sa,
3182 k,
3183 buckets,
3184 d,
3185 0,
3186 n as FastSint,
3187 );
3188 }
3189 let mut block_start = 0usize;
3190 let n_usize = usize::try_from(n).expect("n must be non-negative");
3191 let threads_usize = usize::try_from(threads)
3192 .expect("threads must be non-negative")
3193 .max(1);
3194 let chunk_capacity = threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE;
3195 let mut cache = vec![ThreadCache::default(); chunk_capacity];
3196
3197 while block_start < n_usize {
3198 let mut block_end = block_start + chunk_capacity;
3199 if block_end > n_usize {
3200 block_end = n_usize;
3201 }
3202
3203 d = partial_sorting_scan_left_to_right_32s_4k_block_omp(
3204 t,
3205 sa,
3206 k,
3207 buckets,
3208 d,
3209 &mut cache,
3210 block_start as FastSint,
3211 (block_end - block_start) as FastSint,
3212 threads,
3213 );
3214
3215 block_start = block_end;
3216 }
3217 }
3218
3219 d
3220}
3221
3222#[doc(hidden)]
3224pub fn partial_sorting_scan_left_to_right_32s_1k_omp(
3225 t: &[SaSint],
3226 sa: &mut [SaSint],
3227 n: SaSint,
3228 buckets: &mut [SaSint],
3229 threads: SaSint,
3230 thread_state: &mut [ThreadState],
3231) {
3232 let symbol = t[(n - 1) as usize] as usize;
3233 let pos = buckets[symbol] as usize;
3234 sa[pos] = (n - 1)
3235 | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1));
3236 buckets[symbol] += 1;
3237 if threads == 1 || n < 65_536 {
3238 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n as FastSint);
3239 } else {
3240 if thread_state.is_empty() {
3241 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n as FastSint);
3242 return;
3243 }
3244 let n_usize = usize::try_from(n).expect("n must be non-negative");
3245 let threads_usize = usize::try_from(threads)
3246 .expect("threads must be non-negative")
3247 .max(1);
3248 let mut block_start = 0usize;
3249 let block_span = threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE;
3250 let mut cache = vec![ThreadCache::default(); block_span];
3251
3252 while block_start < n_usize {
3253 let mut block_end = block_start + block_span;
3254 if block_end > n_usize {
3255 block_end = n_usize;
3256 }
3257
3258 partial_sorting_scan_left_to_right_32s_1k_block_omp(
3259 t,
3260 sa,
3261 buckets,
3262 &mut cache,
3263 block_start as FastSint,
3264 (block_end - block_start) as FastSint,
3265 threads,
3266 );
3267
3268 block_start = block_end;
3269 }
3270 }
3271}
3272
3273#[doc(hidden)]
3275pub fn partial_sorting_scan_left_to_right_8u_block_prepare(
3276 t: &[u8],
3277 sa: &[SaSint],
3278 k: SaSint,
3279 buckets: &mut [SaSint],
3280 cache: &mut [ThreadCache],
3281 omp_block_start: FastSint,
3282 omp_block_size: FastSint,
3283) -> (FastSint, FastSint) {
3284 let k_usize = usize::try_from(k).expect("k must be non-negative");
3285 buckets[..2 * k_usize].fill(0);
3286 buckets[2 * k_usize..4 * k_usize].fill(0);
3287
3288 let mut i = omp_block_start;
3289 let mut j = omp_block_start + omp_block_size - 65;
3290 let mut count = 0usize;
3291 let mut d: SaSint = 1;
3292
3293 while i < j {
3294 let mut p0 = sa[i as usize];
3295 cache[count].index = p0;
3296 d += SaSint::from(p0 < 0);
3297 p0 &= SAINT_MAX;
3298 let v0 = buckets_index2(
3299 t[(p0 - 1) as usize] as usize,
3300 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
3301 );
3302 cache[count].symbol = v0 as SaSint;
3303 count += 1;
3304 buckets[v0] += 1;
3305 buckets[2 * k_usize + v0] = d;
3306
3307 let mut p1 = sa[(i + 1) as usize];
3308 cache[count].index = p1;
3309 d += SaSint::from(p1 < 0);
3310 p1 &= SAINT_MAX;
3311 let v1 = buckets_index2(
3312 t[(p1 - 1) as usize] as usize,
3313 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
3314 );
3315 cache[count].symbol = v1 as SaSint;
3316 count += 1;
3317 buckets[v1] += 1;
3318 buckets[2 * k_usize + v1] = d;
3319
3320 i += 2;
3321 }
3322
3323 j += 65;
3324 while i < j {
3325 let mut p = sa[i as usize];
3326 cache[count].index = p;
3327 d += SaSint::from(p < 0);
3328 p &= SAINT_MAX;
3329 let v = buckets_index2(
3330 t[(p - 1) as usize] as usize,
3331 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3332 );
3333 cache[count].symbol = v as SaSint;
3334 count += 1;
3335 buckets[v] += 1;
3336 buckets[2 * k_usize + v] = d;
3337 i += 1;
3338 }
3339
3340 (d as FastSint - 1, count as FastSint)
3341}
3342
3343#[doc(hidden)]
3345pub fn partial_sorting_scan_left_to_right_8u_block_place(
3346 sa: &mut [SaSint],
3347 buckets: &mut [SaSint],
3348 k: SaSint,
3349 cache: &[ThreadCache],
3350 count: FastSint,
3351 mut d: SaSint,
3352) {
3353 let split = 2 * usize::try_from(k).expect("k must be non-negative");
3354 let (induction_bucket, distinct_names) = buckets.split_at_mut(split);
3355
3356 let mut i = 0usize;
3357 let mut j = usize::try_from(count)
3358 .expect("count must be non-negative")
3359 .saturating_sub(1);
3360 while i < j {
3361 let p0 = cache[i].index;
3362 d += SaSint::from(p0 < 0);
3363 let v0 = cache[i].symbol as usize;
3364 let pos0 = induction_bucket[v0] as usize;
3365 sa[pos0] = (p0 - 1) | (((distinct_names[v0] != d) as SaSint) << (SAINT_BIT - 1));
3366 induction_bucket[v0] += 1;
3367 distinct_names[v0] = d;
3368
3369 let p1 = cache[i + 1].index;
3370 d += SaSint::from(p1 < 0);
3371 let v1 = cache[i + 1].symbol as usize;
3372 let pos1 = induction_bucket[v1] as usize;
3373 sa[pos1] = (p1 - 1) | (((distinct_names[v1] != d) as SaSint) << (SAINT_BIT - 1));
3374 induction_bucket[v1] += 1;
3375 distinct_names[v1] = d;
3376
3377 i += 2;
3378 }
3379
3380 j += 1;
3381 while i < j {
3382 let p = cache[i].index;
3383 d += SaSint::from(p < 0);
3384 let v = cache[i].symbol as usize;
3385 let pos = induction_bucket[v] as usize;
3386 sa[pos] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3387 induction_bucket[v] += 1;
3388 distinct_names[v] = d;
3389 i += 1;
3390 }
3391}
3392
3393#[doc(hidden)]
3395pub fn partial_sorting_scan_left_to_right_8u_block_omp(
3396 t: &[u8],
3397 sa: &mut [SaSint],
3398 k: SaSint,
3399 buckets: &mut [SaSint],
3400 d: SaSint,
3401 block_start: FastSint,
3402 block_size: FastSint,
3403 threads: SaSint,
3404 thread_state: &mut [ThreadState],
3405) -> SaSint {
3406 let mut d = d;
3407 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
3408 let k_usize = usize::try_from(k).expect("k must be non-negative");
3409 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
3410 usize::try_from(threads)
3411 .expect("threads must be non-negative")
3412 .min(thread_state.len())
3413 .max(1)
3414 } else {
3415 1
3416 };
3417 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
3418
3419 if omp_num_threads == 1 {
3420 return partial_sorting_scan_left_to_right_8u(t, sa, buckets, d, block_start, block_size);
3421 }
3422
3423 for omp_thread_num in 0..omp_num_threads {
3424 let mut omp_block_start = omp_thread_num * omp_block_stride;
3425 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
3426 omp_block_stride
3427 } else {
3428 block_size_usize - omp_block_start
3429 };
3430 omp_block_start += usize::try_from(block_start).expect("block_start must be non-negative");
3431
3432 let state = &mut thread_state[omp_thread_num];
3433 let (position, count) = partial_sorting_scan_left_to_right_8u_block_prepare(
3434 t,
3435 sa,
3436 k,
3437 &mut state.buckets,
3438 &mut state.cache,
3439 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
3440 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
3441 );
3442 state.position = position;
3443 state.count = count;
3444 }
3445
3446 let induction_offset = 4 * ALPHABET_SIZE;
3447 let distinct_offset = 2 * ALPHABET_SIZE;
3448 let (prefix, induction_tail) = buckets.split_at_mut(induction_offset);
3449 let induction_bucket = &mut induction_tail[..2 * k_usize];
3450 let distinct_names = &mut prefix[distinct_offset..distinct_offset + 2 * k_usize];
3451
3452 for tnum in 0..omp_num_threads {
3453 let state = &mut thread_state[tnum];
3454 let (temp_induction_bucket, temp_tail) = state.buckets.split_at_mut(2 * k_usize);
3455 let temp_distinct_names = &mut temp_tail[..2 * k_usize];
3456
3457 for c in 0..2 * k_usize {
3458 let a = induction_bucket[c];
3459 let b = temp_induction_bucket[c];
3460 induction_bucket[c] = a + b;
3461 temp_induction_bucket[c] = a;
3462 }
3463
3464 d -= 1;
3465 for c in 0..2 * k_usize {
3466 let a = distinct_names[c];
3467 let b = temp_distinct_names[c];
3468 let next_d = b + d;
3469 distinct_names[c] = if b > 0 { next_d } else { a };
3470 temp_distinct_names[c] = a;
3471 }
3472 d += 1 + SaSint::try_from(state.position).expect("position must fit SaSint");
3473 state.position = FastSint::try_from(d).expect("d must fit FastSint") - state.position;
3474 }
3475
3476 for tnum in 0..omp_num_threads {
3477 let state = &mut thread_state[tnum];
3478 partial_sorting_scan_left_to_right_8u_block_place(
3479 sa,
3480 &mut state.buckets,
3481 k,
3482 &state.cache,
3483 state.count,
3484 state.position as SaSint,
3485 );
3486 }
3487
3488 d
3489}
3490
3491#[doc(hidden)]
3493pub fn partial_sorting_shift_markers_8u_omp(
3494 sa: &mut [SaSint],
3495 n: SaSint,
3496 buckets: &[SaSint],
3497 threads: SaSint,
3498) {
3499 let temp_bucket = &buckets[4 * ALPHABET_SIZE..];
3500 let thread_count = if threads > 1 && n >= 65536 {
3501 usize::try_from(threads).expect("threads must be positive")
3502 } else {
3503 1
3504 };
3505 let c_step = buckets_index2(1, 0) as isize;
3506 let c_min = buckets_index2(1, 0) as isize;
3507 let c_max = buckets_index2(ALPHABET_SIZE - 1, 0) as isize;
3508 for t in 0..thread_count {
3509 let mut c = c_max - (t as isize * c_step);
3510 while c >= c_min {
3511 let c_usize = c as usize;
3512 let mut i = temp_bucket[c_usize] as isize - 1;
3513 let mut j = buckets[c_usize - buckets_index2(1, 0)] as isize + 3;
3514 let mut s = SAINT_MIN;
3515
3516 while i >= j {
3517 let p0 = sa[i as usize];
3518 let q0 = (p0 & SAINT_MIN) ^ s;
3519 s ^= q0;
3520 sa[i as usize] = p0 ^ q0;
3521
3522 let p1 = sa[(i - 1) as usize];
3523 let q1 = (p1 & SAINT_MIN) ^ s;
3524 s ^= q1;
3525 sa[(i - 1) as usize] = p1 ^ q1;
3526
3527 let p2 = sa[(i - 2) as usize];
3528 let q2 = (p2 & SAINT_MIN) ^ s;
3529 s ^= q2;
3530 sa[(i - 2) as usize] = p2 ^ q2;
3531
3532 let p3 = sa[(i - 3) as usize];
3533 let q3 = (p3 & SAINT_MIN) ^ s;
3534 s ^= q3;
3535 sa[(i - 3) as usize] = p3 ^ q3;
3536
3537 i -= 4;
3538 }
3539
3540 j -= 3;
3541 while i >= j {
3542 let p = sa[i as usize];
3543 let q = (p & SAINT_MIN) ^ s;
3544 s ^= q;
3545 sa[i as usize] = p ^ q;
3546 i -= 1;
3547 }
3548
3549 c -= c_step * thread_count as isize;
3550 }
3551 }
3552}
3553
3554#[doc(hidden)]
3556pub fn partial_sorting_shift_markers_32s_6k_omp(
3557 sa: &mut [SaSint],
3558 k: SaSint,
3559 buckets: &[SaSint],
3560 threads: SaSint,
3561) {
3562 let k_usize = usize::try_from(k).expect("k must be non-negative");
3563 let temp_bucket = &buckets[4 * k_usize..];
3564 let thread_count = if threads > 1 && k >= 65536 {
3565 usize::try_from(threads).expect("threads must be positive")
3566 } else {
3567 1
3568 };
3569 for t in 0..thread_count {
3570 let mut c = k_usize as isize - 1 - t as isize;
3571 while c >= 1 {
3572 let c_usize = c as usize;
3573 let mut i = buckets[buckets_index4(c_usize, 0)] as isize - 1;
3574 let mut j = temp_bucket[buckets_index2(c_usize - 1, 0)] as isize + 3;
3575 let mut s = SAINT_MIN;
3576
3577 while i >= j {
3578 let p0 = sa[i as usize];
3579 let q0 = (p0 & SAINT_MIN) ^ s;
3580 s ^= q0;
3581 sa[i as usize] = p0 ^ q0;
3582
3583 let p1 = sa[(i - 1) as usize];
3584 let q1 = (p1 & SAINT_MIN) ^ s;
3585 s ^= q1;
3586 sa[(i - 1) as usize] = p1 ^ q1;
3587
3588 let p2 = sa[(i - 2) as usize];
3589 let q2 = (p2 & SAINT_MIN) ^ s;
3590 s ^= q2;
3591 sa[(i - 2) as usize] = p2 ^ q2;
3592
3593 let p3 = sa[(i - 3) as usize];
3594 let q3 = (p3 & SAINT_MIN) ^ s;
3595 s ^= q3;
3596 sa[(i - 3) as usize] = p3 ^ q3;
3597
3598 i -= 4;
3599 }
3600
3601 j -= 3;
3602 while i >= j {
3603 let p = sa[i as usize];
3604 let q = (p & SAINT_MIN) ^ s;
3605 s ^= q;
3606 sa[i as usize] = p ^ q;
3607 i -= 1;
3608 }
3609
3610 c -= thread_count as isize;
3611 }
3612 }
3613}
3614
3615#[doc(hidden)]
3617pub fn partial_sorting_shift_markers_32s_4k(sa: &mut [SaSint], n: SaSint) {
3618 let mut i = n as isize - 1;
3619 let mut s = SUFFIX_GROUP_MARKER;
3620 while i >= 3 {
3621 let p0 = sa[i as usize];
3622 let q0 =
3623 ((p0 & SUFFIX_GROUP_MARKER) ^ s) & (((p0 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3624 s ^= q0;
3625 sa[i as usize] = p0 ^ q0;
3626
3627 let p1 = sa[(i - 1) as usize];
3628 let q1 =
3629 ((p1 & SUFFIX_GROUP_MARKER) ^ s) & (((p1 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3630 s ^= q1;
3631 sa[(i - 1) as usize] = p1 ^ q1;
3632
3633 let p2 = sa[(i - 2) as usize];
3634 let q2 =
3635 ((p2 & SUFFIX_GROUP_MARKER) ^ s) & (((p2 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3636 s ^= q2;
3637 sa[(i - 2) as usize] = p2 ^ q2;
3638
3639 let p3 = sa[(i - 3) as usize];
3640 let q3 =
3641 ((p3 & SUFFIX_GROUP_MARKER) ^ s) & (((p3 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3642 s ^= q3;
3643 sa[(i - 3) as usize] = p3 ^ q3;
3644
3645 i -= 4;
3646 }
3647
3648 while i >= 0 {
3649 let p = sa[i as usize];
3650 let q = ((p & SUFFIX_GROUP_MARKER) ^ s) & (((p > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3651 s ^= q;
3652 sa[i as usize] = p ^ q;
3653 i -= 1;
3654 }
3655}
3656
3657#[doc(hidden)]
3659pub fn partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
3660 let k_usize = usize::try_from(k).expect("k must be non-negative");
3661 let temp_offset = 4 * k_usize;
3662 for i in 0..k_usize {
3663 let src = buckets_index2(i, 0);
3664 let dst = 2 * src;
3665 buckets[dst] = buckets[temp_offset + src];
3666 buckets[dst + 1] = buckets[temp_offset + src + 1];
3667 }
3668}
3669
3670#[doc(hidden)]
3672pub fn partial_sorting_scan_right_to_left_8u(
3673 t: &[u8],
3674 sa: &mut [SaSint],
3675 buckets: &mut [SaSint],
3676 mut d: SaSint,
3677 omp_block_start: FastSint,
3678 omp_block_size: FastSint,
3679) -> SaSint {
3680 if omp_block_size <= 0 {
3681 return d;
3682 }
3683
3684 let prefetch_distance = 64usize;
3685 let (induction_bucket, distinct_names_all) = buckets.split_at_mut(2 * ALPHABET_SIZE);
3686 let distinct_names = &mut distinct_names_all[..2 * ALPHABET_SIZE];
3687
3688 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
3689 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
3690 let mut i = start + size - 1;
3691 let mut j = start + prefetch_distance + 1;
3692
3693 while i >= j {
3694 let mut p0 = sa[i];
3695 d += SaSint::from(p0 < 0);
3696 p0 &= SAINT_MAX;
3697
3698 let p0_usize = p0 as usize;
3699 let v0 = buckets_index2(
3700 t[p0_usize - 1] as usize,
3701 usize::from(t[p0_usize - 2] > t[p0_usize - 1]),
3702 );
3703
3704 induction_bucket[v0] -= 1;
3705 let slot0 = induction_bucket[v0] as usize;
3706 sa[slot0] = (p0 - 1) | (((distinct_names[v0] != d) as SaSint) << (SAINT_BIT - 1));
3707 distinct_names[v0] = d;
3708
3709 let mut p1 = sa[i - 1];
3710 d += SaSint::from(p1 < 0);
3711 p1 &= SAINT_MAX;
3712
3713 let p1_usize = p1 as usize;
3714 let v1 = buckets_index2(
3715 t[p1_usize - 1] as usize,
3716 usize::from(t[p1_usize - 2] > t[p1_usize - 1]),
3717 );
3718
3719 induction_bucket[v1] -= 1;
3720 let slot1 = induction_bucket[v1] as usize;
3721 sa[slot1] = (p1 - 1) | (((distinct_names[v1] != d) as SaSint) << (SAINT_BIT - 1));
3722 distinct_names[v1] = d;
3723
3724 i -= 2;
3725 }
3726
3727 j = if start + prefetch_distance < start + size {
3728 start
3729 } else {
3730 start
3731 };
3732 while i >= j {
3733 let mut p = sa[i];
3734 d += SaSint::from(p < 0);
3735 p &= SAINT_MAX;
3736
3737 let p_usize = p as usize;
3738 let v = buckets_index2(
3739 t[p_usize - 1] as usize,
3740 usize::from(t[p_usize - 2] > t[p_usize - 1]),
3741 );
3742
3743 induction_bucket[v] -= 1;
3744 let slot = induction_bucket[v] as usize;
3745 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3746 distinct_names[v] = d;
3747
3748 if i == 0 {
3749 break;
3750 }
3751 i -= 1;
3752 }
3753
3754 d
3755}
3756
3757#[doc(hidden)]
3759pub fn partial_gsa_scan_right_to_left_8u(
3760 t: &[u8],
3761 sa: &mut [SaSint],
3762 buckets: &mut [SaSint],
3763 mut d: SaSint,
3764 omp_block_start: FastSint,
3765 omp_block_size: FastSint,
3766) -> SaSint {
3767 if omp_block_size <= 0 {
3768 return d;
3769 }
3770
3771 let prefetch_distance = 64usize;
3772 let (induction_bucket, distinct_names_all) = buckets.split_at_mut(2 * ALPHABET_SIZE);
3773 let distinct_names = &mut distinct_names_all[..2 * ALPHABET_SIZE];
3774
3775 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
3776 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
3777 let mut i = start + size - 1;
3778 let mut j = start + prefetch_distance + 1;
3779
3780 while i >= j {
3781 let mut p0 = sa[i];
3782 d += SaSint::from(p0 < 0);
3783 p0 &= SAINT_MAX;
3784
3785 let p0_usize = p0 as usize;
3786 let v0 = buckets_index2(
3787 t[p0_usize - 1] as usize,
3788 usize::from(t[p0_usize - 2] > t[p0_usize - 1]),
3789 );
3790
3791 if v0 != 1 {
3792 induction_bucket[v0] -= 1;
3793 let slot0 = induction_bucket[v0] as usize;
3794 sa[slot0] = (p0 - 1) | (((distinct_names[v0] != d) as SaSint) << (SAINT_BIT - 1));
3795 distinct_names[v0] = d;
3796 }
3797
3798 let mut p1 = sa[i - 1];
3799 d += SaSint::from(p1 < 0);
3800 p1 &= SAINT_MAX;
3801
3802 let p1_usize = p1 as usize;
3803 let v1 = buckets_index2(
3804 t[p1_usize - 1] as usize,
3805 usize::from(t[p1_usize - 2] > t[p1_usize - 1]),
3806 );
3807
3808 if v1 != 1 {
3809 induction_bucket[v1] -= 1;
3810 let slot1 = induction_bucket[v1] as usize;
3811 sa[slot1] = (p1 - 1) | (((distinct_names[v1] != d) as SaSint) << (SAINT_BIT - 1));
3812 distinct_names[v1] = d;
3813 }
3814
3815 i -= 2;
3816 }
3817
3818 j = start;
3819 while i >= j {
3820 let mut p = sa[i];
3821 d += SaSint::from(p < 0);
3822 p &= SAINT_MAX;
3823
3824 let p_usize = p as usize;
3825 let v = buckets_index2(
3826 t[p_usize - 1] as usize,
3827 usize::from(t[p_usize - 2] > t[p_usize - 1]),
3828 );
3829
3830 if v != 1 {
3831 induction_bucket[v] -= 1;
3832 let slot = induction_bucket[v] as usize;
3833 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3834 distinct_names[v] = d;
3835 }
3836
3837 if i == 0 {
3838 break;
3839 }
3840 i -= 1;
3841 }
3842
3843 d
3844}
3845
3846#[doc(hidden)]
3848pub fn partial_sorting_scan_right_to_left_8u_block_prepare(
3849 t: &[u8],
3850 sa: &[SaSint],
3851 k: SaSint,
3852 buckets: &mut [SaSint],
3853 cache: &mut [ThreadCache],
3854 omp_block_start: FastSint,
3855 omp_block_size: FastSint,
3856) -> (FastSint, FastSint) {
3857 let k_usize = usize::try_from(k).expect("k must be non-negative");
3858 let (induction_bucket, distinct_names_all) = buckets.split_at_mut(2 * k_usize);
3859 let distinct_names = &mut distinct_names_all[..2 * k_usize];
3860 induction_bucket.fill(0);
3861 distinct_names.fill(0);
3862
3863 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
3864 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
3865 let mut count = 0usize;
3866 let mut d = 1;
3867
3868 let mut i = start + size;
3869 while i > start {
3870 i -= 1;
3871
3872 let mut p = sa[i];
3873 cache[count].index = p;
3874 d += SaSint::from(p < 0);
3875 p &= SAINT_MAX;
3876
3877 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
3878 let v = buckets_index2(
3879 t[p_usize - 1] as usize,
3880 usize::from(t[p_usize - 2] > t[p_usize - 1]),
3881 );
3882
3883 cache[count].symbol = v as SaSint;
3884 induction_bucket[v] += 1;
3885 distinct_names[v] = d;
3886 count += 1;
3887 }
3888
3889 ((d - 1) as FastSint, count as FastSint)
3890}
3891
3892#[doc(hidden)]
3894pub fn partial_sorting_scan_right_to_left_8u_block_place(
3895 sa: &mut [SaSint],
3896 buckets: &mut [SaSint],
3897 k: SaSint,
3898 cache: &[ThreadCache],
3899 count: FastSint,
3900 mut d: SaSint,
3901) {
3902 let split = 2 * usize::try_from(k).expect("k must be non-negative");
3903 let (induction_bucket, distinct_names) = buckets.split_at_mut(split);
3904
3905 let count = usize::try_from(count).expect("count must be non-negative");
3906 for entry in &cache[..count] {
3907 let p = entry.index;
3908 d += SaSint::from(p < 0);
3909 let v = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
3910 induction_bucket[v] -= 1;
3911 let slot = usize::try_from(induction_bucket[v]).expect("bucket slot must be non-negative");
3912 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3913 distinct_names[v] = d;
3914 }
3915}
3916
3917#[doc(hidden)]
3919pub fn partial_gsa_scan_right_to_left_8u_block_place(
3920 sa: &mut [SaSint],
3921 buckets: &mut [SaSint],
3922 k: SaSint,
3923 cache: &[ThreadCache],
3924 count: FastSint,
3925 mut d: SaSint,
3926) {
3927 let split = 2 * usize::try_from(k).expect("k must be non-negative");
3928 let (induction_bucket, distinct_names) = buckets.split_at_mut(split);
3929
3930 let count = usize::try_from(count).expect("count must be non-negative");
3931 for entry in &cache[..count] {
3932 let p = entry.index;
3933 d += SaSint::from(p < 0);
3934 let v = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
3935 if v != 1 {
3936 induction_bucket[v] -= 1;
3937 let slot =
3938 usize::try_from(induction_bucket[v]).expect("bucket slot must be non-negative");
3939 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3940 distinct_names[v] = d;
3941 }
3942 }
3943}
3944
3945#[doc(hidden)]
3947pub fn partial_sorting_scan_right_to_left_8u_block_omp(
3948 t: &[u8],
3949 sa: &mut [SaSint],
3950 k: SaSint,
3951 buckets: &mut [SaSint],
3952 d: SaSint,
3953 block_start: FastSint,
3954 block_size: FastSint,
3955 threads: SaSint,
3956 thread_state: &mut [ThreadState],
3957) -> SaSint {
3958 let mut d = d;
3959 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
3960 let k_usize = usize::try_from(k).expect("k must be non-negative");
3961 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
3962 usize::try_from(threads)
3963 .expect("threads must be non-negative")
3964 .min(thread_state.len())
3965 .max(1)
3966 } else {
3967 1
3968 };
3969 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
3970
3971 if omp_num_threads == 1 {
3972 return partial_sorting_scan_right_to_left_8u(t, sa, buckets, d, block_start, block_size);
3973 }
3974
3975 for omp_thread_num in 0..omp_num_threads {
3976 let mut omp_block_start = omp_thread_num * omp_block_stride;
3977 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
3978 omp_block_stride
3979 } else {
3980 block_size_usize - omp_block_start
3981 };
3982 omp_block_start += usize::try_from(block_start).expect("block_start must be non-negative");
3983
3984 let state = &mut thread_state[omp_thread_num];
3985 let (position, count) = partial_sorting_scan_right_to_left_8u_block_prepare(
3986 t,
3987 sa,
3988 k,
3989 &mut state.buckets,
3990 &mut state.cache,
3991 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
3992 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
3993 );
3994 state.position = position;
3995 state.count = count;
3996 }
3997
3998 let distinct_offset = 2 * ALPHABET_SIZE;
3999 let (induction_bucket, distinct_tail) = buckets.split_at_mut(distinct_offset);
4000 let distinct_names = &mut distinct_tail[..2 * k_usize];
4001
4002 for tnum in (0..omp_num_threads).rev() {
4003 let state = &mut thread_state[tnum];
4004 let (temp_induction_bucket, temp_tail) = state.buckets.split_at_mut(2 * k_usize);
4005 let temp_distinct_names = &mut temp_tail[..2 * k_usize];
4006
4007 for c in 0..2 * k_usize {
4008 let a = induction_bucket[c];
4009 let b = temp_induction_bucket[c];
4010 induction_bucket[c] = a - b;
4011 temp_induction_bucket[c] = a;
4012 }
4013
4014 d -= 1;
4015 for c in 0..2 * k_usize {
4016 let a = distinct_names[c];
4017 let b = temp_distinct_names[c];
4018 let next_d = b + d;
4019 distinct_names[c] = if b > 0 { next_d } else { a };
4020 temp_distinct_names[c] = a;
4021 }
4022 d += 1 + SaSint::try_from(state.position).expect("position must fit SaSint");
4023 state.position = FastSint::try_from(d).expect("d must fit FastSint") - state.position;
4024 }
4025
4026 for tnum in 0..omp_num_threads {
4027 let state = &mut thread_state[tnum];
4028 partial_sorting_scan_right_to_left_8u_block_place(
4029 sa,
4030 &mut state.buckets,
4031 k,
4032 &state.cache,
4033 state.count,
4034 state.position as SaSint,
4035 );
4036 }
4037
4038 d
4039}
4040
4041#[doc(hidden)]
4043pub fn partial_gsa_scan_right_to_left_8u_block_omp(
4044 t: &[u8],
4045 sa: &mut [SaSint],
4046 k: SaSint,
4047 buckets: &mut [SaSint],
4048 d: SaSint,
4049 block_start: FastSint,
4050 block_size: FastSint,
4051 threads: SaSint,
4052 thread_state: &mut [ThreadState],
4053) -> SaSint {
4054 let mut d = d;
4055 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4056 let k_usize = usize::try_from(k).expect("k must be non-negative");
4057 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
4058 usize::try_from(threads)
4059 .expect("threads must be non-negative")
4060 .min(thread_state.len())
4061 .max(1)
4062 } else {
4063 1
4064 };
4065 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4066
4067 if omp_num_threads == 1 {
4068 return partial_gsa_scan_right_to_left_8u(t, sa, buckets, d, block_start, block_size);
4069 }
4070
4071 for omp_thread_num in 0..omp_num_threads {
4072 let mut omp_block_start = omp_thread_num * omp_block_stride;
4073 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4074 omp_block_stride
4075 } else {
4076 block_size_usize - omp_block_start
4077 };
4078 omp_block_start += usize::try_from(block_start).expect("block_start must be non-negative");
4079
4080 let state = &mut thread_state[omp_thread_num];
4081 let (position, count) = partial_sorting_scan_right_to_left_8u_block_prepare(
4082 t,
4083 sa,
4084 k,
4085 &mut state.buckets,
4086 &mut state.cache,
4087 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
4088 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
4089 );
4090 state.position = position;
4091 state.count = count;
4092 }
4093
4094 let distinct_offset = 2 * ALPHABET_SIZE;
4095 let (induction_bucket, distinct_tail) = buckets.split_at_mut(distinct_offset);
4096 let distinct_names = &mut distinct_tail[..2 * k_usize];
4097
4098 for tnum in (0..omp_num_threads).rev() {
4099 let state = &mut thread_state[tnum];
4100 let (temp_induction_bucket, temp_tail) = state.buckets.split_at_mut(2 * k_usize);
4101 let temp_distinct_names = &mut temp_tail[..2 * k_usize];
4102
4103 for c in 0..2 * k_usize {
4104 let a = induction_bucket[c];
4105 let b = temp_induction_bucket[c];
4106 induction_bucket[c] = a - b;
4107 temp_induction_bucket[c] = a;
4108 }
4109
4110 d -= 1;
4111 for c in 0..2 * k_usize {
4112 let a = distinct_names[c];
4113 let b = temp_distinct_names[c];
4114 let next_d = b + d;
4115 distinct_names[c] = if b > 0 { next_d } else { a };
4116 temp_distinct_names[c] = a;
4117 }
4118 d += 1 + SaSint::try_from(state.position).expect("position must fit SaSint");
4119 state.position = FastSint::try_from(d).expect("d must fit FastSint") - state.position;
4120 }
4121
4122 for tnum in 0..omp_num_threads {
4123 let state = &mut thread_state[tnum];
4124 partial_gsa_scan_right_to_left_8u_block_place(
4125 sa,
4126 &mut state.buckets,
4127 k,
4128 &state.cache,
4129 state.count,
4130 state.position as SaSint,
4131 );
4132 }
4133
4134 d
4135}
4136
4137#[doc(hidden)]
4139pub fn partial_sorting_scan_right_to_left_8u_omp(
4140 t: &[u8],
4141 sa: &mut [SaSint],
4142 n: SaSint,
4143 k: SaSint,
4144 buckets: &mut [SaSint],
4145 first_lms_suffix: SaSint,
4146 left_suffixes_count: SaSint,
4147 mut d: SaSint,
4148 threads: SaSint,
4149 thread_state: &mut [ThreadState],
4150) {
4151 let scan_start = left_suffixes_count as FastSint + 1;
4152 let scan_end = n as FastSint - first_lms_suffix as FastSint;
4153
4154 if threads == 1 || (scan_end - scan_start) < 65_536 {
4155 let _ = partial_sorting_scan_right_to_left_8u(
4156 t,
4157 sa,
4158 buckets,
4159 d,
4160 scan_start,
4161 scan_end - scan_start,
4162 );
4163 return;
4164 }
4165
4166 let distinct_offset = 2 * ALPHABET_SIZE;
4167
4168 let mut block_start = usize::try_from(scan_end - 1).expect("scan end must be positive");
4169 let scan_start_usize = usize::try_from(scan_start).expect("scan_start must be non-negative");
4170 let threads_usize = usize::try_from(threads)
4171 .expect("threads must be non-negative")
4172 .min(thread_state.len())
4173 .max(1);
4174
4175 while block_start >= scan_start_usize {
4176 if sa[block_start] == 0 {
4177 if block_start == 0 {
4178 break;
4179 }
4180 block_start -= 1;
4181 } else {
4182 let mut block_max_end = block_start.saturating_sub(
4183 threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize),
4184 );
4185 if block_max_end + 1 < scan_start_usize {
4186 block_max_end = scan_start_usize.saturating_sub(1);
4187 }
4188 let mut block_end = block_start - 1;
4189 while block_end > block_max_end && sa[block_end] != 0 {
4190 block_end -= 1;
4191 }
4192 let block_size = block_start - block_end;
4193
4194 if block_size < 32 {
4195 while block_start > block_end {
4196 let p = sa[block_start];
4197 d += SaSint::from(p < 0);
4198 let p = p & SAINT_MAX;
4199 let v = buckets_index2(
4200 t[(p - 1) as usize] as usize,
4201 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4202 );
4203 buckets[v] -= 1;
4204 let slot =
4205 usize::try_from(buckets[v]).expect("bucket slot must be non-negative");
4206 sa[slot] = (p - 1)
4207 | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
4208 buckets[distinct_offset + v] = d;
4209
4210 if block_start == 0 {
4211 break;
4212 }
4213 block_start -= 1;
4214 }
4215 } else {
4216 d = partial_sorting_scan_right_to_left_8u_block_omp(
4217 t,
4218 sa,
4219 k,
4220 buckets,
4221 d,
4222 FastSint::try_from(block_end + 1).expect("block start must fit FastSint"),
4223 FastSint::try_from(block_size).expect("block size must fit FastSint"),
4224 threads,
4225 thread_state,
4226 );
4227 block_start = block_end;
4228 }
4229 }
4230 }
4231}
4232
4233#[doc(hidden)]
4235pub fn partial_gsa_scan_right_to_left_8u_omp(
4236 t: &[u8],
4237 sa: &mut [SaSint],
4238 n: SaSint,
4239 k: SaSint,
4240 buckets: &mut [SaSint],
4241 first_lms_suffix: SaSint,
4242 left_suffixes_count: SaSint,
4243 mut d: SaSint,
4244 threads: SaSint,
4245 thread_state: &mut [ThreadState],
4246) {
4247 let scan_start = left_suffixes_count as FastSint + 1;
4248 let scan_end = n as FastSint - first_lms_suffix as FastSint;
4249
4250 if threads == 1 || (scan_end - scan_start) < 65_536 {
4251 let _ =
4252 partial_gsa_scan_right_to_left_8u(t, sa, buckets, d, scan_start, scan_end - scan_start);
4253 return;
4254 }
4255
4256 let distinct_offset = 2 * ALPHABET_SIZE;
4257 let mut block_start = usize::try_from(scan_end - 1).expect("scan end must be positive");
4258 let scan_start_usize = usize::try_from(scan_start).expect("scan_start must be non-negative");
4259 let threads_usize = usize::try_from(threads)
4260 .expect("threads must be non-negative")
4261 .min(thread_state.len())
4262 .max(1);
4263
4264 while block_start >= scan_start_usize {
4265 if sa[block_start] == 0 {
4266 if block_start == 0 {
4267 break;
4268 }
4269 block_start -= 1;
4270 } else {
4271 let mut block_max_end = block_start.saturating_sub(
4272 threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize),
4273 );
4274 if block_max_end + 1 < scan_start_usize {
4275 block_max_end = scan_start_usize.saturating_sub(1);
4276 }
4277 let mut block_end = block_start - 1;
4278 while block_end > block_max_end && sa[block_end] != 0 {
4279 block_end -= 1;
4280 }
4281 let block_size = block_start - block_end;
4282
4283 if block_size < 32 {
4284 while block_start > block_end {
4285 let p = sa[block_start];
4286 d += SaSint::from(p < 0);
4287 let p = p & SAINT_MAX;
4288 let v = buckets_index2(
4289 t[(p - 1) as usize] as usize,
4290 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4291 );
4292 if v != 1 {
4293 buckets[v] -= 1;
4294 let slot =
4295 usize::try_from(buckets[v]).expect("bucket slot must be non-negative");
4296 sa[slot] = (p - 1)
4297 | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
4298 buckets[distinct_offset + v] = d;
4299 }
4300
4301 if block_start == 0 {
4302 break;
4303 }
4304 block_start -= 1;
4305 }
4306 } else {
4307 d = partial_gsa_scan_right_to_left_8u_block_omp(
4308 t,
4309 sa,
4310 k,
4311 buckets,
4312 d,
4313 FastSint::try_from(block_end + 1).expect("block start must fit FastSint"),
4314 FastSint::try_from(block_size).expect("block size must fit FastSint"),
4315 threads,
4316 thread_state,
4317 );
4318 block_start = block_end;
4319 }
4320 }
4321 }
4322}
4323
4324#[doc(hidden)]
4326pub fn partial_sorting_scan_right_to_left_32s_6k(
4327 t: &[SaSint],
4328 sa: &mut [SaSint],
4329 buckets: &mut [SaSint],
4330 mut d: SaSint,
4331 omp_block_start: FastSint,
4332 omp_block_size: FastSint,
4333) -> SaSint {
4334 if omp_block_size <= 0 {
4335 return d;
4336 }
4337
4338 let prefetch_distance: FastSint = 64;
4339 let mut i = omp_block_start + omp_block_size - 1;
4340 let mut j = omp_block_start + 2 * prefetch_distance + 1;
4341
4342 while i >= j {
4343 let mut p0 = sa[i as usize];
4344 d += SaSint::from(p0 < 0);
4345 p0 &= SAINT_MAX;
4346 let p0u = p0 as usize;
4347 let v0 = buckets_index4(t[p0u - 1] as usize, usize::from(t[p0u - 2] > t[p0u - 1]));
4348 buckets[v0] -= 1;
4349 let slot0 = buckets[v0] as usize;
4350 sa[slot0] = (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4351 buckets[2 + v0] = d;
4352
4353 let mut p1 = sa[(i - 1) as usize];
4354 d += SaSint::from(p1 < 0);
4355 p1 &= SAINT_MAX;
4356 let p1u = p1 as usize;
4357 let v1 = buckets_index4(t[p1u - 1] as usize, usize::from(t[p1u - 2] > t[p1u - 1]));
4358 buckets[v1] -= 1;
4359 let slot1 = buckets[v1] as usize;
4360 sa[slot1] = (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4361 buckets[2 + v1] = d;
4362
4363 i -= 2;
4364 }
4365
4366 j -= 2 * prefetch_distance + 1;
4367 while i >= j {
4368 let mut p = sa[i as usize];
4369 d += SaSint::from(p < 0);
4370 p &= SAINT_MAX;
4371 let pu = p as usize;
4372 let v = buckets_index4(t[pu - 1] as usize, usize::from(t[pu - 2] > t[pu - 1]));
4373
4374 buckets[v] -= 1;
4375 let slot = buckets[v] as usize;
4376 sa[slot] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4377 buckets[2 + v] = d;
4378 i -= 1;
4379 }
4380
4381 d
4382}
4383
4384#[doc(hidden)]
4386pub fn partial_sorting_scan_right_to_left_32s_4k(
4387 t: &[SaSint],
4388 sa: &mut [SaSint],
4389 k: SaSint,
4390 buckets: &mut [SaSint],
4391 mut d: SaSint,
4392 omp_block_start: FastSint,
4393 omp_block_size: FastSint,
4394) -> SaSint {
4395 if omp_block_size <= 0 {
4396 return d;
4397 }
4398
4399 let k_usize = usize::try_from(k).expect("k must be non-negative");
4400 let prefetch_distance: FastSint = 64;
4401 let induction_offset = 3 * k_usize;
4402
4403 let mut i = omp_block_start + omp_block_size - 1;
4404 let mut j = omp_block_start + 2 * prefetch_distance + 1;
4405
4406 while i >= j {
4407 let i0 = i as usize;
4408 let mut p0 = sa[i0];
4409 if p0 > 0 {
4410 sa[i0] = 0;
4411 d += p0 >> (SUFFIX_GROUP_BIT - 1);
4412 p0 &= !SUFFIX_GROUP_MARKER;
4413
4414 let p0u = p0 as usize;
4415 let c0 = t[p0u - 1];
4416 let f0 = usize::from(t[p0u - 2] > c0);
4417 let v0 = buckets_index2(c0 as usize, f0);
4418 let c0u = c0 as usize;
4419 buckets[induction_offset + c0u] -= 1;
4420 let slot0 = buckets[induction_offset + c0u] as usize;
4421 sa[slot0] = (p0 - 1)
4422 | ((f0 as SaSint) << (SAINT_BIT - 1))
4423 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4424 buckets[v0] = d;
4425 }
4426
4427 let i1 = (i - 1) as usize;
4428 let mut p1 = sa[i1];
4429 if p1 > 0 {
4430 sa[i1] = 0;
4431 d += p1 >> (SUFFIX_GROUP_BIT - 1);
4432 p1 &= !SUFFIX_GROUP_MARKER;
4433
4434 let p1u = p1 as usize;
4435 let c1 = t[p1u - 1];
4436 let f1 = usize::from(t[p1u - 2] > c1);
4437 let v1 = buckets_index2(c1 as usize, f1);
4438 let c1u = c1 as usize;
4439 buckets[induction_offset + c1u] -= 1;
4440 let slot1 = buckets[induction_offset + c1u] as usize;
4441 sa[slot1] = (p1 - 1)
4442 | ((f1 as SaSint) << (SAINT_BIT - 1))
4443 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4444 buckets[v1] = d;
4445 }
4446
4447 i -= 2;
4448 }
4449
4450 j -= 2 * prefetch_distance + 1;
4451 while i >= j {
4452 let iu = i as usize;
4453 let mut p = sa[iu];
4454 if p > 0 {
4455 sa[iu] = 0;
4456 d += p >> (SUFFIX_GROUP_BIT - 1);
4457 p &= !SUFFIX_GROUP_MARKER;
4458
4459 let pu = p as usize;
4460 let c = t[pu - 1];
4461 let f = usize::from(t[pu - 2] > c);
4462 let v = buckets_index2(c as usize, f);
4463 let cu = c as usize;
4464 buckets[induction_offset + cu] -= 1;
4465 let slot = buckets[induction_offset + cu] as usize;
4466 sa[slot] = (p - 1)
4467 | ((f as SaSint) << (SAINT_BIT - 1))
4468 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4469 buckets[v] = d;
4470 }
4471 i -= 1;
4472 }
4473
4474 d
4475}
4476
4477#[doc(hidden)]
4479pub fn partial_sorting_scan_right_to_left_32s_1k(
4480 t: &[SaSint],
4481 sa: &mut [SaSint],
4482 induction_bucket: &mut [SaSint],
4483 omp_block_start: FastSint,
4484 omp_block_size: FastSint,
4485) {
4486 if omp_block_size <= 0 {
4487 return;
4488 }
4489
4490 let prefetch_distance = 64usize;
4491 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4492 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4493 let mut i = (start + size - 1) as isize;
4494 let mut j = (start + 2 * prefetch_distance + 1) as isize;
4495
4496 while i >= j {
4497 let p0 = sa[i as usize];
4498 if p0 > 0 {
4499 sa[i as usize] = 0;
4500 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
4501 let bucket_index0 =
4502 usize::try_from(t[p0_usize - 1]).expect("bucket symbol must be non-negative");
4503 induction_bucket[bucket_index0] -= 1;
4504 let slot0 = usize::try_from(induction_bucket[bucket_index0])
4505 .expect("bucket slot must be non-negative");
4506 sa[slot0] = (p0 - 1)
4507 | ((usize::from(t[p0_usize - 2] > t[p0_usize - 1]) as SaSint) << (SAINT_BIT - 1));
4508 }
4509 let p1 = sa[(i - 1) as usize];
4510 if p1 > 0 {
4511 sa[(i - 1) as usize] = 0;
4512 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
4513 let bucket_index1 =
4514 usize::try_from(t[p1_usize - 1]).expect("bucket symbol must be non-negative");
4515 induction_bucket[bucket_index1] -= 1;
4516 let slot1 = usize::try_from(induction_bucket[bucket_index1])
4517 .expect("bucket slot must be non-negative");
4518 sa[slot1] = (p1 - 1)
4519 | ((usize::from(t[p1_usize - 2] > t[p1_usize - 1]) as SaSint) << (SAINT_BIT - 1));
4520 }
4521
4522 i -= 2;
4523 }
4524
4525 j -= (2 * prefetch_distance + 1) as isize;
4526 while i >= j {
4527 let p = sa[i as usize];
4528 if p > 0 {
4529 sa[i as usize] = 0;
4530 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
4531 let bucket_index =
4532 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative");
4533 induction_bucket[bucket_index] -= 1;
4534 let slot = usize::try_from(induction_bucket[bucket_index])
4535 .expect("bucket slot must be non-negative");
4536 sa[slot] = (p - 1)
4537 | ((usize::from(t[p_usize - 2] > t[p_usize - 1]) as SaSint) << (SAINT_BIT - 1));
4538 }
4539 if i == 0 {
4540 break;
4541 }
4542 i -= 1;
4543 }
4544}
4545
4546#[doc(hidden)]
4548pub fn partial_sorting_scan_right_to_left_32s_6k_block_gather(
4549 t: &[SaSint],
4550 sa: &[SaSint],
4551 cache: &mut [ThreadCache],
4552 omp_block_start: FastSint,
4553 omp_block_size: FastSint,
4554) {
4555 if omp_block_size <= 0 {
4556 return;
4557 }
4558
4559 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4560 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4561 for offset in 0..size {
4562 let i = start + offset;
4563 let mut p = sa[i];
4564 let mut symbol = 0usize;
4565 p &= SAINT_MAX;
4566 if p != 0 {
4567 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
4568 symbol = buckets_index4(
4569 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative"),
4570 usize::from(t[p_usize - 2] > t[p_usize - 1]),
4571 );
4572 }
4573 cache[offset].index = sa[i];
4574 cache[offset].symbol = symbol as SaSint;
4575 }
4576}
4577
4578#[doc(hidden)]
4580pub fn partial_sorting_scan_right_to_left_32s_4k_block_gather(
4581 t: &[SaSint],
4582 sa: &mut [SaSint],
4583 cache: &mut [ThreadCache],
4584 omp_block_start: FastSint,
4585 omp_block_size: FastSint,
4586) {
4587 if omp_block_size <= 0 {
4588 return;
4589 }
4590
4591 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4592 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4593 for offset in 0..size {
4594 let i = start + offset;
4595 let mut symbol = SAINT_MIN;
4596 let mut p = sa[i];
4597 if p > 0 {
4598 sa[i] = 0;
4599 cache[offset].index = p;
4600 p &= !SUFFIX_GROUP_MARKER;
4601 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
4602 symbol = buckets_index2(
4603 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative"),
4604 usize::from(t[p_usize - 2] > t[p_usize - 1]),
4605 ) as SaSint;
4606 }
4607 cache[offset].symbol = symbol;
4608 }
4609}
4610
4611#[doc(hidden)]
4613pub fn partial_sorting_scan_right_to_left_32s_1k_block_gather(
4614 t: &[SaSint],
4615 sa: &mut [SaSint],
4616 cache: &mut [ThreadCache],
4617 omp_block_start: FastSint,
4618 omp_block_size: FastSint,
4619) {
4620 if omp_block_size <= 0 {
4621 return;
4622 }
4623 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4624 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4625 for offset in 0..size {
4626 let i = start + offset;
4627 let mut symbol = SAINT_MIN;
4628 let p = sa[i];
4629 if p > 0 {
4630 sa[i] = 0;
4631 cache[offset].index = (p - 1)
4632 | ((usize::from(t[p as usize - 2] > t[p as usize - 1]) as SaSint)
4633 << (SAINT_BIT - 1));
4634 symbol = t[p as usize - 1];
4635 }
4636 cache[offset].symbol = symbol;
4637 }
4638}
4639
4640#[doc(hidden)]
4642pub fn partial_sorting_scan_right_to_left_32s_6k_block_sort(
4643 t: &[SaSint],
4644 buckets: &mut [SaSint],
4645 mut d: SaSint,
4646 cache: &mut [ThreadCache],
4647 omp_block_start: FastSint,
4648 omp_block_size: FastSint,
4649) -> SaSint {
4650 if omp_block_size <= 0 {
4651 return d;
4652 }
4653
4654 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4655 let mut i = size;
4656 while i > 0 {
4657 i -= 1;
4658
4659 let v = usize::try_from(cache[i].symbol).expect("cache symbol must be non-negative");
4660 let p = cache[i].index;
4661 d += SaSint::from(p < 0);
4662 buckets[v] -= 1;
4663 let target = buckets[v];
4664 cache[i].symbol = target;
4665 cache[i].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4666 buckets[2 + v] = d;
4667
4668 if target >= omp_block_start as SaSint
4669 && target < (omp_block_start + omp_block_size) as SaSint
4670 {
4671 let s = usize::try_from(target - omp_block_start as SaSint)
4672 .expect("cache slot must be non-negative");
4673 let q = cache[i].index & SAINT_MAX;
4674 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
4675 cache[s].index = cache[i].index;
4676 cache[s].symbol = buckets_index4(
4677 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
4678 usize::from(t[q_usize - 2] > t[q_usize - 1]),
4679 ) as SaSint;
4680 }
4681 }
4682
4683 d
4684}
4685
4686#[doc(hidden)]
4688pub fn partial_sorting_scan_right_to_left_32s_4k_block_sort(
4689 t: &[SaSint],
4690 k: SaSint,
4691 buckets: &mut [SaSint],
4692 mut d: SaSint,
4693 cache: &mut [ThreadCache],
4694 omp_block_start: FastSint,
4695 omp_block_size: FastSint,
4696) -> SaSint {
4697 if omp_block_size <= 0 {
4698 return d;
4699 }
4700
4701 let k_usize = usize::try_from(k).expect("k must be non-negative");
4702 let (distinct_names, tail) = buckets.split_at_mut(2 * k_usize);
4703 let induction_bucket = &mut tail[k_usize..2 * k_usize];
4704
4705 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4706 let mut i = size;
4707 while i > 0 {
4708 i -= 1;
4709
4710 let v = cache[i].symbol;
4711 if v >= 0 {
4712 let p = cache[i].index;
4713 d += p >> (SUFFIX_GROUP_BIT - 1);
4714 let bucket_index = usize::try_from(v >> 1).expect("bucket symbol must be non-negative");
4715 induction_bucket[bucket_index] -= 1;
4716 let target = induction_bucket[bucket_index];
4717 cache[i].symbol = target;
4718 cache[i].index = (p - 1)
4719 | ((v & 1) << (SAINT_BIT - 1))
4720 | (((distinct_names
4721 [usize::try_from(v).expect("bucket symbol must be non-negative")]
4722 != d) as SaSint)
4723 << (SUFFIX_GROUP_BIT - 1));
4724 distinct_names[usize::try_from(v).expect("bucket symbol must be non-negative")] = d;
4725
4726 if target >= omp_block_start as SaSint
4727 && target < (omp_block_start + omp_block_size) as SaSint
4728 {
4729 let ni = usize::try_from(target - omp_block_start as SaSint)
4730 .expect("cache slot must be non-negative");
4731 let mut np = cache[i].index;
4732 if np > 0 {
4733 cache[i].index = 0;
4734 cache[ni].index = np;
4735 np &= !SUFFIX_GROUP_MARKER;
4736 let np_usize = usize::try_from(np).expect("suffix index must be non-negative");
4737 cache[ni].symbol = buckets_index2(
4738 usize::try_from(t[np_usize - 1])
4739 .expect("bucket symbol must be non-negative"),
4740 usize::from(t[np_usize - 2] > t[np_usize - 1]),
4741 ) as SaSint;
4742 }
4743 }
4744 }
4745 }
4746
4747 d
4748}
4749
4750#[doc(hidden)]
4752pub fn partial_sorting_scan_right_to_left_32s_1k_block_sort(
4753 t: &[SaSint],
4754 induction_bucket: &mut [SaSint],
4755 cache: &mut [ThreadCache],
4756 omp_block_start: FastSint,
4757 omp_block_size: FastSint,
4758) {
4759 if omp_block_size <= 0 {
4760 return;
4761 }
4762 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4763 let mut offset = size;
4764
4765 while offset > 0 {
4766 offset -= 1;
4767 let v = cache[offset].symbol;
4768 if v >= 0 {
4769 let bucket_index = v as usize;
4770 induction_bucket[bucket_index] -= 1;
4771 let target = induction_bucket[bucket_index];
4772 cache[offset].symbol = target;
4773 let block_end = omp_block_start as SaSint + omp_block_size as SaSint;
4774 if target >= omp_block_start as SaSint && target < block_end {
4775 let ni = usize::try_from(target - omp_block_start as SaSint)
4776 .expect("cache slot must be non-negative");
4777 let np = cache[offset].index;
4778 if np > 0 {
4779 cache[offset].index = 0;
4780 cache[ni].index = (np - 1)
4781 | ((usize::from(t[np as usize - 2] > t[np as usize - 1]) as SaSint)
4782 << (SAINT_BIT - 1));
4783 cache[ni].symbol = t[np as usize - 1];
4784 }
4785 }
4786 }
4787 }
4788}
4789
4790#[doc(hidden)]
4792pub fn partial_sorting_scan_right_to_left_32s_6k_block_omp(
4793 t: &[SaSint],
4794 sa: &mut [SaSint],
4795 buckets: &mut [SaSint],
4796 mut d: SaSint,
4797 cache: &mut [ThreadCache],
4798 block_start: FastSint,
4799 block_size: FastSint,
4800 threads: SaSint,
4801) -> SaSint {
4802 if block_size <= 0 {
4803 return d;
4804 }
4805 if threads == 1 || block_size < 16_384 {
4806 return partial_sorting_scan_right_to_left_32s_6k(
4807 t,
4808 sa,
4809 buckets,
4810 d,
4811 block_start,
4812 block_size,
4813 );
4814 }
4815
4816 let threads_usize = usize::try_from(threads)
4817 .expect("threads must be non-negative")
4818 .max(1);
4819 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4820 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4821 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4822
4823 for omp_thread_num in 0..omp_num_threads {
4824 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4825 omp_block_stride
4826 } else {
4827 block_size_usize - omp_thread_num * omp_block_stride
4828 };
4829 let omp_block_start = usize::try_from(block_start)
4830 .expect("block_start must be non-negative")
4831 + omp_thread_num * omp_block_stride;
4832 if omp_block_size > 0 {
4833 partial_sorting_scan_right_to_left_32s_6k_block_gather(
4834 t,
4835 sa,
4836 &mut cache[omp_thread_num * omp_block_stride
4837 ..omp_thread_num * omp_block_stride + omp_block_size],
4838 omp_block_start as FastSint,
4839 omp_block_size as FastSint,
4840 );
4841 }
4842 }
4843
4844 d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
4845 t,
4846 buckets,
4847 d,
4848 &mut cache[..block_size_usize],
4849 block_start,
4850 block_size,
4851 );
4852
4853 for omp_thread_num in 0..omp_num_threads {
4854 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4855 omp_block_stride
4856 } else {
4857 block_size_usize - omp_thread_num * omp_block_stride
4858 };
4859 let cache_start = omp_thread_num * omp_block_stride;
4860 if omp_block_size > 0 {
4861 place_cached_suffixes(sa, &cache[cache_start..], 0, omp_block_size as FastSint);
4862 }
4863 }
4864
4865 d
4866}
4867
4868#[doc(hidden)]
4870pub fn partial_sorting_scan_right_to_left_32s_4k_block_omp(
4871 t: &[SaSint],
4872 sa: &mut [SaSint],
4873 k: SaSint,
4874 buckets: &mut [SaSint],
4875 mut d: SaSint,
4876 cache: &mut [ThreadCache],
4877 block_start: FastSint,
4878 block_size: FastSint,
4879 threads: SaSint,
4880) -> SaSint {
4881 if block_size <= 0 {
4882 return d;
4883 }
4884 if threads == 1 || block_size < 16_384 {
4885 return partial_sorting_scan_right_to_left_32s_4k(
4886 t,
4887 sa,
4888 k,
4889 buckets,
4890 d,
4891 block_start,
4892 block_size,
4893 );
4894 }
4895
4896 let threads_usize = usize::try_from(threads)
4897 .expect("threads must be non-negative")
4898 .max(1);
4899 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4900 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4901 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4902
4903 for omp_thread_num in 0..omp_num_threads {
4904 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4905 omp_block_stride
4906 } else {
4907 block_size_usize - omp_thread_num * omp_block_stride
4908 };
4909 let omp_block_start = usize::try_from(block_start)
4910 .expect("block_start must be non-negative")
4911 + omp_thread_num * omp_block_stride;
4912 if omp_block_size > 0 {
4913 partial_sorting_scan_right_to_left_32s_4k_block_gather(
4914 t,
4915 sa,
4916 &mut cache[omp_thread_num * omp_block_stride
4917 ..omp_thread_num * omp_block_stride + omp_block_size],
4918 omp_block_start as FastSint,
4919 omp_block_size as FastSint,
4920 );
4921 }
4922 }
4923
4924 d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
4925 t,
4926 k,
4927 buckets,
4928 d,
4929 &mut cache[..block_size_usize],
4930 block_start,
4931 block_size,
4932 );
4933
4934 for omp_thread_num in 0..omp_num_threads {
4935 let omp_block_start = omp_thread_num * omp_block_stride;
4936 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4937 omp_block_stride
4938 } else {
4939 block_size_usize - omp_block_start
4940 };
4941 if omp_block_size > 0 {
4942 compact_and_place_cached_suffixes(
4943 sa,
4944 &mut cache[omp_block_start..],
4945 0,
4946 omp_block_size as FastSint,
4947 );
4948 }
4949 }
4950
4951 d
4952}
4953
4954#[doc(hidden)]
4956pub fn partial_sorting_scan_right_to_left_32s_1k_block_omp(
4957 t: &[SaSint],
4958 sa: &mut [SaSint],
4959 buckets: &mut [SaSint],
4960 cache: &mut [ThreadCache],
4961 block_start: FastSint,
4962 block_size: FastSint,
4963 threads: SaSint,
4964) {
4965 if block_size <= 0 {
4966 return;
4967 }
4968 if threads == 1 || block_size < 16_384 {
4969 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, block_start, block_size);
4970 return;
4971 }
4972
4973 let threads_usize = usize::try_from(threads)
4974 .expect("threads must be non-negative")
4975 .max(1);
4976 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4977 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4978 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4979 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4980
4981 for omp_thread_num in 0..omp_num_threads {
4982 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4983 omp_block_stride
4984 } else {
4985 block_size_usize - omp_thread_num * omp_block_stride
4986 };
4987 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4988 if omp_block_size > 0 {
4989 partial_sorting_scan_right_to_left_32s_1k_block_gather(
4990 t,
4991 sa,
4992 &mut cache[omp_thread_num * omp_block_stride
4993 ..omp_thread_num * omp_block_stride + omp_block_size],
4994 omp_block_start as FastSint,
4995 omp_block_size as FastSint,
4996 );
4997 }
4998 }
4999
5000 let cache = &mut cache[..block_size_usize];
5001 partial_sorting_scan_right_to_left_32s_1k_block_sort(
5002 t,
5003 buckets,
5004 cache,
5005 block_start,
5006 block_size,
5007 );
5008 for omp_thread_num in 0..omp_num_threads {
5009 let omp_block_start = omp_thread_num * omp_block_stride;
5010 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5011 omp_block_stride
5012 } else {
5013 block_size_usize - omp_block_start
5014 };
5015 if omp_block_size > 0 {
5016 compact_and_place_cached_suffixes(
5017 sa,
5018 &mut cache[omp_block_start..],
5019 0,
5020 omp_block_size as FastSint,
5021 );
5022 }
5023 }
5024}
5025
5026#[doc(hidden)]
5028pub fn partial_sorting_scan_left_to_right_32s_6k_block_gather(
5029 t: &[SaSint],
5030 sa: &mut [SaSint],
5031 cache: &mut [ThreadCache],
5032 omp_block_start: FastSint,
5033 omp_block_size: FastSint,
5034) {
5035 if omp_block_size <= 0 {
5036 return;
5037 }
5038
5039 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5040 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5041 for offset in 0..size {
5042 let i = start + offset;
5043 let p = sa[i];
5044 cache[offset].index = p;
5045 let q = p & SAINT_MAX;
5046 cache[offset].symbol = if q != 0 {
5047 buckets_index4(
5048 usize::try_from(t[q as usize - 1]).expect("bucket symbol must be non-negative"),
5049 usize::from(t[q as usize - 2] >= t[q as usize - 1]),
5050 ) as SaSint
5051 } else {
5052 0
5053 };
5054 }
5055}
5056
5057#[doc(hidden)]
5059pub fn partial_sorting_scan_left_to_right_32s_4k_block_gather(
5060 t: &[SaSint],
5061 sa: &mut [SaSint],
5062 cache: &mut [ThreadCache],
5063 omp_block_start: FastSint,
5064 omp_block_size: FastSint,
5065) {
5066 if omp_block_size <= 0 {
5067 return;
5068 }
5069
5070 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5071 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5072 for offset in 0..size {
5073 let i = start + offset;
5074 let mut symbol = SAINT_MIN;
5075 let mut p = sa[i];
5076 if p > 0 {
5077 cache[offset].index = p;
5078 p &= !SUFFIX_GROUP_MARKER;
5079 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
5080 symbol = buckets_index2(
5081 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative"),
5082 usize::from(t[p_usize - 2] < t[p_usize - 1]),
5083 ) as SaSint;
5084 p = 0;
5085 }
5086 cache[offset].symbol = symbol;
5087 sa[i] = p & SAINT_MAX;
5088 }
5089}
5090
5091#[doc(hidden)]
5093pub fn partial_sorting_scan_left_to_right_32s_1k_block_gather(
5094 t: &[SaSint],
5095 sa: &mut [SaSint],
5096 cache: &mut [ThreadCache],
5097 omp_block_start: FastSint,
5098 omp_block_size: FastSint,
5099) {
5100 if omp_block_size <= 0 {
5101 return;
5102 }
5103 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5104 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5105 for offset in 0..size {
5106 let i = start + offset;
5107 let mut symbol = SAINT_MIN;
5108 let mut p = sa[i];
5109 if p > 0 {
5110 cache[offset].index = (p - 1)
5111 | ((usize::from(t[p as usize - 2] < t[p as usize - 1]) as SaSint)
5112 << (SAINT_BIT - 1));
5113 symbol = t[p as usize - 1];
5114 p = 0;
5115 }
5116 cache[offset].symbol = symbol;
5117 sa[i] = p & SAINT_MAX;
5118 }
5119}
5120
5121#[doc(hidden)]
5123pub fn partial_sorting_scan_left_to_right_32s_6k_block_sort(
5124 t: &[SaSint],
5125 buckets: &mut [SaSint],
5126 mut d: SaSint,
5127 cache: &mut [ThreadCache],
5128 omp_block_start: FastSint,
5129 omp_block_size: FastSint,
5130) -> SaSint {
5131 if omp_block_size <= 0 {
5132 return d;
5133 }
5134
5135 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5136 let block_end =
5137 start + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5138
5139 let mut i = start;
5140 let mut j = block_end.saturating_sub(65);
5141 while i < j {
5142 let cache_i0 = i - start;
5143 let cache_i1 = cache_i0 + 1;
5144
5145 let v0 =
5146 usize::try_from(cache[cache_i0].symbol).expect("cache symbol must be non-negative");
5147 let p0 = cache[cache_i0].index;
5148 d += SaSint::from(p0 < 0);
5149 cache[cache_i0].symbol = buckets[v0];
5150 buckets[v0] += 1;
5151 cache[cache_i0].index =
5152 (p0 - 1) | ((SaSint::from(buckets[2 + v0] != d)) << (SAINT_BIT - 1));
5153 buckets[2 + v0] = d;
5154 if cache[cache_i0].symbol >= omp_block_start as SaSint
5155 && cache[cache_i0].symbol < block_end as SaSint
5156 {
5157 let s = usize::try_from(cache[cache_i0].symbol - omp_block_start as SaSint)
5158 .expect("cache slot must be non-negative");
5159 let q = cache[cache_i0].index & SAINT_MAX;
5160 cache[s].index = cache[cache_i0].index;
5161 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
5162 cache[s].symbol = buckets_index4(
5163 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
5164 usize::from(t[q_usize - 2] >= t[q_usize - 1]),
5165 ) as SaSint;
5166 }
5167
5168 let v1 =
5169 usize::try_from(cache[cache_i1].symbol).expect("cache symbol must be non-negative");
5170 let p1 = cache[cache_i1].index;
5171 d += SaSint::from(p1 < 0);
5172 cache[cache_i1].symbol = buckets[v1];
5173 buckets[v1] += 1;
5174 cache[cache_i1].index =
5175 (p1 - 1) | ((SaSint::from(buckets[2 + v1] != d)) << (SAINT_BIT - 1));
5176 buckets[2 + v1] = d;
5177 if cache[cache_i1].symbol >= omp_block_start as SaSint
5178 && cache[cache_i1].symbol < block_end as SaSint
5179 {
5180 let s = usize::try_from(cache[cache_i1].symbol - omp_block_start as SaSint)
5181 .expect("cache slot must be non-negative");
5182 let q = cache[cache_i1].index & SAINT_MAX;
5183 cache[s].index = cache[cache_i1].index;
5184 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
5185 cache[s].symbol = buckets_index4(
5186 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
5187 usize::from(t[q_usize - 2] >= t[q_usize - 1]),
5188 ) as SaSint;
5189 }
5190
5191 i += 2;
5192 }
5193
5194 j += 65;
5195 while i < j {
5196 let cache_i = i - start;
5197 let v = usize::try_from(cache[cache_i].symbol).expect("cache symbol must be non-negative");
5198 let p = cache[cache_i].index;
5199 d += SaSint::from(p < 0);
5200 cache[cache_i].symbol = buckets[v];
5201 buckets[v] += 1;
5202 cache[cache_i].index = (p - 1) | ((SaSint::from(buckets[2 + v] != d)) << (SAINT_BIT - 1));
5203 buckets[2 + v] = d;
5204 if cache[cache_i].symbol >= omp_block_start as SaSint
5205 && cache[cache_i].symbol < block_end as SaSint
5206 {
5207 let s = usize::try_from(cache[cache_i].symbol - omp_block_start as SaSint)
5208 .expect("cache slot must be non-negative");
5209 let q = cache[cache_i].index & SAINT_MAX;
5210 cache[s].index = cache[cache_i].index;
5211 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
5212 cache[s].symbol = buckets_index4(
5213 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
5214 usize::from(t[q_usize - 2] >= t[q_usize - 1]),
5215 ) as SaSint;
5216 }
5217 i += 1;
5218 }
5219
5220 d
5221}
5222
5223#[doc(hidden)]
5225pub fn partial_sorting_scan_left_to_right_32s_4k_block_sort(
5226 t: &[SaSint],
5227 k: SaSint,
5228 buckets: &mut [SaSint],
5229 mut d: SaSint,
5230 cache: &mut [ThreadCache],
5231 omp_block_start: FastSint,
5232 omp_block_size: FastSint,
5233) -> SaSint {
5234 if omp_block_size <= 0 {
5235 return d;
5236 }
5237
5238 let k_usize = usize::try_from(k).expect("k must be non-negative");
5239 let (distinct_names, tail) = buckets.split_at_mut(2 * k_usize);
5240 let induction_bucket = &mut tail[..k_usize];
5241
5242 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5243 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5244 let block_end = start + size;
5245
5246 for offset in 0..size {
5247 let v = cache[offset].symbol;
5248 if v >= 0 {
5249 let p = cache[offset].index;
5250 d += p >> (SUFFIX_GROUP_BIT - 1);
5251
5252 let bucket_index = usize::try_from(v >> 1).expect("bucket index must be non-negative");
5253 let v_usize = usize::try_from(v).expect("cache symbol must be non-negative");
5254 let target = induction_bucket[bucket_index];
5255 induction_bucket[bucket_index] += 1;
5256
5257 cache[offset].symbol = target;
5258 cache[offset].index = (p - 1)
5259 | ((v & 1) << (SAINT_BIT - 1))
5260 | (((distinct_names[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
5261 distinct_names[v_usize] = d;
5262
5263 if target >= omp_block_start as SaSint && target < block_end as SaSint {
5264 let ni = usize::try_from(target - omp_block_start as SaSint)
5265 .expect("cache slot must be non-negative");
5266 let mut np = cache[offset].index;
5267 if np > 0 {
5268 cache[ni].index = np;
5269 np &= !SUFFIX_GROUP_MARKER;
5270 let np_usize = usize::try_from(np).expect("suffix index must be non-negative");
5271 cache[ni].symbol = buckets_index2(
5272 usize::try_from(t[np_usize - 1])
5273 .expect("bucket symbol must be non-negative"),
5274 usize::from(t[np_usize - 2] < t[np_usize - 1]),
5275 ) as SaSint;
5276 np = 0;
5277 }
5278 cache[offset].index = np & SAINT_MAX;
5279 }
5280 }
5281 }
5282
5283 d
5284}
5285
5286#[doc(hidden)]
5288pub fn partial_sorting_scan_left_to_right_32s_1k_block_sort(
5289 t: &[SaSint],
5290 induction_bucket: &mut [SaSint],
5291 cache: &mut [ThreadCache],
5292 omp_block_start: FastSint,
5293 omp_block_size: FastSint,
5294) {
5295 if omp_block_size <= 0 {
5296 return;
5297 }
5298 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5299 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5300 let block_end = start + size;
5301
5302 for offset in 0..size {
5303 let v = cache[offset].symbol;
5304 if v >= 0 {
5305 let v_usize = v as usize;
5306 let target = induction_bucket[v_usize];
5307 cache[offset].symbol = target;
5308 induction_bucket[v_usize] += 1;
5309 if target >= omp_block_start as SaSint && target < block_end as SaSint {
5310 let ni = usize::try_from(target - omp_block_start as SaSint)
5311 .expect("cache slot must be non-negative");
5312 let mut np = cache[offset].index;
5313 if np > 0 {
5314 cache[ni].index = (np - 1)
5315 | ((usize::from(t[np as usize - 2] < t[np as usize - 1]) as SaSint)
5316 << (SAINT_BIT - 1));
5317 cache[ni].symbol = t[np as usize - 1];
5318 np = 0;
5319 }
5320 cache[offset].index = np & SAINT_MAX;
5321 }
5322 }
5323 }
5324}
5325
5326#[doc(hidden)]
5328pub fn partial_sorting_scan_left_to_right_32s_6k_block_omp(
5329 t: &[SaSint],
5330 sa: &mut [SaSint],
5331 buckets: &mut [SaSint],
5332 d: SaSint,
5333 cache: &mut [ThreadCache],
5334 block_start: FastSint,
5335 block_size: FastSint,
5336 threads: SaSint,
5337) -> SaSint {
5338 if block_size <= 0 {
5339 return d;
5340 }
5341 if threads == 1 || block_size < 16_384 {
5342 return partial_sorting_scan_left_to_right_32s_6k(
5343 t,
5344 sa,
5345 buckets,
5346 d,
5347 block_start,
5348 block_size,
5349 );
5350 }
5351
5352 let threads_usize = usize::try_from(threads)
5353 .expect("threads must be non-negative")
5354 .max(1);
5355 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5356 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
5357 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5358 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5359
5360 for omp_thread_num in 0..omp_num_threads {
5361 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5362 omp_block_stride
5363 } else {
5364 block_size_usize - omp_thread_num * omp_block_stride
5365 };
5366 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5367 if omp_block_size > 0 {
5368 partial_sorting_scan_left_to_right_32s_6k_block_gather(
5369 t,
5370 sa,
5371 &mut cache[omp_thread_num * omp_block_stride
5372 ..omp_thread_num * omp_block_stride + omp_block_size],
5373 omp_block_start as FastSint,
5374 omp_block_size as FastSint,
5375 );
5376 }
5377 }
5378
5379 let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
5380 t,
5381 buckets,
5382 d,
5383 &mut cache[..block_size_usize],
5384 block_start,
5385 block_size,
5386 );
5387
5388 for omp_thread_num in 0..omp_num_threads {
5389 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5390 omp_block_stride
5391 } else {
5392 block_size_usize - omp_thread_num * omp_block_stride
5393 };
5394 if omp_block_size > 0 {
5395 place_cached_suffixes(
5396 sa,
5397 &cache[omp_thread_num * omp_block_stride..],
5398 0,
5399 omp_block_size as FastSint,
5400 );
5401 }
5402 }
5403 d
5404}
5405
5406#[doc(hidden)]
5408pub fn partial_sorting_scan_left_to_right_32s_4k_block_omp(
5409 t: &[SaSint],
5410 sa: &mut [SaSint],
5411 k: SaSint,
5412 buckets: &mut [SaSint],
5413 d: SaSint,
5414 cache: &mut [ThreadCache],
5415 block_start: FastSint,
5416 block_size: FastSint,
5417 threads: SaSint,
5418) -> SaSint {
5419 if block_size <= 0 {
5420 return d;
5421 }
5422 if threads == 1 || block_size < 16_384 {
5423 return partial_sorting_scan_left_to_right_32s_4k(
5424 t,
5425 sa,
5426 k,
5427 buckets,
5428 d,
5429 block_start,
5430 block_size,
5431 );
5432 }
5433
5434 let threads_usize = usize::try_from(threads)
5435 .expect("threads must be non-negative")
5436 .max(1);
5437 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5438 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
5439 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5440 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5441
5442 for omp_thread_num in 0..omp_num_threads {
5443 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5444 omp_block_stride
5445 } else {
5446 block_size_usize - omp_thread_num * omp_block_stride
5447 };
5448 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5449 if omp_block_size > 0 {
5450 partial_sorting_scan_left_to_right_32s_4k_block_gather(
5451 t,
5452 sa,
5453 &mut cache[omp_thread_num * omp_block_stride
5454 ..omp_thread_num * omp_block_stride + omp_block_size],
5455 omp_block_start as FastSint,
5456 omp_block_size as FastSint,
5457 );
5458 }
5459 }
5460
5461 let cache = &mut cache[..block_size_usize];
5462 let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
5463 t,
5464 k,
5465 buckets,
5466 d,
5467 cache,
5468 block_start,
5469 block_size,
5470 );
5471
5472 for omp_thread_num in 0..omp_num_threads {
5473 let omp_block_start = omp_thread_num * omp_block_stride;
5474 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5475 omp_block_stride
5476 } else {
5477 block_size_usize - omp_block_start
5478 };
5479 if omp_block_size > 0 {
5480 compact_and_place_cached_suffixes(
5481 sa,
5482 &mut cache[omp_block_start..],
5483 0,
5484 omp_block_size as FastSint,
5485 );
5486 }
5487 }
5488
5489 d
5490}
5491
5492#[doc(hidden)]
5494pub fn partial_sorting_scan_left_to_right_32s_1k_block_omp(
5495 t: &[SaSint],
5496 sa: &mut [SaSint],
5497 buckets: &mut [SaSint],
5498 cache: &mut [ThreadCache],
5499 block_start: FastSint,
5500 block_size: FastSint,
5501 threads: SaSint,
5502) {
5503 if block_size <= 0 {
5504 return;
5505 }
5506 if threads == 1 || block_size < 16_384 {
5507 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, block_start, block_size);
5508 return;
5509 }
5510
5511 let threads_usize = usize::try_from(threads)
5512 .expect("threads must be non-negative")
5513 .max(1);
5514 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5515 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
5516 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5517 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5518
5519 for omp_thread_num in 0..omp_num_threads {
5520 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5521 omp_block_stride
5522 } else {
5523 block_size_usize - omp_thread_num * omp_block_stride
5524 };
5525 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5526 if omp_block_size > 0 {
5527 partial_sorting_scan_left_to_right_32s_1k_block_gather(
5528 t,
5529 sa,
5530 &mut cache[omp_thread_num * omp_block_stride
5531 ..omp_thread_num * omp_block_stride + omp_block_size],
5532 omp_block_start as FastSint,
5533 omp_block_size as FastSint,
5534 );
5535 }
5536 }
5537
5538 let cache = &mut cache[..block_size_usize];
5539 partial_sorting_scan_left_to_right_32s_1k_block_sort(
5540 t,
5541 buckets,
5542 cache,
5543 block_start,
5544 block_size,
5545 );
5546 for omp_thread_num in 0..omp_num_threads {
5547 let omp_block_start = omp_thread_num * omp_block_stride;
5548 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5549 omp_block_stride
5550 } else {
5551 block_size_usize - omp_block_start
5552 };
5553 if omp_block_size > 0 {
5554 compact_and_place_cached_suffixes(
5555 sa,
5556 &mut cache[omp_block_start..],
5557 0,
5558 omp_block_size as FastSint,
5559 );
5560 }
5561 }
5562}
5563
5564#[doc(hidden)]
5566pub fn partial_sorting_scan_right_to_left_32s_6k_omp(
5567 t: &[SaSint],
5568 sa: &mut [SaSint],
5569 n: SaSint,
5570 buckets: &mut [SaSint],
5571 first_lms_suffix: SaSint,
5572 left_suffixes_count: SaSint,
5573 mut d: SaSint,
5574 threads: SaSint,
5575 thread_state: &mut [ThreadState],
5576) -> SaSint {
5577 let scan_start = left_suffixes_count as FastSint + 1;
5578 let scan_end = n as FastSint - first_lms_suffix as FastSint;
5579 if threads == 1 || (scan_end - scan_start) < 65_536 {
5580 return partial_sorting_scan_right_to_left_32s_6k(
5581 t,
5582 sa,
5583 buckets,
5584 d,
5585 scan_start,
5586 scan_end - scan_start,
5587 );
5588 }
5589 if thread_state.is_empty() {
5590 return partial_sorting_scan_right_to_left_32s_6k(
5591 t,
5592 sa,
5593 buckets,
5594 d,
5595 scan_start,
5596 scan_end - scan_start,
5597 );
5598 }
5599
5600 let threads_usize = usize::try_from(threads)
5601 .expect("threads must be non-negative")
5602 .max(1);
5603 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
5604 let mut block_start = scan_end - 1;
5605 let block_span = FastSint::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
5606 .expect("block span must fit FastSint");
5607 while block_start >= scan_start {
5608 let mut block_end = block_start - block_span;
5609 if block_end < scan_start {
5610 block_end = scan_start - 1;
5611 }
5612
5613 d = partial_sorting_scan_right_to_left_32s_6k_block_omp(
5614 t,
5615 sa,
5616 buckets,
5617 d,
5618 &mut cache,
5619 block_end + 1,
5620 block_start - block_end,
5621 threads,
5622 );
5623
5624 if block_end < scan_start {
5625 break;
5626 }
5627 block_start = block_end;
5628 }
5629
5630 d
5631}
5632
5633#[doc(hidden)]
5635pub fn partial_sorting_scan_right_to_left_32s_4k_omp(
5636 t: &[SaSint],
5637 sa: &mut [SaSint],
5638 n: SaSint,
5639 k: SaSint,
5640 buckets: &mut [SaSint],
5641 mut d: SaSint,
5642 threads: SaSint,
5643 thread_state: &mut [ThreadState],
5644) -> SaSint {
5645 if threads == 1 || n < 65_536 {
5646 return partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n as FastSint);
5647 }
5648 if thread_state.is_empty() {
5649 return partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n as FastSint);
5650 }
5651 let threads_usize = usize::try_from(threads)
5652 .expect("threads must be non-negative")
5653 .max(1);
5654 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
5655 let mut block_start = FastSint::try_from(n).expect("n must fit FastSint") - 1;
5656 let block_span = FastSint::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
5657 .expect("block span must fit FastSint");
5658 while block_start >= 0 {
5659 let mut block_end = block_start - block_span;
5660 if block_end < 0 {
5661 block_end = -1;
5662 }
5663
5664 d = partial_sorting_scan_right_to_left_32s_4k_block_omp(
5665 t,
5666 sa,
5667 k,
5668 buckets,
5669 d,
5670 &mut cache,
5671 block_end + 1,
5672 block_start - block_end,
5673 threads,
5674 );
5675
5676 if block_end < 0 {
5677 break;
5678 }
5679 block_start = block_end;
5680 }
5681
5682 d
5683}
5684
5685#[doc(hidden)]
5687pub fn partial_sorting_scan_right_to_left_32s_1k_omp(
5688 t: &[SaSint],
5689 sa: &mut [SaSint],
5690 n: SaSint,
5691 buckets: &mut [SaSint],
5692 threads: SaSint,
5693 thread_state: &mut [ThreadState],
5694) {
5695 if threads == 1 || n < 65_536 {
5696 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n as FastSint);
5697 return;
5698 }
5699 if thread_state.is_empty() {
5700 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n as FastSint);
5701 return;
5702 }
5703
5704 let threads_usize = usize::try_from(threads)
5705 .expect("threads must be non-negative")
5706 .max(1);
5707 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
5708 let mut block_start = FastSint::try_from(n).expect("n must fit FastSint") - 1;
5709 let block_span = FastSint::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
5710 .expect("block span must fit FastSint");
5711 while block_start >= 0 {
5712 let mut block_end = block_start - block_span;
5713 if block_end < 0 {
5714 block_end = -1;
5715 }
5716
5717 partial_sorting_scan_right_to_left_32s_1k_block_omp(
5718 t,
5719 sa,
5720 buckets,
5721 &mut cache,
5722 block_end + 1,
5723 block_start - block_end,
5724 threads,
5725 );
5726
5727 if block_end < 0 {
5728 break;
5729 }
5730 block_start = block_end;
5731 }
5732}
5733
5734#[doc(hidden)]
5736pub fn partial_sorting_gather_lms_suffixes_32s_4k(
5737 sa: &mut [SaSint],
5738 omp_block_start: FastSint,
5739 omp_block_size: FastSint,
5740) -> FastSint {
5741 if omp_block_size <= 0 {
5742 return omp_block_start;
5743 }
5744
5745 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5746 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5747 let mut l = start;
5748
5749 for i in start..start + size {
5750 let s = sa[i] as SaUint;
5751 sa[l] = ((s.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)) & !(SUFFIX_GROUP_MARKER as SaUint))
5752 as SaSint;
5753 l += usize::from((s as SaSint) < 0);
5754 }
5755
5756 l as FastSint
5757}
5758
5759#[doc(hidden)]
5761pub fn partial_sorting_gather_lms_suffixes_32s_1k(
5762 sa: &mut [SaSint],
5763 omp_block_start: FastSint,
5764 omp_block_size: FastSint,
5765) -> FastSint {
5766 if omp_block_size <= 0 {
5767 return omp_block_start;
5768 }
5769
5770 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5771 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5772 let mut l = start;
5773
5774 for i in start..start + size {
5775 let s = sa[i];
5776 sa[l] = s & SAINT_MAX;
5777 l += usize::from(s < 0);
5778 }
5779
5780 l as FastSint
5781}
5782
5783#[doc(hidden)]
5785pub fn partial_sorting_gather_lms_suffixes_32s_4k_omp(
5786 sa: &mut [SaSint],
5787 n: SaSint,
5788 threads: SaSint,
5789 thread_state: &mut [ThreadState],
5790) {
5791 let n_usize = usize::try_from(n).expect("n must be non-negative");
5792 let omp_num_threads = if threads > 1 && n >= 65_536 {
5793 usize::try_from(threads)
5794 .expect("threads must be non-negative")
5795 .min(thread_state.len())
5796 .max(1)
5797 } else {
5798 1
5799 };
5800
5801 if omp_num_threads == 1 {
5802 let _ = partial_sorting_gather_lms_suffixes_32s_4k(sa, 0, n as FastSint);
5803 return;
5804 }
5805
5806 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
5807 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
5808 let block_start = thread_num * omp_block_stride;
5809 let block_size = if thread_num + 1 < omp_num_threads {
5810 omp_block_stride
5811 } else {
5812 n_usize - block_start
5813 };
5814 state.position = block_start as FastSint;
5815 state.count = partial_sorting_gather_lms_suffixes_32s_4k(
5816 sa,
5817 block_start as FastSint,
5818 block_size as FastSint,
5819 ) - block_start as FastSint;
5820 }
5821
5822 let mut position = 0usize;
5823 for (thread_num, state) in thread_state.iter().take(omp_num_threads).enumerate() {
5824 let count = usize::try_from(state.count).expect("count must be non-negative");
5825 let src = usize::try_from(state.position).expect("position must be non-negative");
5826 if thread_num > 0 && count > 0 {
5827 sa.copy_within(src..src + count, position);
5828 }
5829 position += count;
5830 }
5831}
5832
5833#[doc(hidden)]
5835pub fn partial_sorting_gather_lms_suffixes_32s_1k_omp(
5836 sa: &mut [SaSint],
5837 n: SaSint,
5838 threads: SaSint,
5839 thread_state: &mut [ThreadState],
5840) {
5841 let n_usize = usize::try_from(n).expect("n must be non-negative");
5842 let omp_num_threads = if threads > 1 && n >= 65_536 {
5843 usize::try_from(threads)
5844 .expect("threads must be non-negative")
5845 .min(thread_state.len())
5846 .max(1)
5847 } else {
5848 1
5849 };
5850
5851 if omp_num_threads == 1 {
5852 let _ = partial_sorting_gather_lms_suffixes_32s_1k(sa, 0, n as FastSint);
5853 return;
5854 }
5855
5856 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
5857 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
5858 let block_start = thread_num * omp_block_stride;
5859 let block_size = if thread_num + 1 < omp_num_threads {
5860 omp_block_stride
5861 } else {
5862 n_usize - block_start
5863 };
5864 state.position = block_start as FastSint;
5865 state.count = partial_sorting_gather_lms_suffixes_32s_1k(
5866 sa,
5867 block_start as FastSint,
5868 block_size as FastSint,
5869 ) - block_start as FastSint;
5870 }
5871
5872 let mut position = 0usize;
5873 for (thread_num, state) in thread_state.iter().take(omp_num_threads).enumerate() {
5874 let count = usize::try_from(state.count).expect("count must be non-negative");
5875 let src = usize::try_from(state.position).expect("position must be non-negative");
5876 if thread_num > 0 && count > 0 {
5877 sa.copy_within(src..src + count, position);
5878 }
5879 position += count;
5880 }
5881}
5882
5883#[doc(hidden)]
5885pub fn induce_partial_order_8u_omp(
5886 t: &[u8],
5887 sa: &mut [SaSint],
5888 n: SaSint,
5889 k: SaSint,
5890 flags: SaSint,
5891 buckets: &mut [SaSint],
5892 first_lms_suffix: SaSint,
5893 left_suffixes_count: SaSint,
5894 threads: SaSint,
5895 thread_state: &mut [ThreadState],
5896) {
5897 buckets[2 * ALPHABET_SIZE..4 * ALPHABET_SIZE].fill(0);
5898
5899 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5900 let left = 4 * ALPHABET_SIZE + buckets_index2(0, 1);
5901 let right = 4 * ALPHABET_SIZE + buckets_index2(1, 1);
5902 buckets[left] = buckets[right] - 1;
5903 flip_suffix_markers_omp(sa, buckets[left], threads);
5904 }
5905
5906 let d = partial_sorting_scan_left_to_right_8u_omp(
5907 t,
5908 sa,
5909 n,
5910 k,
5911 buckets,
5912 left_suffixes_count,
5913 0,
5914 threads,
5915 thread_state,
5916 );
5917 partial_sorting_shift_markers_8u_omp(sa, n, buckets, threads);
5918
5919 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5920 partial_gsa_scan_right_to_left_8u_omp(
5921 t,
5922 sa,
5923 n,
5924 k,
5925 buckets,
5926 first_lms_suffix,
5927 left_suffixes_count,
5928 d,
5929 threads,
5930 thread_state,
5931 );
5932
5933 if t[usize::try_from(first_lms_suffix).expect("first_lms_suffix must be non-negative")] == 0
5934 {
5935 let count = usize::try_from(buckets[buckets_index2(1, 1)] - 1)
5936 .expect("count must be non-negative");
5937 sa.copy_within(0..count, 1);
5938 sa[0] = first_lms_suffix | SAINT_MIN;
5939 }
5940
5941 buckets[buckets_index2(0, 1)] = 0;
5942 } else {
5943 partial_sorting_scan_right_to_left_8u_omp(
5944 t,
5945 sa,
5946 n,
5947 k,
5948 buckets,
5949 first_lms_suffix,
5950 left_suffixes_count,
5951 d,
5952 threads,
5953 thread_state,
5954 );
5955 }
5956}
5957
5958#[doc(hidden)]
5960pub fn induce_partial_order_32s_6k_omp(
5961 t: &[SaSint],
5962 sa: &mut [SaSint],
5963 n: SaSint,
5964 k: SaSint,
5965 buckets: &mut [SaSint],
5966 first_lms_suffix: SaSint,
5967 left_suffixes_count: SaSint,
5968 threads: SaSint,
5969 thread_state: &mut [ThreadState],
5970) {
5971 let d = partial_sorting_scan_left_to_right_32s_6k_omp(
5972 t,
5973 sa,
5974 n,
5975 buckets,
5976 left_suffixes_count,
5977 0,
5978 threads,
5979 thread_state,
5980 );
5981 partial_sorting_shift_markers_32s_6k_omp(sa, k, buckets, threads);
5982 partial_sorting_shift_buckets_32s_6k(k, buckets);
5983 let _ = partial_sorting_scan_right_to_left_32s_6k_omp(
5984 t,
5985 sa,
5986 n,
5987 buckets,
5988 first_lms_suffix,
5989 left_suffixes_count,
5990 d,
5991 threads,
5992 thread_state,
5993 );
5994}
5995
5996#[doc(hidden)]
5998pub fn induce_partial_order_32s_4k_omp(
5999 t: &[SaSint],
6000 sa: &mut [SaSint],
6001 n: SaSint,
6002 k: SaSint,
6003 buckets: &mut [SaSint],
6004 threads: SaSint,
6005 thread_state: &mut [ThreadState],
6006) {
6007 let zero_len = 2 * usize::try_from(k).expect("k must be non-negative");
6008 buckets[..zero_len].fill(0);
6009
6010 let d = partial_sorting_scan_left_to_right_32s_4k_omp(
6011 t,
6012 sa,
6013 n,
6014 k,
6015 buckets,
6016 0,
6017 threads,
6018 thread_state,
6019 );
6020 partial_sorting_shift_markers_32s_4k(sa, n);
6021 let _ = partial_sorting_scan_right_to_left_32s_4k_omp(
6022 t,
6023 sa,
6024 n,
6025 k,
6026 buckets,
6027 d,
6028 threads,
6029 thread_state,
6030 );
6031 partial_sorting_gather_lms_suffixes_32s_4k_omp(sa, n, threads, thread_state);
6032}
6033
6034#[doc(hidden)]
6036pub fn induce_partial_order_32s_2k_omp(
6037 t: &[SaSint],
6038 sa: &mut [SaSint],
6039 n: SaSint,
6040 k: SaSint,
6041 buckets: &mut [SaSint],
6042 threads: SaSint,
6043 thread_state: &mut [ThreadState],
6044) {
6045 let k_usize = usize::try_from(k).expect("k must be non-negative");
6046 let (left, right) = buckets.split_at_mut(k_usize);
6047 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, right, threads, thread_state);
6048 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, left, threads, thread_state);
6049 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
6050}
6051
6052#[doc(hidden)]
6054pub fn induce_partial_order_32s_1k_omp(
6055 t: &[SaSint],
6056 sa: &mut [SaSint],
6057 n: SaSint,
6058 k: SaSint,
6059 buckets: &mut [SaSint],
6060 threads: SaSint,
6061 thread_state: &mut [ThreadState],
6062) {
6063 count_suffixes_32s(t, n, k, buckets);
6064 initialize_buckets_start_32s_1k(k, buckets);
6065 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
6066
6067 count_suffixes_32s(t, n, k, buckets);
6068 initialize_buckets_end_32s_1k(k, buckets);
6069 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
6070
6071 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
6072}
6073
6074#[doc(hidden)]
6076pub fn renumber_lms_suffixes_8u(
6077 sa: &mut [SaSint],
6078 m: SaSint,
6079 mut name: SaSint,
6080 omp_block_start: FastSint,
6081 omp_block_size: FastSint,
6082) -> SaSint {
6083 if omp_block_size <= 0 {
6084 return name;
6085 }
6086
6087 let m_usize = usize::try_from(m).expect("m must be non-negative");
6088 let (sa_head, sam) = sa.split_at_mut(m_usize);
6089 let mut i = omp_block_start;
6090 let mut j = omp_block_start + omp_block_size - 64 - 3;
6091
6092 while i < j {
6093 let i0 = i as usize;
6094 let p0 = sa_head[i0];
6095 let d0 = ((p0 & SAINT_MAX) >> 1) as usize;
6096 sam[d0] = name | SAINT_MIN;
6097 name += SaSint::from(p0 < 0);
6098
6099 let p1 = sa_head[i0 + 1];
6100 let d1 = ((p1 & SAINT_MAX) >> 1) as usize;
6101 sam[d1] = name | SAINT_MIN;
6102 name += SaSint::from(p1 < 0);
6103
6104 let p2 = sa_head[i0 + 2];
6105 let d2 = ((p2 & SAINT_MAX) >> 1) as usize;
6106 sam[d2] = name | SAINT_MIN;
6107 name += SaSint::from(p2 < 0);
6108
6109 let p3 = sa_head[i0 + 3];
6110 let d3 = ((p3 & SAINT_MAX) >> 1) as usize;
6111 sam[d3] = name | SAINT_MIN;
6112 name += SaSint::from(p3 < 0);
6113
6114 i += 4;
6115 }
6116
6117 j += 64 + 3;
6118 while i < j {
6119 let p = sa_head[i as usize];
6120 let d = ((p & SAINT_MAX) >> 1) as usize;
6121 sam[d] = name | SAINT_MIN;
6122 name += SaSint::from(p < 0);
6123 i += 1;
6124 }
6125
6126 name
6127}
6128
6129#[doc(hidden)]
6131pub fn gather_marked_lms_suffixes(
6132 sa: &mut [SaSint],
6133 m: SaSint,
6134 l: FastSint,
6135 omp_block_start: FastSint,
6136 omp_block_size: FastSint,
6137) -> FastSint {
6138 if omp_block_size <= 0 {
6139 return l;
6140 }
6141
6142 let mut l = l - 1;
6143 let mut i = m as FastSint + omp_block_start + omp_block_size - 1;
6144 let mut j = m as FastSint + omp_block_start + 3;
6145
6146 while i >= j {
6147 let i0 = i as usize;
6148 let s0 = sa[i0];
6149 sa[l as usize] = s0 & SAINT_MAX;
6150 l -= FastSint::from(s0 < 0);
6151
6152 let s1 = sa[i0 - 1];
6153 sa[l as usize] = s1 & SAINT_MAX;
6154 l -= FastSint::from(s1 < 0);
6155
6156 let s2 = sa[i0 - 2];
6157 sa[l as usize] = s2 & SAINT_MAX;
6158 l -= FastSint::from(s2 < 0);
6159
6160 let s3 = sa[i0 - 3];
6161 sa[l as usize] = s3 & SAINT_MAX;
6162 l -= FastSint::from(s3 < 0);
6163
6164 i -= 4;
6165 }
6166
6167 j -= 3;
6168 while i >= j {
6169 let s = sa[i as usize];
6170 sa[l as usize] = s & SAINT_MAX;
6171 l -= FastSint::from(s < 0);
6172 i -= 1;
6173 }
6174
6175 l + 1
6176}
6177
6178#[doc(hidden)]
6180pub fn renumber_lms_suffixes_8u_omp(
6181 sa: &mut [SaSint],
6182 m: SaSint,
6183 threads: SaSint,
6184 thread_state: &mut [ThreadState],
6185) -> SaSint {
6186 let mut name = 0;
6187 let omp_num_threads = if threads > 1 && m >= 65_536 {
6188 usize::try_from(threads)
6189 .expect("threads must be non-negative")
6190 .min(thread_state.len())
6191 .max(1)
6192 } else {
6193 1
6194 };
6195 let omp_block_stride = (m as FastSint / omp_num_threads as FastSint) & !15;
6196
6197 if omp_num_threads == 1 {
6198 name = renumber_lms_suffixes_8u(sa, m, 0, 0, m as FastSint);
6199 } else {
6200 for omp_thread_num in 0..omp_num_threads {
6201 let omp_block_start = omp_thread_num as FastSint * omp_block_stride;
6202 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6203 omp_block_stride
6204 } else {
6205 m as FastSint - omp_block_start
6206 };
6207 thread_state[omp_thread_num].count =
6208 count_negative_marked_suffixes(sa, omp_block_start, omp_block_size) as FastSint;
6209 }
6210
6211 for omp_thread_num in 0..omp_num_threads {
6212 let omp_block_start = omp_thread_num as FastSint * omp_block_stride;
6213 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6214 omp_block_stride
6215 } else {
6216 m as FastSint - omp_block_start
6217 };
6218
6219 let mut count: FastSint = 0;
6220 for t in 0..omp_thread_num {
6221 count += thread_state[t].count;
6222 }
6223
6224 if omp_thread_num + 1 == omp_num_threads {
6225 name = (count + thread_state[omp_thread_num].count) as SaSint;
6226 }
6227
6228 let _ =
6229 renumber_lms_suffixes_8u(sa, m, count as SaSint, omp_block_start, omp_block_size);
6230 }
6231 }
6232
6233 name
6234}
6235
6236#[doc(hidden)]
6238pub fn gather_marked_lms_suffixes_omp(
6239 sa: &mut [SaSint],
6240 n: SaSint,
6241 m: SaSint,
6242 fs: SaSint,
6243 threads: SaSint,
6244 thread_state: &mut [ThreadState],
6245) {
6246 let n_fast = n as FastSint;
6247 let m_fast = m as FastSint;
6248 let omp_num_threads = if threads > 1 && n >= 131_072 {
6249 usize::try_from(threads)
6250 .expect("threads must be non-negative")
6251 .min(thread_state.len())
6252 .max(1)
6253 } else {
6254 1
6255 };
6256 let omp_block_stride = ((n_fast >> 1) / omp_num_threads as FastSint) & !15;
6257
6258 if omp_num_threads == 1 {
6259 let _ = gather_marked_lms_suffixes(sa, m, n_fast + fs as FastSint, 0, n_fast >> 1);
6260 } else {
6261 for omp_thread_num in 0..omp_num_threads {
6262 let omp_block_start = omp_thread_num as FastSint * omp_block_stride;
6263 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6264 omp_block_stride
6265 } else {
6266 (n_fast >> 1) - omp_block_start
6267 };
6268
6269 if omp_thread_num < omp_num_threads - 1 {
6270 thread_state[omp_thread_num].position = gather_marked_lms_suffixes(
6271 sa,
6272 m,
6273 m_fast + omp_block_start + omp_block_size,
6274 omp_block_start,
6275 omp_block_size,
6276 );
6277 thread_state[omp_thread_num].count = m_fast + omp_block_start + omp_block_size
6278 - thread_state[omp_thread_num].position;
6279 } else {
6280 thread_state[omp_thread_num].position = gather_marked_lms_suffixes(
6281 sa,
6282 m,
6283 n_fast + fs as FastSint,
6284 omp_block_start,
6285 omp_block_size,
6286 );
6287 thread_state[omp_thread_num].count =
6288 n_fast + fs as FastSint - thread_state[omp_thread_num].position;
6289 }
6290 }
6291
6292 let mut position = n_fast + fs as FastSint;
6293 for t in (0..omp_num_threads).rev() {
6294 position -= thread_state[t].count;
6295 if t + 1 != omp_num_threads && thread_state[t].count > 0 {
6296 let src = usize::try_from(thread_state[t].position)
6297 .expect("position must be non-negative");
6298 let len =
6299 usize::try_from(thread_state[t].count).expect("count must be non-negative");
6300 let dst = usize::try_from(position).expect("position must be non-negative");
6301 sa.copy_within(src..src + len, dst);
6302 }
6303 }
6304 }
6305}
6306
6307#[doc(hidden)]
6309pub fn renumber_and_gather_lms_suffixes_omp(
6310 sa: &mut [SaSint],
6311 n: SaSint,
6312 m: SaSint,
6313 fs: SaSint,
6314 threads: SaSint,
6315 thread_state: &mut [ThreadState],
6316) -> SaSint {
6317 let m_usize = usize::try_from(m).expect("m must be non-negative");
6318 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6319 sa[m_usize..m_usize + half_n].fill(0);
6320
6321 let name = renumber_lms_suffixes_8u_omp(sa, m, threads, thread_state);
6322 if name < m {
6323 gather_marked_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
6324 } else {
6325 let mut i = 0;
6326 while i < m_usize {
6327 sa[i] &= SAINT_MAX;
6328 i += 1;
6329 }
6330 }
6331
6332 name
6333}
6334
6335#[doc(hidden)]
6337pub fn renumber_distinct_lms_suffixes_32s_4k(
6338 sa: &mut [SaSint],
6339 m: SaSint,
6340 mut name: SaSint,
6341 omp_block_start: FastSint,
6342 omp_block_size: FastSint,
6343) -> SaSint {
6344 if omp_block_size <= 0 {
6345 return name;
6346 }
6347
6348 let prefetch_distance = 64usize;
6349 let m_usize = usize::try_from(m).expect("m must be non-negative");
6350 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
6351 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6352 let (sa_head, sam) = sa.split_at_mut(m_usize);
6353 let mut i = start;
6354 let mut j = start
6355 .saturating_add(size)
6356 .saturating_sub(prefetch_distance + 3);
6357 let mut p0;
6358 let mut p1;
6359 let mut p2;
6360 let mut p3 = 0;
6361
6362 while i < j {
6363 p0 = sa_head[i];
6364 sa_head[i] = p0 & SAINT_MAX;
6365 sam[(sa_head[i] >> 1) as usize] = name | (p0 & p3 & SAINT_MIN);
6366 name += SaSint::from(p0 < 0);
6367
6368 p1 = sa_head[i + 1];
6369 sa_head[i + 1] = p1 & SAINT_MAX;
6370 sam[(sa_head[i + 1] >> 1) as usize] = name | (p1 & p0 & SAINT_MIN);
6371 name += SaSint::from(p1 < 0);
6372
6373 p2 = sa_head[i + 2];
6374 sa_head[i + 2] = p2 & SAINT_MAX;
6375 sam[(sa_head[i + 2] >> 1) as usize] = name | (p2 & p1 & SAINT_MIN);
6376 name += SaSint::from(p2 < 0);
6377
6378 p3 = sa_head[i + 3];
6379 sa_head[i + 3] = p3 & SAINT_MAX;
6380 sam[(sa_head[i + 3] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6381 name += SaSint::from(p3 < 0);
6382
6383 i += 4;
6384 }
6385
6386 j = start + size;
6387 while i < j {
6388 p2 = p3;
6389 p3 = sa_head[i];
6390 sa_head[i] = p3 & SAINT_MAX;
6391 sam[(sa_head[i] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6392 name += SaSint::from(p3 < 0);
6393 i += 1;
6394 }
6395
6396 name
6397}
6398
6399#[doc(hidden)]
6401pub fn mark_distinct_lms_suffixes_32s(
6402 sa: &mut [SaSint],
6403 m: SaSint,
6404 omp_block_start: FastSint,
6405 omp_block_size: FastSint,
6406) {
6407 if omp_block_size <= 0 {
6408 return;
6409 }
6410
6411 let m_usize = usize::try_from(m).expect("m must be non-negative");
6412 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
6413 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6414 let mut i = m_usize + start;
6415 let mut j = m_usize + start + size.saturating_sub(3);
6416 let mut p3 = 0;
6417
6418 while i < j {
6419 let mut p0 = sa[i];
6420 sa[i] = p0 & (p3 | SAINT_MAX);
6421 p0 = if p0 == 0 { p3 } else { p0 };
6422
6423 let mut p1 = sa[i + 1];
6424 sa[i + 1] = p1 & (p0 | SAINT_MAX);
6425 p1 = if p1 == 0 { p0 } else { p1 };
6426
6427 let mut p2 = sa[i + 2];
6428 sa[i + 2] = p2 & (p1 | SAINT_MAX);
6429 p2 = if p2 == 0 { p1 } else { p2 };
6430
6431 p3 = sa[i + 3];
6432 sa[i + 3] = p3 & (p2 | SAINT_MAX);
6433 p3 = if p3 == 0 { p2 } else { p3 };
6434
6435 i += 4;
6436 }
6437
6438 j = m_usize + start + size;
6439 while i < j {
6440 let p2 = p3;
6441 p3 = sa[i];
6442 sa[i] = p3 & (p2 | SAINT_MAX);
6443 p3 = if p3 == 0 { p2 } else { p3 };
6444 i += 1;
6445 }
6446}
6447
6448#[doc(hidden)]
6450pub fn clamp_lms_suffixes_length_32s(
6451 sa: &mut [SaSint],
6452 m: SaSint,
6453 omp_block_start: FastSint,
6454 omp_block_size: FastSint,
6455) {
6456 if omp_block_size <= 0 {
6457 return;
6458 }
6459
6460 let m_usize = usize::try_from(m).expect("m must be non-negative");
6461 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
6462 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6463 let mut i = m_usize + start;
6464 let mut j = m_usize + start + size.saturating_sub(3);
6465
6466 while i < j {
6467 let s0 = sa[i];
6468 sa[i] = if s0 < 0 { s0 } else { 0 } & SAINT_MAX;
6469
6470 let s1 = sa[i + 1];
6471 sa[i + 1] = if s1 < 0 { s1 } else { 0 } & SAINT_MAX;
6472
6473 let s2 = sa[i + 2];
6474 sa[i + 2] = if s2 < 0 { s2 } else { 0 } & SAINT_MAX;
6475
6476 let s3 = sa[i + 3];
6477 sa[i + 3] = if s3 < 0 { s3 } else { 0 } & SAINT_MAX;
6478
6479 i += 4;
6480 }
6481
6482 j = m_usize + start + size;
6483 while i < j {
6484 let s = sa[i];
6485 sa[i] = if s < 0 { s } else { 0 } & SAINT_MAX;
6486 i += 1;
6487 }
6488}
6489
6490#[doc(hidden)]
6492pub fn renumber_distinct_lms_suffixes_32s_4k_omp(
6493 sa: &mut [SaSint],
6494 m: SaSint,
6495 threads: SaSint,
6496 thread_state: &mut [ThreadState],
6497) -> SaSint {
6498 let mut name = 0;
6499 let m_usize = usize::try_from(m).expect("m must be non-negative");
6500 let omp_num_threads = if threads > 1 && m >= 65_536 {
6501 usize::try_from(threads)
6502 .expect("threads must be non-negative")
6503 .min(thread_state.len())
6504 .max(1)
6505 } else {
6506 1
6507 };
6508 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
6509
6510 if omp_num_threads == 1 {
6511 let omp_block_start = 0usize;
6512 let omp_block_size = m_usize - omp_block_start;
6513 name = renumber_distinct_lms_suffixes_32s_4k(
6514 sa,
6515 m,
6516 1,
6517 omp_block_start as FastSint,
6518 omp_block_size as FastSint,
6519 );
6520 } else {
6521 for omp_thread_num in 0..omp_num_threads {
6522 let omp_block_start = omp_thread_num * omp_block_stride;
6523 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6524 omp_block_stride
6525 } else {
6526 m_usize - omp_block_start
6527 };
6528 thread_state[omp_thread_num].count = count_negative_marked_suffixes(
6529 sa,
6530 omp_block_start as FastSint,
6531 omp_block_size as FastSint,
6532 ) as FastSint;
6533 }
6534
6535 for omp_thread_num in 0..omp_num_threads {
6536 let omp_block_start = omp_thread_num * omp_block_stride;
6537 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6538 omp_block_stride
6539 } else {
6540 m_usize - omp_block_start
6541 };
6542
6543 let mut count: FastSint = 1;
6544 for t in 0..omp_thread_num {
6545 count += thread_state[t].count;
6546 }
6547
6548 if omp_thread_num + 1 == omp_num_threads {
6549 name = (count + thread_state[omp_thread_num].count) as SaSint;
6550 }
6551
6552 let _ = renumber_distinct_lms_suffixes_32s_4k(
6553 sa,
6554 m,
6555 count as SaSint,
6556 omp_block_start as FastSint,
6557 omp_block_size as FastSint,
6558 );
6559 }
6560 }
6561
6562 name - 1
6563}
6564
6565#[doc(hidden)]
6567pub fn mark_distinct_lms_suffixes_32s_omp(
6568 sa: &mut [SaSint],
6569 n: SaSint,
6570 m: SaSint,
6571 threads: SaSint,
6572) {
6573 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6574 let omp_num_threads = if threads > 1 && n >= 131_072 {
6575 usize::try_from(threads)
6576 .expect("threads must be non-negative")
6577 .max(1)
6578 } else {
6579 1
6580 };
6581 let omp_block_stride = (half_n / omp_num_threads) & !15usize;
6582
6583 for omp_thread_num in 0..omp_num_threads {
6584 let omp_block_start = omp_thread_num * omp_block_stride;
6585 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6586 omp_block_stride
6587 } else {
6588 half_n - omp_block_start
6589 };
6590 mark_distinct_lms_suffixes_32s(
6591 sa,
6592 m,
6593 omp_block_start as FastSint,
6594 omp_block_size as FastSint,
6595 );
6596 }
6597}
6598
6599#[doc(hidden)]
6601pub fn clamp_lms_suffixes_length_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6602 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6603 let omp_num_threads = if threads > 1 && n >= 131_072 {
6604 usize::try_from(threads)
6605 .expect("threads must be non-negative")
6606 .max(1)
6607 } else {
6608 1
6609 };
6610 let omp_block_stride = (half_n / omp_num_threads) & !15usize;
6611
6612 for omp_thread_num in 0..omp_num_threads {
6613 let omp_block_start = omp_thread_num * omp_block_stride;
6614 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6615 omp_block_stride
6616 } else {
6617 half_n - omp_block_start
6618 };
6619 clamp_lms_suffixes_length_32s(
6620 sa,
6621 m,
6622 omp_block_start as FastSint,
6623 omp_block_size as FastSint,
6624 );
6625 }
6626}
6627
6628#[doc(hidden)]
6630pub fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
6631 sa: &mut [SaSint],
6632 n: SaSint,
6633 m: SaSint,
6634 threads: SaSint,
6635 thread_state: &mut [ThreadState],
6636) -> SaSint {
6637 let m_usize = usize::try_from(m).expect("m must be non-negative");
6638 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6639 sa[m_usize..m_usize + half_n].fill(0);
6640
6641 let name = renumber_distinct_lms_suffixes_32s_4k_omp(sa, m, threads, thread_state);
6642 if name < m {
6643 mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6644 }
6645
6646 name
6647}
6648
6649#[doc(hidden)]
6651pub fn renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
6652 t: &[SaSint],
6653 sa: &mut [SaSint],
6654 n: SaSint,
6655 m: SaSint,
6656 threads: SaSint,
6657) -> SaSint {
6658 let m_usize = usize::try_from(m).expect("m must be non-negative");
6659 let n_usize = usize::try_from(n).expect("n must be non-negative");
6660
6661 let _ = gather_lms_suffixes_32s(t, sa, n);
6662
6663 let zero_len = n_usize
6664 .checked_sub(m_usize)
6665 .and_then(|v| v.checked_sub(m_usize))
6666 .expect("n must be at least 2*m");
6667 sa[m_usize..m_usize + zero_len].fill(0);
6668
6669 {
6670 let prefetch_distance: FastSint = 64;
6671 let mut i = n as FastSint - m as FastSint;
6672 let mut j = n as FastSint - 1 - prefetch_distance - 3;
6673
6674 while i < j {
6675 let iu = i as usize;
6676 let s0 = (sa[iu] as SaUint >> 1) as usize;
6677 let s1 = (sa[iu + 1] as SaUint >> 1) as usize;
6678 let s2 = (sa[iu + 2] as SaUint >> 1) as usize;
6679 let s3 = (sa[iu + 3] as SaUint >> 1) as usize;
6680
6681 sa[m_usize + s0] = sa[iu + 1] - sa[iu] + 1 + SAINT_MIN;
6682 sa[m_usize + s1] = sa[iu + 2] - sa[iu + 1] + 1 + SAINT_MIN;
6683 sa[m_usize + s2] = sa[iu + 3] - sa[iu + 2] + 1 + SAINT_MIN;
6684 sa[m_usize + s3] = sa[iu + 4] - sa[iu + 3] + 1 + SAINT_MIN;
6685 i += 4;
6686 }
6687
6688 j += prefetch_distance + 3;
6689 while i < j {
6690 let iu = i as usize;
6691 let s = (sa[iu] as SaUint >> 1) as usize;
6692 sa[m_usize + s] = sa[iu + 1] - sa[iu] + 1 + SAINT_MIN;
6693 i += 1;
6694 }
6695
6696 let tail = (sa[n_usize - 1] as SaUint >> 1) as usize;
6697 sa[m_usize + tail] = 1 + SAINT_MIN;
6698 }
6699
6700 clamp_lms_suffixes_length_32s_omp(sa, n, m, threads);
6701
6702 let mut name = 1;
6703 if m_usize > 0 {
6704 let (sa_head, sam) = sa.split_at_mut(m_usize);
6705 let mut i = 1usize;
6706 let prefetch_distance = 64usize;
6707 let mut j = m_usize.saturating_sub(prefetch_distance + 1);
6708 let mut p = usize::try_from(sa_head[0]).expect("suffix index must be non-negative");
6709 let mut plen = sam[p >> 1];
6710 let mut pdiff = SAINT_MIN;
6711
6712 while i < j {
6713 let q = usize::try_from(sa_head[i]).expect("suffix index must be non-negative");
6714 let qlen = sam[q >> 1];
6715 let mut qdiff = SAINT_MIN;
6716 if plen == qlen {
6717 let mut l = 0usize;
6718 while l < qlen as usize {
6719 if t[p + l] != t[q + l] {
6720 break;
6721 }
6722 l += 1;
6723 }
6724 qdiff = ((l as SaSint) - qlen) & SAINT_MIN;
6725 }
6726 sam[p >> 1] = name | (pdiff & qdiff);
6727 name += SaSint::from(qdiff < 0);
6728
6729 p = usize::try_from(sa_head[i + 1]).expect("suffix index must be non-negative");
6730 plen = sam[p >> 1];
6731 pdiff = SAINT_MIN;
6732 if qlen == plen {
6733 let mut l = 0usize;
6734 while l < plen as usize {
6735 if t[q + l] != t[p + l] {
6736 break;
6737 }
6738 l += 1;
6739 }
6740 pdiff = ((l as SaSint) - plen) & SAINT_MIN;
6741 }
6742 sam[q >> 1] = name | (qdiff & pdiff);
6743 name += SaSint::from(pdiff < 0);
6744 i += 2;
6745 }
6746
6747 j = m_usize;
6748 while i < j {
6749 let q = usize::try_from(sa_head[i]).expect("suffix index must be non-negative");
6750 let qlen = sam[q >> 1];
6751 let mut qdiff = SAINT_MIN;
6752 if plen == qlen {
6753 let mut l = 0usize;
6754 while l < plen as usize {
6755 if t[p + l] != t[q + l] {
6756 break;
6757 }
6758 l += 1;
6759 }
6760 qdiff = ((l as SaSint) - plen) & SAINT_MIN;
6761 }
6762 sam[p >> 1] = name | (pdiff & qdiff);
6763 name += SaSint::from(qdiff < 0);
6764
6765 p = q;
6766 plen = qlen;
6767 pdiff = qdiff;
6768 i += 1;
6769 }
6770
6771 sam[p >> 1] = name | pdiff;
6772 name += 1;
6773 }
6774
6775 if name <= m {
6776 mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6777 }
6778
6779 name - 1
6780}
6781
6782#[doc(hidden)]
6784pub fn reconstruct_lms_suffixes(
6785 sa: &mut [SaSint],
6786 n: SaSint,
6787 m: SaSint,
6788 omp_block_start: FastSint,
6789 omp_block_size: FastSint,
6790) {
6791 if omp_block_size <= 0 {
6792 return;
6793 }
6794
6795 let prefetch_distance: FastSint = 64;
6796 let base = (n - m) as usize;
6797 let mut i = omp_block_start;
6798 let mut j = omp_block_start + omp_block_size - prefetch_distance - 3;
6799
6800 while i < j {
6801 let iu = i as usize;
6802 let s0 = sa[iu] as usize;
6803 let s1 = sa[iu + 1] as usize;
6804 let s2 = sa[iu + 2] as usize;
6805 let s3 = sa[iu + 3] as usize;
6806 sa[iu] = sa[base + s0];
6807 sa[iu + 1] = sa[base + s1];
6808 sa[iu + 2] = sa[base + s2];
6809 sa[iu + 3] = sa[base + s3];
6810 i += 4;
6811 }
6812
6813 j += prefetch_distance + 3;
6814 while i < j {
6815 let iu = i as usize;
6816 let s = sa[iu] as usize;
6817 sa[iu] = sa[base + s];
6818 i += 1;
6819 }
6820}
6821
6822#[doc(hidden)]
6824pub fn reconstruct_lms_suffixes_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6825 let m_usize = usize::try_from(m).expect("m must be non-negative");
6826 let omp_num_threads = if threads > 1 && m >= 65_536 {
6827 usize::try_from(threads)
6828 .expect("threads must be non-negative")
6829 .max(1)
6830 } else {
6831 1
6832 };
6833 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
6834
6835 for omp_thread_num in 0..omp_num_threads {
6836 let omp_block_start = omp_thread_num * omp_block_stride;
6837 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6838 omp_block_stride
6839 } else {
6840 m_usize - omp_block_start
6841 };
6842 reconstruct_lms_suffixes(
6843 sa,
6844 n,
6845 m,
6846 omp_block_start as FastSint,
6847 omp_block_size as FastSint,
6848 );
6849 }
6850}
6851
6852#[doc(hidden)]
6854pub fn place_lms_suffixes_interval_8u(
6855 sa: &mut [SaSint],
6856 n: SaSint,
6857 mut m: SaSint,
6858 flags: SaSint,
6859 buckets: &mut [SaSint],
6860) {
6861 let bucket_end_base = 7 * ALPHABET_SIZE;
6862 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
6863 buckets[bucket_end_base] -= 1;
6864 }
6865
6866 let mut j = usize::try_from(n).expect("n must be non-negative");
6867 for c in (0..ALPHABET_SIZE - 1).rev() {
6868 let l = usize::try_from(
6869 buckets[buckets_index2(c, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(c, 1)],
6870 )
6871 .expect("interval length must be non-negative");
6872 if l > 0 {
6873 let i = usize::try_from(buckets[bucket_end_base + c])
6874 .expect("bucket end must be non-negative");
6875 if j > i {
6876 sa[i..j].fill(0);
6877 }
6878
6879 let new_j = i - l;
6880 let src_end = usize::try_from(m).expect("m must be non-negative");
6881 let src_start = src_end - l;
6882 sa.copy_within(src_start..src_end, new_j);
6883 m -= l as SaSint;
6884 j = new_j;
6885 }
6886 }
6887
6888 sa[..j].fill(0);
6889
6890 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
6891 buckets[bucket_end_base] += 1;
6892 }
6893}
6894
6895#[doc(hidden)]
6897pub fn place_lms_suffixes_interval_32s_4k(
6898 sa: &mut [SaSint],
6899 n: SaSint,
6900 k: SaSint,
6901 mut m: SaSint,
6902 buckets: &[SaSint],
6903) {
6904 let k_usize = usize::try_from(k).expect("k must be non-negative");
6905 let bucket_end = &buckets[3 * k_usize..4 * k_usize];
6906
6907 let mut j = usize::try_from(n).expect("n must be non-negative");
6908 for c in (0..k_usize - 1).rev() {
6909 let l = usize::try_from(
6910 buckets[buckets_index2(c, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(c, 1)],
6911 )
6912 .expect("interval length must be non-negative");
6913 if l > 0 {
6914 let i = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
6915 if j > i {
6916 sa[i..j].fill(0);
6917 }
6918
6919 let new_j = i - l;
6920 let src_end = usize::try_from(m).expect("m must be non-negative");
6921 let src_start = src_end - l;
6922 sa.copy_within(src_start..src_end, new_j);
6923 m -= l as SaSint;
6924 j = new_j;
6925 }
6926 }
6927
6928 sa[..j].fill(0);
6929}
6930
6931#[doc(hidden)]
6933pub fn place_lms_suffixes_interval_32s_2k(
6934 sa: &mut [SaSint],
6935 n: SaSint,
6936 k: SaSint,
6937 mut m: SaSint,
6938 buckets: &[SaSint],
6939) {
6940 let k_usize = usize::try_from(k).expect("k must be non-negative");
6941 let mut j = usize::try_from(n).expect("n must be non-negative");
6942
6943 if k_usize > 1 {
6944 let mut c = buckets_index2(k_usize - 2, 0) as isize;
6945 while c >= buckets_index2(0, 0) as isize {
6946 let c_usize = c as usize;
6947 let l = usize::try_from(
6948 buckets[c_usize + buckets_index2(1, 1)] - buckets[c_usize + buckets_index2(0, 1)],
6949 )
6950 .expect("interval length must be non-negative");
6951 if l > 0 {
6952 let i =
6953 usize::try_from(buckets[c_usize]).expect("bucket start must be non-negative");
6954 if j > i {
6955 sa[i..j].fill(0);
6956 }
6957
6958 let new_j = i - l;
6959 let src_end = usize::try_from(m).expect("m must be non-negative");
6960 let src_start = src_end - l;
6961 sa.copy_within(src_start..src_end, new_j);
6962 m -= l as SaSint;
6963 j = new_j;
6964 }
6965 c -= buckets_index2(1, 0) as isize;
6966 }
6967 }
6968
6969 sa[..j].fill(0);
6970}
6971
6972#[doc(hidden)]
6974pub fn place_lms_suffixes_interval_32s_1k(
6975 t: &[SaSint],
6976 sa: &mut [SaSint],
6977 k: SaSint,
6978 m: SaSint,
6979 buckets: &[SaSint],
6980) {
6981 let mut c = k - 1;
6982 let c_usize = usize::try_from(c).expect("k must be positive");
6983 let mut l = usize::try_from(buckets[c_usize]).expect("bucket end must be non-negative");
6984
6985 let m_usize = usize::try_from(m).expect("m must be non-negative");
6986 for i in (0..m_usize).rev() {
6987 let p = usize::try_from(sa[i]).expect("suffix index must be non-negative");
6988 let tp = t[p];
6989 if tp != c {
6990 c = tp;
6991 let bucket = usize::try_from(c).expect("bucket index must be non-negative");
6992 let bucket_pos =
6993 usize::try_from(buckets[bucket]).expect("bucket end must be non-negative");
6994 if l > bucket_pos {
6995 sa[bucket_pos..l].fill(0);
6996 }
6997 l = bucket_pos;
6998 }
6999 l -= 1;
7000 sa[l] = p as SaSint;
7001 }
7002
7003 sa[..l].fill(0);
7004}
7005
7006#[doc(hidden)]
7008pub fn place_lms_suffixes_histogram_32s_6k(
7009 sa: &mut [SaSint],
7010 n: SaSint,
7011 k: SaSint,
7012 mut m: SaSint,
7013 buckets: &[SaSint],
7014) {
7015 let k_usize = usize::try_from(k).expect("k must be non-negative");
7016 let bucket_end = &buckets[5 * k_usize..6 * k_usize];
7017
7018 let mut j = usize::try_from(n).expect("n must be non-negative");
7019 for c in (0..k_usize - 1).rev() {
7020 let l = usize::try_from(buckets[buckets_index4(c, 1)])
7021 .expect("histogram length must be non-negative");
7022 if l > 0 {
7023 let i = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
7024 if j > i {
7025 sa[i..j].fill(0);
7026 }
7027
7028 let new_j = i - l;
7029 let src_end = usize::try_from(m).expect("m must be non-negative");
7030 let src_start = src_end - l;
7031 sa.copy_within(src_start..src_end, new_j);
7032 m -= l as SaSint;
7033 j = new_j;
7034 }
7035 }
7036
7037 sa[..j].fill(0);
7038}
7039
7040#[doc(hidden)]
7042pub fn place_lms_suffixes_histogram_32s_4k(
7043 sa: &mut [SaSint],
7044 n: SaSint,
7045 k: SaSint,
7046 mut m: SaSint,
7047 buckets: &[SaSint],
7048) {
7049 let k_usize = usize::try_from(k).expect("k must be non-negative");
7050 let bucket_end = &buckets[3 * k_usize..4 * k_usize];
7051
7052 let mut j = usize::try_from(n).expect("n must be non-negative");
7053 for c in (0..k_usize - 1).rev() {
7054 let l = usize::try_from(buckets[buckets_index2(c, 1)])
7055 .expect("histogram length must be non-negative");
7056 if l > 0 {
7057 let i = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
7058 if j > i {
7059 sa[i..j].fill(0);
7060 }
7061
7062 let new_j = i - l;
7063 let src_end = usize::try_from(m).expect("m must be non-negative");
7064 let src_start = src_end - l;
7065 sa.copy_within(src_start..src_end, new_j);
7066 m -= l as SaSint;
7067 j = new_j;
7068 }
7069 }
7070
7071 sa[..j].fill(0);
7072}
7073
7074#[doc(hidden)]
7076pub fn place_lms_suffixes_histogram_32s_2k(
7077 sa: &mut [SaSint],
7078 n: SaSint,
7079 k: SaSint,
7080 mut m: SaSint,
7081 buckets: &[SaSint],
7082) {
7083 let k_usize = usize::try_from(k).expect("k must be non-negative");
7084 let mut j = usize::try_from(n).expect("n must be non-negative");
7085
7086 if k_usize > 1 {
7087 let mut c = buckets_index2(k_usize - 2, 0) as isize;
7088 while c >= buckets_index2(0, 0) as isize {
7089 let c_usize = c as usize;
7090 let l = usize::try_from(buckets[c_usize + buckets_index2(0, 1)])
7091 .expect("histogram length must be non-negative");
7092 if l > 0 {
7093 let i =
7094 usize::try_from(buckets[c_usize]).expect("bucket start must be non-negative");
7095 if j > i {
7096 sa[i..j].fill(0);
7097 }
7098
7099 let new_j = i - l;
7100 let src_end = usize::try_from(m).expect("m must be non-negative");
7101 let src_start = src_end - l;
7102 sa.copy_within(src_start..src_end, new_j);
7103 m -= l as SaSint;
7104 j = new_j;
7105 }
7106 c -= buckets_index2(1, 0) as isize;
7107 }
7108 }
7109
7110 sa[..j].fill(0);
7111}
7112
7113#[doc(hidden)]
7115pub fn final_bwt_scan_left_to_right_8u(
7116 t: &[u8],
7117 sa: &mut [SaSint],
7118 induction_bucket: &mut [SaSint],
7119 omp_block_start: FastSint,
7120 omp_block_size: FastSint,
7121) {
7122 if omp_block_size <= 0 {
7123 return;
7124 }
7125
7126 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7127 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7128 for i in start..start + size {
7129 let mut p = sa[i];
7130 sa[i] = p & SAINT_MAX;
7131 if p > 0 {
7132 p -= 1;
7133 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7134 sa[i] = t[p_usize] as SaSint | SAINT_MIN;
7135 let bucket = t[p_usize] as usize;
7136 let slot = usize::try_from(induction_bucket[bucket])
7137 .expect("bucket slot must be non-negative");
7138 sa[slot] = p
7139 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7140 << (SAINT_BIT - 1));
7141 induction_bucket[bucket] += 1;
7142 }
7143 }
7144}
7145
7146#[doc(hidden)]
7148pub fn final_bwt_aux_scan_left_to_right_8u(
7149 t: &[u8],
7150 sa: &mut [SaSint],
7151 rm: SaSint,
7152 i_out: &mut [SaSint],
7153 induction_bucket: &mut [SaSint],
7154 omp_block_start: FastSint,
7155 omp_block_size: FastSint,
7156) {
7157 if omp_block_size <= 0 {
7158 return;
7159 }
7160
7161 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7162 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7163 for i in start..start + size {
7164 let mut p = sa[i];
7165 sa[i] = p & SAINT_MAX;
7166 if p > 0 {
7167 p -= 1;
7168 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7169 sa[i] = t[p_usize] as SaSint | SAINT_MIN;
7170 let bucket = t[p_usize] as usize;
7171 let slot = usize::try_from(induction_bucket[bucket])
7172 .expect("bucket slot must be non-negative");
7173 sa[slot] = p
7174 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7175 << (SAINT_BIT - 1));
7176 induction_bucket[bucket] += 1;
7177 if (p & rm) == 0 {
7178 let out_idx =
7179 usize::try_from(p / (rm + 1)).expect("sample index must be non-negative");
7180 i_out[out_idx] = induction_bucket[bucket];
7181 }
7182 }
7183 }
7184}
7185
7186#[doc(hidden)]
7188pub fn final_sorting_scan_left_to_right_8u(
7189 t: &[u8],
7190 sa: &mut [SaSint],
7191 induction_bucket: &mut [SaSint],
7192 omp_block_start: FastSint,
7193 omp_block_size: FastSint,
7194) {
7195 if omp_block_size <= 0 {
7196 return;
7197 }
7198
7199 let prefetch_distance = 64usize;
7200 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7201 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7202
7203 let mut i = start;
7204 let mut j = if size > prefetch_distance + 1 {
7205 start + size - (prefetch_distance + 1)
7206 } else {
7207 start
7208 };
7209 while i < j {
7210 let mut p0 = sa[i];
7211 sa[i] = p0 ^ SAINT_MIN;
7212 if p0 > 0 {
7213 p0 -= 1;
7214 let p0_usize = p0 as usize;
7215 let bucket0 = t[p0_usize] as usize;
7216 let slot0 = induction_bucket[bucket0] as usize;
7217 sa[slot0] = p0
7218 | ((usize::from(t[p0_usize - usize::from(p0 > 0)] < t[p0_usize]) as SaSint)
7219 << (SAINT_BIT - 1));
7220 induction_bucket[bucket0] += 1;
7221 }
7222
7223 let mut p1 = sa[i + 1];
7224 sa[i + 1] = p1 ^ SAINT_MIN;
7225 if p1 > 0 {
7226 p1 -= 1;
7227 let p1_usize = p1 as usize;
7228 let bucket1 = t[p1_usize] as usize;
7229 let slot1 = induction_bucket[bucket1] as usize;
7230 sa[slot1] = p1
7231 | ((usize::from(t[p1_usize - usize::from(p1 > 0)] < t[p1_usize]) as SaSint)
7232 << (SAINT_BIT - 1));
7233 induction_bucket[bucket1] += 1;
7234 }
7235
7236 i += 2;
7237 }
7238
7239 j = start + size;
7240 while i < j {
7241 let mut p = sa[i];
7242 sa[i] = p ^ SAINT_MIN;
7243 if p > 0 {
7244 p -= 1;
7245 let p_usize = p as usize;
7246 let bucket = t[p_usize] as usize;
7247 let slot = induction_bucket[bucket] as usize;
7248 sa[slot] = p
7249 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7250 << (SAINT_BIT - 1));
7251 induction_bucket[bucket] += 1;
7252 }
7253 i += 1;
7254 }
7255}
7256
7257#[doc(hidden)]
7259pub fn final_sorting_scan_left_to_right_32s(
7260 t: &[SaSint],
7261 sa: &mut [SaSint],
7262 induction_bucket: &mut [SaSint],
7263 omp_block_start: FastSint,
7264 omp_block_size: FastSint,
7265) {
7266 if omp_block_size <= 0 {
7267 return;
7268 }
7269
7270 let prefetch_distance: FastSint = 64;
7271 let mut i = omp_block_start;
7272 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
7273
7274 while i < j {
7275 let i0 = i as usize;
7276 let mut p0 = sa[i0];
7277 sa[i0] = p0 ^ SAINT_MIN;
7278 if p0 > 0 {
7279 p0 -= 1;
7280 let p0u = p0 as usize;
7281 let bucket0 = t[p0u] as usize;
7282 let slot0 = induction_bucket[bucket0] as usize;
7283 sa[slot0] = p0
7284 | ((usize::from(t[p0u - usize::from(p0 > 0)] < t[p0u]) as SaSint)
7285 << (SAINT_BIT - 1));
7286 induction_bucket[bucket0] += 1;
7287 }
7288
7289 let i1 = (i + 1) as usize;
7290 let mut p1 = sa[i1];
7291 sa[i1] = p1 ^ SAINT_MIN;
7292 if p1 > 0 {
7293 p1 -= 1;
7294 let p1u = p1 as usize;
7295 let bucket1 = t[p1u] as usize;
7296 let slot1 = induction_bucket[bucket1] as usize;
7297 sa[slot1] = p1
7298 | ((usize::from(t[p1u - usize::from(p1 > 0)] < t[p1u]) as SaSint)
7299 << (SAINT_BIT - 1));
7300 induction_bucket[bucket1] += 1;
7301 }
7302 i += 2;
7303 }
7304
7305 j += 2 * prefetch_distance + 1;
7306 while i < j {
7307 let iu = i as usize;
7308 let mut p = sa[iu];
7309 sa[iu] = p ^ SAINT_MIN;
7310 if p > 0 {
7311 p -= 1;
7312 let pu = p as usize;
7313 let bucket = t[pu] as usize;
7314 let slot = induction_bucket[bucket] as usize;
7315 sa[slot] = p
7316 | ((usize::from(t[pu - usize::from(p > 0)] < t[pu]) as SaSint) << (SAINT_BIT - 1));
7317 induction_bucket[bucket] += 1;
7318 }
7319 i += 1;
7320 }
7321}
7322
7323#[doc(hidden)]
7325pub fn final_bwt_scan_left_to_right_8u_block_prepare(
7326 t: &[u8],
7327 sa: &mut [SaSint],
7328 k: SaSint,
7329 buckets: &mut [SaSint],
7330 cache: &mut [ThreadCache],
7331 omp_block_start: FastSint,
7332 omp_block_size: FastSint,
7333) -> FastSint {
7334 if omp_block_size <= 0 {
7335 return 0;
7336 }
7337
7338 let k_usize = usize::try_from(k).expect("k must be non-negative");
7339 buckets[..k_usize].fill(0);
7340
7341 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7342 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7343 let mut count = 0usize;
7344 for i in start..start + size {
7345 let mut p = sa[i];
7346 sa[i] = p & SAINT_MAX;
7347 if p > 0 {
7348 p -= 1;
7349 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7350 let symbol = t[p_usize] as usize;
7351 sa[i] = t[p_usize] as SaSint | SAINT_MIN;
7352 buckets[symbol] += 1;
7353 cache[count].symbol = symbol as SaSint;
7354 cache[count].index = p
7355 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7356 << (SAINT_BIT - 1));
7357 count += 1;
7358 }
7359 }
7360
7361 count as FastSint
7362}
7363
7364#[doc(hidden)]
7366pub fn final_sorting_scan_left_to_right_8u_block_prepare(
7367 t: &[u8],
7368 sa: &mut [SaSint],
7369 k: SaSint,
7370 buckets: &mut [SaSint],
7371 cache: &mut [ThreadCache],
7372 omp_block_start: FastSint,
7373 omp_block_size: FastSint,
7374) -> FastSint {
7375 if omp_block_size <= 0 {
7376 return 0;
7377 }
7378
7379 let k_usize = usize::try_from(k).expect("k must be non-negative");
7380 buckets[..k_usize].fill(0);
7381
7382 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7383 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7384 let mut count = 0usize;
7385 for i in start..start + size {
7386 let mut p = sa[i];
7387 sa[i] = p ^ SAINT_MIN;
7388 if p > 0 {
7389 p -= 1;
7390 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7391 let symbol = t[p_usize] as usize;
7392 buckets[symbol] += 1;
7393 cache[count].symbol = symbol as SaSint;
7394 cache[count].index = p
7395 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7396 << (SAINT_BIT - 1));
7397 count += 1;
7398 }
7399 }
7400
7401 count as FastSint
7402}
7403
7404#[doc(hidden)]
7406pub fn final_order_scan_left_to_right_8u_block_place(
7407 sa: &mut [SaSint],
7408 buckets: &mut [SaSint],
7409 cache: &[ThreadCache],
7410 count: FastSint,
7411) {
7412 if count <= 0 {
7413 return;
7414 }
7415
7416 let count_usize = usize::try_from(count).expect("count must be non-negative");
7417 for entry in &cache[..count_usize] {
7418 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
7419 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
7420 sa[slot] = entry.index;
7421 buckets[symbol] += 1;
7422 }
7423}
7424
7425#[doc(hidden)]
7427pub fn final_bwt_aux_scan_left_to_right_8u_block_place(
7428 sa: &mut [SaSint],
7429 rm: SaSint,
7430 i_out: &mut [SaSint],
7431 buckets: &mut [SaSint],
7432 cache: &[ThreadCache],
7433 count: FastSint,
7434) {
7435 if count <= 0 {
7436 return;
7437 }
7438
7439 let count_usize = usize::try_from(count).expect("count must be non-negative");
7440 for entry in &cache[..count_usize] {
7441 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
7442 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
7443 sa[slot] = entry.index;
7444 buckets[symbol] += 1;
7445 if (entry.index & rm) == 0 {
7446 let sample_index = usize::try_from((entry.index & SAINT_MAX) / (rm + 1))
7447 .expect("sample index must be non-negative");
7448 i_out[sample_index] = buckets[symbol];
7449 }
7450 }
7451}
7452
7453#[doc(hidden)]
7455pub fn final_sorting_scan_left_to_right_32s_block_gather(
7456 t: &[SaSint],
7457 sa: &mut [SaSint],
7458 cache: &mut [ThreadCache],
7459 omp_block_start: FastSint,
7460 omp_block_size: FastSint,
7461) {
7462 if omp_block_size <= 0 {
7463 return;
7464 }
7465 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7466 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7467 for offset in 0..size {
7468 let i = start + offset;
7469 let mut symbol = SAINT_MIN;
7470 let mut p = sa[i];
7471 sa[i] = p ^ SAINT_MIN;
7472 if p > 0 {
7473 p -= 1;
7474 let p_usize = p as usize;
7475 cache[offset].index = p
7476 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7477 << (SAINT_BIT - 1));
7478 symbol = t[p_usize];
7479 }
7480 cache[offset].symbol = symbol;
7481 }
7482}
7483
7484#[doc(hidden)]
7486pub fn final_sorting_scan_left_to_right_32s_block_sort(
7487 t: &[SaSint],
7488 induction_bucket: &mut [SaSint],
7489 cache: &mut [ThreadCache],
7490 omp_block_start: FastSint,
7491 omp_block_size: FastSint,
7492) {
7493 if omp_block_size <= 0 {
7494 return;
7495 }
7496 let prefetch_distance = 64usize;
7497 let start = omp_block_start as usize;
7498 let block_end = start + omp_block_size as usize;
7499 let mut i = start;
7500 let mut j = start + (omp_block_size as usize).saturating_sub(prefetch_distance + 1);
7501
7502 while i < j {
7503 let ci = i - start;
7504 let v0 = cache[ci].symbol;
7505 if v0 >= 0 {
7506 let bucket_index0 = v0 as usize;
7507 cache[ci].symbol = induction_bucket[bucket_index0];
7508 induction_bucket[bucket_index0] += 1;
7509 if cache[ci].symbol < block_end as SaSint {
7510 let ni = cache[ci].symbol as usize;
7511 let cni = ni - start;
7512 let mut np = cache[ci].index;
7513 cache[ci].index = np ^ SAINT_MIN;
7514 if np > 0 {
7515 np -= 1;
7516 let np_usize = np as usize;
7517 cache[cni].index = np
7518 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
7519 as SaSint)
7520 << (SAINT_BIT - 1));
7521 cache[cni].symbol = t[np_usize];
7522 }
7523 }
7524 }
7525
7526 let i1 = i + 1;
7527 let ci1 = i1 - start;
7528 let v1 = cache[ci1].symbol;
7529 if v1 >= 0 {
7530 let bucket_index1 = v1 as usize;
7531 cache[ci1].symbol = induction_bucket[bucket_index1];
7532 induction_bucket[bucket_index1] += 1;
7533 if cache[ci1].symbol < block_end as SaSint {
7534 let ni = cache[ci1].symbol as usize;
7535 let cni = ni - start;
7536 let mut np = cache[ci1].index;
7537 cache[ci1].index = np ^ SAINT_MIN;
7538 if np > 0 {
7539 np -= 1;
7540 let np_usize = np as usize;
7541 cache[cni].index = np
7542 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
7543 as SaSint)
7544 << (SAINT_BIT - 1));
7545 cache[cni].symbol = t[np_usize];
7546 }
7547 }
7548 }
7549
7550 i += 2;
7551 }
7552
7553 j = block_end;
7554 while i < j {
7555 let ci = i - start;
7556 let v = cache[ci].symbol;
7557 if v >= 0 {
7558 let bucket_index = v as usize;
7559 cache[ci].symbol = induction_bucket[bucket_index];
7560 induction_bucket[bucket_index] += 1;
7561 if cache[ci].symbol < block_end as SaSint {
7562 let ni = cache[ci].symbol as usize;
7563 let cni = ni - start;
7564 let mut np = cache[ci].index;
7565 cache[ci].index = np ^ SAINT_MIN;
7566 if np > 0 {
7567 np -= 1;
7568 let np_usize = np as usize;
7569 cache[cni].index = np
7570 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
7571 as SaSint)
7572 << (SAINT_BIT - 1));
7573 cache[cni].symbol = t[np_usize];
7574 }
7575 }
7576 }
7577 i += 1;
7578 }
7579}
7580
7581#[doc(hidden)]
7583pub fn final_bwt_scan_left_to_right_8u_block_omp(
7584 t: &[u8],
7585 sa: &mut [SaSint],
7586 k: SaSint,
7587 induction_bucket: &mut [SaSint],
7588 block_start: FastSint,
7589 block_size: FastSint,
7590 threads: SaSint,
7591 thread_state: &mut [ThreadState],
7592) {
7593 if block_size <= 0 {
7594 return;
7595 }
7596
7597 let k_usize = usize::try_from(k).expect("k must be non-negative");
7598 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7599 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
7600 usize::try_from(threads)
7601 .expect("threads must be non-negative")
7602 .min(thread_state.len())
7603 .max(1)
7604 } else {
7605 1
7606 };
7607
7608 if omp_num_threads == 1 {
7609 final_bwt_scan_left_to_right_8u(t, sa, induction_bucket, block_start, block_size);
7610 return;
7611 }
7612
7613 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
7614 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7615 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
7616 let relative_start = thread_num * omp_block_stride;
7617 let size = if thread_num + 1 < omp_num_threads {
7618 omp_block_stride
7619 } else {
7620 block_size_usize - relative_start
7621 };
7622 state.count = final_bwt_scan_left_to_right_8u_block_prepare(
7623 t,
7624 sa,
7625 k,
7626 &mut state.buckets,
7627 &mut state.cache,
7628 (block_start_usize + relative_start) as FastSint,
7629 size as FastSint,
7630 );
7631 }
7632
7633 for state in thread_state.iter_mut().take(omp_num_threads) {
7634 for (c, bucket) in induction_bucket.iter_mut().take(k_usize).enumerate() {
7635 let a = *bucket;
7636 let b = state.buckets[c];
7637 *bucket = a + b;
7638 state.buckets[c] = a;
7639 }
7640 }
7641
7642 for state in thread_state.iter_mut().take(omp_num_threads) {
7643 final_order_scan_left_to_right_8u_block_place(
7644 sa,
7645 &mut state.buckets,
7646 &state.cache,
7647 state.count,
7648 );
7649 }
7650}
7651
7652#[doc(hidden)]
7654pub fn final_bwt_aux_scan_left_to_right_8u_block_omp(
7655 t: &[u8],
7656 sa: &mut [SaSint],
7657 k: SaSint,
7658 rm: SaSint,
7659 i_out: &mut [SaSint],
7660 induction_bucket: &mut [SaSint],
7661 block_start: FastSint,
7662 block_size: FastSint,
7663 threads: SaSint,
7664 thread_state: &mut [ThreadState],
7665) {
7666 if block_size <= 0 {
7667 return;
7668 }
7669
7670 let k_usize = usize::try_from(k).expect("k must be non-negative");
7671 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7672 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
7673 usize::try_from(threads)
7674 .expect("threads must be non-negative")
7675 .min(thread_state.len())
7676 .max(1)
7677 } else {
7678 1
7679 };
7680
7681 if omp_num_threads == 1 {
7682 final_bwt_aux_scan_left_to_right_8u(
7683 t,
7684 sa,
7685 rm,
7686 i_out,
7687 induction_bucket,
7688 block_start,
7689 block_size,
7690 );
7691 return;
7692 }
7693
7694 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
7695 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7696 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
7697 let relative_start = thread_num * omp_block_stride;
7698 let size = if thread_num + 1 < omp_num_threads {
7699 omp_block_stride
7700 } else {
7701 block_size_usize - relative_start
7702 };
7703 state.count = final_bwt_scan_left_to_right_8u_block_prepare(
7704 t,
7705 sa,
7706 k,
7707 &mut state.buckets,
7708 &mut state.cache,
7709 (block_start_usize + relative_start) as FastSint,
7710 size as FastSint,
7711 );
7712 }
7713
7714 for state in thread_state.iter_mut().take(omp_num_threads) {
7715 for (c, bucket) in induction_bucket.iter_mut().take(k_usize).enumerate() {
7716 let a = *bucket;
7717 let b = state.buckets[c];
7718 *bucket = a + b;
7719 state.buckets[c] = a;
7720 }
7721 }
7722
7723 for state in thread_state.iter_mut().take(omp_num_threads) {
7724 final_bwt_aux_scan_left_to_right_8u_block_place(
7725 sa,
7726 rm,
7727 i_out,
7728 &mut state.buckets,
7729 &state.cache,
7730 state.count,
7731 );
7732 }
7733}
7734
7735#[doc(hidden)]
7737pub fn final_sorting_scan_left_to_right_8u_block_omp(
7738 t: &[u8],
7739 sa: &mut [SaSint],
7740 k: SaSint,
7741 induction_bucket: &mut [SaSint],
7742 block_start: FastSint,
7743 block_size: FastSint,
7744 threads: SaSint,
7745 thread_state: &mut [ThreadState],
7746) {
7747 if block_size <= 0 {
7748 return;
7749 }
7750
7751 let k_usize = usize::try_from(k).expect("k must be non-negative");
7752 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7753 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
7754 usize::try_from(threads)
7755 .expect("threads must be non-negative")
7756 .min(thread_state.len())
7757 .max(1)
7758 } else {
7759 1
7760 };
7761
7762 if omp_num_threads == 1 {
7763 final_sorting_scan_left_to_right_8u(t, sa, induction_bucket, block_start, block_size);
7764 return;
7765 }
7766
7767 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
7768 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7769 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
7770 let relative_start = thread_num * omp_block_stride;
7771 let size = if thread_num + 1 < omp_num_threads {
7772 omp_block_stride
7773 } else {
7774 block_size_usize - relative_start
7775 };
7776 state.count = final_sorting_scan_left_to_right_8u_block_prepare(
7777 t,
7778 sa,
7779 k,
7780 &mut state.buckets,
7781 &mut state.cache,
7782 (block_start_usize + relative_start) as FastSint,
7783 size as FastSint,
7784 );
7785 }
7786
7787 for state in thread_state.iter_mut().take(omp_num_threads) {
7788 for (c, bucket) in induction_bucket.iter_mut().take(k_usize).enumerate() {
7789 let a = *bucket;
7790 let b = state.buckets[c];
7791 *bucket = a + b;
7792 state.buckets[c] = a;
7793 }
7794 }
7795
7796 for state in thread_state.iter_mut().take(omp_num_threads) {
7797 final_order_scan_left_to_right_8u_block_place(
7798 sa,
7799 &mut state.buckets,
7800 &state.cache,
7801 state.count,
7802 );
7803 }
7804}
7805
7806#[doc(hidden)]
7808pub fn final_sorting_scan_left_to_right_32s_block_omp(
7809 t: &[SaSint],
7810 sa: &mut [SaSint],
7811 buckets: &mut [SaSint],
7812 cache: &mut [ThreadCache],
7813 block_start: FastSint,
7814 block_size: FastSint,
7815 threads: SaSint,
7816) {
7817 if threads <= 1 || block_size < 16_384 {
7818 final_sorting_scan_left_to_right_32s(t, sa, buckets, block_start, block_size);
7819 return;
7820 }
7821
7822 final_sorting_scan_left_to_right_32s_block_gather(t, sa, cache, block_start, block_size);
7823 final_sorting_scan_left_to_right_32s_block_sort(t, buckets, cache, block_start, block_size);
7824 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7825 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
7826 let omp_num_threads = threads_usize.min(block_size_usize);
7827 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7828 for omp_thread_num in 0..omp_num_threads {
7829 let omp_block_start = omp_thread_num * omp_block_stride;
7830 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
7831 omp_block_stride
7832 } else {
7833 block_size_usize - omp_block_start
7834 };
7835 compact_and_place_cached_suffixes(
7836 sa,
7837 cache,
7838 omp_block_start as FastSint,
7839 omp_block_size as FastSint,
7840 );
7841 }
7842}
7843
7844#[doc(hidden)]
7846pub fn final_bwt_scan_left_to_right_8u_omp(
7847 t: &[u8],
7848 sa: &mut [SaSint],
7849 n: FastSint,
7850 k: SaSint,
7851 induction_bucket: &mut [SaSint],
7852 threads: SaSint,
7853 thread_state: &mut [ThreadState],
7854) {
7855 let n_usize = usize::try_from(n).expect("n must be non-negative");
7856 let last = n_usize - 1;
7857 let bucket = t[last] as usize;
7858 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
7859 sa[slot] =
7860 (n as SaSint - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
7861 induction_bucket[bucket] += 1;
7862
7863 if threads == 1 || n < 65_536 {
7864 final_bwt_scan_left_to_right_8u(t, sa, induction_bucket, 0, n);
7865 return;
7866 }
7867
7868 let mut block_start = 0usize;
7869 while block_start < n_usize {
7870 if sa[block_start] == 0 {
7871 block_start += 1;
7872 } else {
7873 let threads_usize = usize::try_from(threads)
7874 .expect("threads must be non-negative")
7875 .min(thread_state.len())
7876 .max(1);
7877 let max_span = threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
7878 let block_max_end = (block_start + max_span).min(n_usize);
7879 let mut block_end = block_start + 1;
7880 while block_end < block_max_end && sa[block_end] != 0 {
7881 block_end += 1;
7882 }
7883 let size = block_end - block_start;
7884
7885 if size < 32 {
7886 final_bwt_scan_left_to_right_8u(
7887 t,
7888 sa,
7889 induction_bucket,
7890 block_start as FastSint,
7891 size as FastSint,
7892 );
7893 } else {
7894 final_bwt_scan_left_to_right_8u_block_omp(
7895 t,
7896 sa,
7897 k,
7898 induction_bucket,
7899 block_start as FastSint,
7900 size as FastSint,
7901 threads,
7902 thread_state,
7903 );
7904 }
7905 block_start = block_end;
7906 }
7907 }
7908}
7909
7910#[doc(hidden)]
7912pub fn final_bwt_aux_scan_left_to_right_8u_omp(
7913 t: &[u8],
7914 sa: &mut [SaSint],
7915 n: FastSint,
7916 k: SaSint,
7917 rm: SaSint,
7918 i_out: &mut [SaSint],
7919 induction_bucket: &mut [SaSint],
7920 threads: SaSint,
7921 thread_state: &mut [ThreadState],
7922) {
7923 let n_usize = usize::try_from(n).expect("n must be non-negative");
7924 let last = n_usize - 1;
7925 let bucket = t[last] as usize;
7926 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
7927 sa[slot] =
7928 (n as SaSint - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
7929 induction_bucket[bucket] += 1;
7930 if (((n as SaSint) - 1) & rm) == 0 {
7931 i_out[last / usize::try_from(rm + 1).expect("rm must allow positive step")] =
7932 induction_bucket[bucket];
7933 }
7934
7935 if threads == 1 || n < 65_536 {
7936 final_bwt_aux_scan_left_to_right_8u(t, sa, rm, i_out, induction_bucket, 0, n);
7937 return;
7938 }
7939
7940 let mut block_start = 0usize;
7941 while block_start < n_usize {
7942 if sa[block_start] == 0 {
7943 block_start += 1;
7944 } else {
7945 let threads_usize = usize::try_from(threads)
7946 .expect("threads must be non-negative")
7947 .min(thread_state.len())
7948 .max(1);
7949 let max_span = threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
7950 let block_max_end = (block_start + max_span).min(n_usize);
7951 let mut block_end = block_start + 1;
7952 while block_end < block_max_end && sa[block_end] != 0 {
7953 block_end += 1;
7954 }
7955 let size = block_end - block_start;
7956
7957 if size < 32 {
7958 final_bwt_aux_scan_left_to_right_8u(
7959 t,
7960 sa,
7961 rm,
7962 i_out,
7963 induction_bucket,
7964 block_start as FastSint,
7965 size as FastSint,
7966 );
7967 } else {
7968 final_bwt_aux_scan_left_to_right_8u_block_omp(
7969 t,
7970 sa,
7971 k,
7972 rm,
7973 i_out,
7974 induction_bucket,
7975 block_start as FastSint,
7976 size as FastSint,
7977 threads,
7978 thread_state,
7979 );
7980 }
7981 block_start = block_end;
7982 }
7983 }
7984}
7985
7986#[doc(hidden)]
7988pub fn final_sorting_scan_left_to_right_8u_omp(
7989 t: &[u8],
7990 sa: &mut [SaSint],
7991 n: FastSint,
7992 k: SaSint,
7993 induction_bucket: &mut [SaSint],
7994 threads: SaSint,
7995 thread_state: &mut [ThreadState],
7996) {
7997 let n_usize = usize::try_from(n).expect("n must be non-negative");
7998 let last = n_usize - 1;
7999 let bucket = t[last] as usize;
8000 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
8001 sa[slot] =
8002 (n as SaSint - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
8003 induction_bucket[bucket] += 1;
8004
8005 if threads == 1 || n < 65_536 {
8006 final_sorting_scan_left_to_right_8u(t, sa, induction_bucket, 0, n);
8007 return;
8008 }
8009
8010 let mut block_start = 0usize;
8011 while block_start < n_usize {
8012 if sa[block_start] == 0 {
8013 block_start += 1;
8014 } else {
8015 let threads_usize = usize::try_from(threads)
8016 .expect("threads must be non-negative")
8017 .min(thread_state.len())
8018 .max(1);
8019 let max_span = threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
8020 let block_max_end = (block_start + max_span).min(n_usize);
8021 let mut block_end = block_start + 1;
8022 while block_end < block_max_end && sa[block_end] != 0 {
8023 block_end += 1;
8024 }
8025 let size = block_end - block_start;
8026
8027 if size < 32 {
8028 final_sorting_scan_left_to_right_8u(
8029 t,
8030 sa,
8031 induction_bucket,
8032 block_start as FastSint,
8033 size as FastSint,
8034 );
8035 } else {
8036 final_sorting_scan_left_to_right_8u_block_omp(
8037 t,
8038 sa,
8039 k,
8040 induction_bucket,
8041 block_start as FastSint,
8042 size as FastSint,
8043 threads,
8044 thread_state,
8045 );
8046 }
8047 block_start = block_end;
8048 }
8049 }
8050}
8051
8052#[doc(hidden)]
8054pub fn final_sorting_scan_left_to_right_32s_omp(
8055 t: &[SaSint],
8056 sa: &mut [SaSint],
8057 n: SaSint,
8058 induction_bucket: &mut [SaSint],
8059 threads: SaSint,
8060 thread_state: &mut [ThreadState],
8061) {
8062 let n_usize = usize::try_from(n).expect("n must be non-negative");
8063 let last = n_usize - 1;
8064 let bucket = usize::try_from(t[last]).expect("bucket symbol must be non-negative");
8065 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
8066 sa[slot] = (n - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
8067 induction_bucket[bucket] += 1;
8068
8069 if threads == 1 || n < 65_536 {
8070 final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n as FastSint);
8071 return;
8072 }
8073
8074 if thread_state.is_empty() {
8075 final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n as FastSint);
8076 return;
8077 }
8078
8079 let threads_usize = usize::try_from(threads)
8080 .expect("threads must be non-negative")
8081 .max(1);
8082 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
8083 let mut block_start = 0usize;
8084 while block_start < n_usize {
8085 let block_end = (block_start + threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE).min(n_usize);
8086 final_sorting_scan_left_to_right_32s_block_omp(
8087 t,
8088 sa,
8089 induction_bucket,
8090 &mut cache,
8091 block_start as FastSint,
8092 (block_end - block_start) as FastSint,
8093 threads,
8094 );
8095 block_start = block_end;
8096 }
8097}
8098
8099#[doc(hidden)]
8101pub fn final_bwt_scan_right_to_left_8u(
8102 t: &[u8],
8103 sa: &mut [SaSint],
8104 induction_bucket: &mut [SaSint],
8105 omp_block_start: FastSint,
8106 omp_block_size: FastSint,
8107) -> SaSint {
8108 if omp_block_size <= 0 {
8109 return -1;
8110 }
8111
8112 let mut index = -1;
8113
8114 let start =
8115 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") as FastSint;
8116 let mut i = omp_block_start + omp_block_size - 1;
8117 let mut j = start + 1;
8118 while i >= j {
8119 let i0 = usize::try_from(i).expect("loop index must be non-negative");
8120 let i1 = usize::try_from(i - 1).expect("loop index must be non-negative");
8121
8122 let mut p0 = sa[i0];
8123 if p0 == 0 {
8124 index = i0 as SaSint;
8125 }
8126 sa[i0] = p0 & SAINT_MAX;
8127 if p0 > 0 {
8128 p0 -= 1;
8129 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
8130 let c0 = t[p0_usize - usize::from(p0 > 0)] as SaSint;
8131 let c1 = t[p0_usize] as SaSint;
8132 sa[i0] = c1;
8133 induction_bucket[c1 as usize] -= 1;
8134 let slot = usize::try_from(induction_bucket[c1 as usize])
8135 .expect("bucket slot must be non-negative");
8136 let marked = c0 | SAINT_MIN;
8137 sa[slot] = if c0 <= c1 { p0 } else { marked };
8138 }
8139
8140 let mut p1 = sa[i1];
8141 if p1 == 0 {
8142 index = i1 as SaSint;
8143 }
8144 sa[i1] = p1 & SAINT_MAX;
8145 if p1 > 0 {
8146 p1 -= 1;
8147 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
8148 let c0 = t[p1_usize - usize::from(p1 > 0)] as SaSint;
8149 let c1 = t[p1_usize] as SaSint;
8150 sa[i1] = c1;
8151 induction_bucket[c1 as usize] -= 1;
8152 let slot = usize::try_from(induction_bucket[c1 as usize])
8153 .expect("bucket slot must be non-negative");
8154 let marked = c0 | SAINT_MIN;
8155 sa[slot] = if c0 <= c1 { p1 } else { marked };
8156 }
8157
8158 i -= 2;
8159 }
8160
8161 j -= 1;
8162 while i >= j {
8163 let idx = usize::try_from(i).expect("loop index must be non-negative");
8164 let mut p = sa[idx];
8165 if p == 0 {
8166 index = idx as SaSint;
8167 }
8168 sa[idx] = p & SAINT_MAX;
8169 if p > 0 {
8170 p -= 1;
8171 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8172 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8173 let c1 = t[p_usize] as SaSint;
8174 sa[idx] = c1;
8175 induction_bucket[c1 as usize] -= 1;
8176 let slot = usize::try_from(induction_bucket[c1 as usize])
8177 .expect("bucket slot must be non-negative");
8178 let marked = c0 | SAINT_MIN;
8179 sa[slot] = if c0 <= c1 { p } else { marked };
8180 }
8181
8182 i -= 1;
8183 }
8184
8185 index
8186}
8187
8188#[doc(hidden)]
8190pub fn final_bwt_aux_scan_right_to_left_8u(
8191 t: &[u8],
8192 sa: &mut [SaSint],
8193 rm: SaSint,
8194 i_out: &mut [SaSint],
8195 induction_bucket: &mut [SaSint],
8196 omp_block_start: FastSint,
8197 omp_block_size: FastSint,
8198) {
8199 if omp_block_size <= 0 {
8200 return;
8201 }
8202
8203 let start =
8204 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") as FastSint;
8205 let mut i = omp_block_start + omp_block_size - 1;
8206 let mut j = start + 1;
8207 while i >= j {
8208 let i0 = usize::try_from(i).expect("loop index must be non-negative");
8209 let i1 = usize::try_from(i - 1).expect("loop index must be non-negative");
8210
8211 let mut p0 = sa[i0];
8212 sa[i0] = p0 & SAINT_MAX;
8213 if p0 > 0 {
8214 p0 -= 1;
8215 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
8216 let c0 = t[p0_usize - usize::from(p0 > 0)] as SaSint;
8217 let c1 = t[p0_usize] as SaSint;
8218 sa[i0] = c1;
8219 induction_bucket[c1 as usize] -= 1;
8220 let slot = usize::try_from(induction_bucket[c1 as usize])
8221 .expect("bucket slot must be non-negative");
8222 let marked = c0 | SAINT_MIN;
8223 sa[slot] = if c0 <= c1 { p0 } else { marked };
8224 if (p0 & rm) == 0 {
8225 let out_idx =
8226 usize::try_from(p0 / (rm + 1)).expect("sample index must be non-negative");
8227 i_out[out_idx] = induction_bucket[t[p0_usize] as usize] + 1;
8228 }
8229 }
8230
8231 let mut p1 = sa[i1];
8232 sa[i1] = p1 & SAINT_MAX;
8233 if p1 > 0 {
8234 p1 -= 1;
8235 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
8236 let c0 = t[p1_usize - usize::from(p1 > 0)] as SaSint;
8237 let c1 = t[p1_usize] as SaSint;
8238 sa[i1] = c1;
8239 induction_bucket[c1 as usize] -= 1;
8240 let slot = usize::try_from(induction_bucket[c1 as usize])
8241 .expect("bucket slot must be non-negative");
8242 let marked = c0 | SAINT_MIN;
8243 sa[slot] = if c0 <= c1 { p1 } else { marked };
8244 if (p1 & rm) == 0 {
8245 let out_idx =
8246 usize::try_from(p1 / (rm + 1)).expect("sample index must be non-negative");
8247 i_out[out_idx] = induction_bucket[t[p1_usize] as usize] + 1;
8248 }
8249 }
8250
8251 i -= 2;
8252 }
8253
8254 j -= 1;
8255 while i >= j {
8256 let idx = usize::try_from(i).expect("loop index must be non-negative");
8257 let mut p = sa[idx];
8258 sa[idx] = p & SAINT_MAX;
8259 if p > 0 {
8260 p -= 1;
8261 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8262 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8263 let c1 = t[p_usize] as SaSint;
8264 sa[idx] = c1;
8265 induction_bucket[c1 as usize] -= 1;
8266 let slot = usize::try_from(induction_bucket[c1 as usize])
8267 .expect("bucket slot must be non-negative");
8268 let marked = c0 | SAINT_MIN;
8269 sa[slot] = if c0 <= c1 { p } else { marked };
8270 if (p & rm) == 0 {
8271 let out_idx =
8272 usize::try_from(p / (rm + 1)).expect("sample index must be non-negative");
8273 i_out[out_idx] = induction_bucket[t[p_usize] as usize] + 1;
8274 }
8275 }
8276
8277 i -= 1;
8278 }
8279}
8280
8281#[doc(hidden)]
8283pub fn final_sorting_scan_right_to_left_8u(
8284 t: &[u8],
8285 sa: &mut [SaSint],
8286 induction_bucket: &mut [SaSint],
8287 omp_block_start: FastSint,
8288 omp_block_size: FastSint,
8289) {
8290 if omp_block_size <= 0 {
8291 return;
8292 }
8293
8294 let prefetch_distance = 64usize;
8295 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8296 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8297 let mut i = start + size - 1;
8298 let mut j = start + prefetch_distance + 1;
8299
8300 while i >= j {
8301 let mut p0 = sa[i];
8302 sa[i] = p0 & SAINT_MAX;
8303 if p0 > 0 {
8304 p0 -= 1;
8305 let p0_usize = p0 as usize;
8306 let bucket0 = t[p0_usize] as usize;
8307 induction_bucket[bucket0] -= 1;
8308 let slot0 = induction_bucket[bucket0] as usize;
8309 sa[slot0] = p0
8310 | ((usize::from(t[p0_usize - usize::from(p0 > 0)] > t[p0_usize]) as SaSint)
8311 << (SAINT_BIT - 1));
8312 }
8313
8314 let mut p1 = sa[i - 1];
8315 sa[i - 1] = p1 & SAINT_MAX;
8316 if p1 > 0 {
8317 p1 -= 1;
8318 let p1_usize = p1 as usize;
8319 let bucket1 = t[p1_usize] as usize;
8320 induction_bucket[bucket1] -= 1;
8321 let slot1 = induction_bucket[bucket1] as usize;
8322 sa[slot1] = p1
8323 | ((usize::from(t[p1_usize - usize::from(p1 > 0)] > t[p1_usize]) as SaSint)
8324 << (SAINT_BIT - 1));
8325 }
8326
8327 i -= 2;
8328 }
8329
8330 j -= prefetch_distance + 1;
8331 while i >= j {
8332 let mut p = sa[i];
8333 sa[i] = p & SAINT_MAX;
8334 if p > 0 {
8335 p -= 1;
8336 let p_usize = p as usize;
8337 let bucket = t[p_usize] as usize;
8338 induction_bucket[bucket] -= 1;
8339 let slot = induction_bucket[bucket] as usize;
8340 sa[slot] = p
8341 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8342 << (SAINT_BIT - 1));
8343 }
8344
8345 if i == 0 {
8346 break;
8347 }
8348 i -= 1;
8349 }
8350}
8351
8352#[doc(hidden)]
8354pub fn final_gsa_scan_right_to_left_8u(
8355 t: &[u8],
8356 sa: &mut [SaSint],
8357 induction_bucket: &mut [SaSint],
8358 omp_block_start: FastSint,
8359 omp_block_size: FastSint,
8360) {
8361 if omp_block_size <= 0 {
8362 return;
8363 }
8364
8365 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8366 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8367 let mut i = start + size;
8368 while i > start {
8369 i -= 1;
8370 let mut p = sa[i];
8371 sa[i] = p & SAINT_MAX;
8372 if p > 0 {
8373 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8374 if t[p_usize - 1] > 0 {
8375 p -= 1;
8376 let bucket =
8377 t[usize::try_from(p).expect("suffix index must be non-negative")] as usize;
8378 induction_bucket[bucket] -= 1;
8379 let slot = usize::try_from(induction_bucket[bucket])
8380 .expect("bucket slot must be non-negative");
8381 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8382 sa[slot] = p
8383 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8384 << (SAINT_BIT - 1));
8385 }
8386 }
8387 }
8388}
8389
8390#[doc(hidden)]
8392pub fn final_sorting_scan_right_to_left_32s(
8393 t: &[SaSint],
8394 sa: &mut [SaSint],
8395 induction_bucket: &mut [SaSint],
8396 omp_block_start: FastSint,
8397 omp_block_size: FastSint,
8398) {
8399 if omp_block_size <= 0 {
8400 return;
8401 }
8402
8403 let prefetch_distance: FastSint = 64;
8404 let mut i = omp_block_start + omp_block_size - 1;
8405 let mut j = omp_block_start + 2 * prefetch_distance + 1;
8406
8407 while i >= j {
8408 let i0 = i as usize;
8409 let mut p0 = sa[i0];
8410 sa[i0] = p0 & SAINT_MAX;
8411 if p0 > 0 {
8412 p0 -= 1;
8413 let p0u = p0 as usize;
8414 let bucket0 = t[p0u] as usize;
8415 induction_bucket[bucket0] -= 1;
8416 let slot0 = induction_bucket[bucket0] as usize;
8417 sa[slot0] = p0
8418 | ((usize::from(t[p0u - usize::from(p0 > 0)] > t[p0u]) as SaSint)
8419 << (SAINT_BIT - 1));
8420 }
8421
8422 let i1 = (i - 1) as usize;
8423 let mut p1 = sa[i1];
8424 sa[i1] = p1 & SAINT_MAX;
8425 if p1 > 0 {
8426 p1 -= 1;
8427 let p1u = p1 as usize;
8428 let bucket1 = t[p1u] as usize;
8429 induction_bucket[bucket1] -= 1;
8430 let slot1 = induction_bucket[bucket1] as usize;
8431 sa[slot1] = p1
8432 | ((usize::from(t[p1u - usize::from(p1 > 0)] > t[p1u]) as SaSint)
8433 << (SAINT_BIT - 1));
8434 }
8435 i -= 2;
8436 }
8437
8438 j -= 2 * prefetch_distance + 1;
8439 while i >= j {
8440 let iu = i as usize;
8441 let mut p = sa[iu];
8442 sa[iu] = p & SAINT_MAX;
8443 if p > 0 {
8444 p -= 1;
8445 let pu = p as usize;
8446 let bucket = t[pu] as usize;
8447 induction_bucket[bucket] -= 1;
8448 let slot = induction_bucket[bucket] as usize;
8449 sa[slot] = p
8450 | ((usize::from(t[pu - usize::from(p > 0)] > t[pu]) as SaSint) << (SAINT_BIT - 1));
8451 }
8452 i -= 1;
8453 }
8454}
8455
8456#[doc(hidden)]
8458pub fn final_bwt_scan_right_to_left_8u_block_prepare(
8459 t: &[u8],
8460 sa: &mut [SaSint],
8461 k: SaSint,
8462 buckets: &mut [SaSint],
8463 cache: &mut [ThreadCache],
8464 omp_block_start: FastSint,
8465 omp_block_size: FastSint,
8466) -> FastSint {
8467 if omp_block_size <= 0 {
8468 return 0;
8469 }
8470 let k_usize = usize::try_from(k).expect("k must be non-negative");
8471 buckets[..k_usize].fill(0);
8472 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8473 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8474 let mut count = 0usize;
8475 let mut i = start + size;
8476 while i > start {
8477 i -= 1;
8478 let mut p = sa[i];
8479 sa[i] = p & SAINT_MAX;
8480 if p > 0 {
8481 p -= 1;
8482 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8483 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8484 let c1 = t[p_usize] as SaSint;
8485 sa[i] = c1;
8486 buckets[c1 as usize] += 1;
8487 cache[count].symbol = c1;
8488 cache[count].index = if c0 <= c1 { p } else { c0 | SAINT_MIN };
8489 count += 1;
8490 }
8491 }
8492 count as FastSint
8493}
8494
8495#[doc(hidden)]
8497pub fn final_bwt_aux_scan_right_to_left_8u_block_prepare(
8498 t: &[u8],
8499 sa: &mut [SaSint],
8500 k: SaSint,
8501 buckets: &mut [SaSint],
8502 cache: &mut [ThreadCache],
8503 omp_block_start: FastSint,
8504 omp_block_size: FastSint,
8505) -> FastSint {
8506 if omp_block_size <= 0 {
8507 return 0;
8508 }
8509 let k_usize = usize::try_from(k).expect("k must be non-negative");
8510 buckets[..k_usize].fill(0);
8511 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8512 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8513 let mut count = 0usize;
8514 let mut i = start + size;
8515 while i > start {
8516 i -= 1;
8517 let mut p = sa[i];
8518 sa[i] = p & SAINT_MAX;
8519 if p > 0 {
8520 p -= 1;
8521 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8522 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8523 let c1 = t[p_usize] as SaSint;
8524 sa[i] = c1;
8525 buckets[c1 as usize] += 1;
8526 cache[count].symbol = c1;
8527 cache[count].index = if c0 <= c1 { p } else { c0 | SAINT_MIN };
8528 cache[count + 1].index = p;
8529 count += 2;
8530 }
8531 }
8532 count as FastSint
8533}
8534
8535#[doc(hidden)]
8537pub fn final_sorting_scan_right_to_left_8u_block_prepare(
8538 t: &[u8],
8539 sa: &mut [SaSint],
8540 k: SaSint,
8541 buckets: &mut [SaSint],
8542 cache: &mut [ThreadCache],
8543 omp_block_start: FastSint,
8544 omp_block_size: FastSint,
8545) -> FastSint {
8546 if omp_block_size <= 0 {
8547 return 0;
8548 }
8549
8550 let k_usize = usize::try_from(k).expect("k must be non-negative");
8551 buckets[..k_usize].fill(0);
8552
8553 let start =
8554 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") as FastSint;
8555 let mut i = omp_block_start + omp_block_size - 1;
8556 let mut j = start + 1;
8557 let mut count = 0usize;
8558
8559 while i >= j {
8560 let i0 = usize::try_from(i).expect("loop index must be non-negative");
8561 let i1 = usize::try_from(i - 1).expect("loop index must be non-negative");
8562
8563 let mut p0 = sa[i0];
8564 sa[i0] = p0 & SAINT_MAX;
8565 if p0 > 0 {
8566 p0 -= 1;
8567 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
8568 let c0 = t[p0_usize] as SaSint;
8569 buckets[c0 as usize] += 1;
8570 cache[count].symbol = c0;
8571 cache[count].index = p0
8572 | ((usize::from(t[p0_usize - usize::from(p0 > 0)] > t[p0_usize]) as SaSint)
8573 << (SAINT_BIT - 1));
8574 count += 1;
8575 }
8576
8577 let mut p1 = sa[i1];
8578 sa[i1] = p1 & SAINT_MAX;
8579 if p1 > 0 {
8580 p1 -= 1;
8581 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
8582 let c1 = t[p1_usize] as SaSint;
8583 buckets[c1 as usize] += 1;
8584 cache[count].symbol = c1;
8585 cache[count].index = p1
8586 | ((usize::from(t[p1_usize - usize::from(p1 > 0)] > t[p1_usize]) as SaSint)
8587 << (SAINT_BIT - 1));
8588 count += 1;
8589 }
8590
8591 i -= 2;
8592 }
8593
8594 j -= 1;
8595 while i >= j {
8596 let idx = usize::try_from(i).expect("loop index must be non-negative");
8597 let mut p = sa[idx];
8598 sa[idx] = p & SAINT_MAX;
8599 if p > 0 {
8600 p -= 1;
8601 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8602 let c = t[p_usize] as SaSint;
8603 buckets[c as usize] += 1;
8604 cache[count].symbol = c;
8605 cache[count].index = p
8606 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8607 << (SAINT_BIT - 1));
8608 count += 1;
8609 }
8610
8611 i -= 1;
8612 }
8613
8614 count as FastSint
8615}
8616
8617#[doc(hidden)]
8619pub fn final_order_scan_right_to_left_8u_block_place(
8620 sa: &mut [SaSint],
8621 buckets: &mut [SaSint],
8622 cache: &[ThreadCache],
8623 count: FastSint,
8624) {
8625 if count <= 0 {
8626 return;
8627 }
8628 let count_usize = usize::try_from(count).expect("count must be non-negative");
8629 for entry in &cache[..count_usize] {
8630 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
8631 buckets[symbol] -= 1;
8632 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
8633 sa[slot] = entry.index;
8634 }
8635}
8636
8637#[doc(hidden)]
8639pub fn final_gsa_scan_right_to_left_8u_block_place(
8640 sa: &mut [SaSint],
8641 buckets: &mut [SaSint],
8642 cache: &[ThreadCache],
8643 count: FastSint,
8644) {
8645 if count <= 0 {
8646 return;
8647 }
8648 let count_usize = usize::try_from(count).expect("count must be non-negative");
8649 for entry in &cache[..count_usize] {
8650 if entry.symbol > 0 {
8651 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
8652 buckets[symbol] -= 1;
8653 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
8654 sa[slot] = entry.index;
8655 }
8656 }
8657}
8658
8659#[doc(hidden)]
8661pub fn final_bwt_aux_scan_right_to_left_8u_block_place(
8662 sa: &mut [SaSint],
8663 rm: SaSint,
8664 i_out: &mut [SaSint],
8665 buckets: &mut [SaSint],
8666 cache: &[ThreadCache],
8667 count: FastSint,
8668) {
8669 if count <= 0 {
8670 return;
8671 }
8672 let count_usize = usize::try_from(count).expect("count must be non-negative");
8673 let mut i = 0usize;
8674 while i < count_usize {
8675 let symbol = usize::try_from(cache[i].symbol).expect("cache symbol must be non-negative");
8676 buckets[symbol] -= 1;
8677 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
8678 sa[slot] = cache[i].index;
8679 if (cache[i + 1].index & rm) == 0 {
8680 let sample_index = usize::try_from((cache[i + 1].index & SAINT_MAX) / (rm + 1))
8681 .expect("sample index must be non-negative");
8682 i_out[sample_index] = buckets[symbol] + 1;
8683 }
8684 i += 2;
8685 }
8686}
8687
8688#[doc(hidden)]
8690pub fn final_sorting_scan_right_to_left_32s_block_gather(
8691 t: &[SaSint],
8692 sa: &mut [SaSint],
8693 cache: &mut [ThreadCache],
8694 omp_block_start: FastSint,
8695 omp_block_size: FastSint,
8696) {
8697 if omp_block_size <= 0 {
8698 return;
8699 }
8700 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8701 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8702 for offset in 0..size {
8703 let i = start + offset;
8704 let mut symbol = SAINT_MIN;
8705 let mut p = sa[i];
8706 sa[i] = p & SAINT_MAX;
8707 if p > 0 {
8708 p -= 1;
8709 let p_usize = p as usize;
8710 cache[offset].index = p
8711 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8712 << (SAINT_BIT - 1));
8713 symbol = t[p_usize];
8714 }
8715 cache[offset].symbol = symbol;
8716 }
8717}
8718
8719#[doc(hidden)]
8721pub fn final_sorting_scan_right_to_left_32s_block_sort(
8722 t: &[SaSint],
8723 induction_bucket: &mut [SaSint],
8724 cache: &mut [ThreadCache],
8725 omp_block_start: FastSint,
8726 omp_block_size: FastSint,
8727) {
8728 if omp_block_size <= 0 {
8729 return;
8730 }
8731 let prefetch_distance = 64usize;
8732 let start = omp_block_start as usize;
8733 let mut i = start + omp_block_size as usize - 1;
8734 let mut j = start + prefetch_distance + 1;
8735
8736 while i >= j {
8737 let ci = i - start;
8738 let v0 = cache[ci].symbol;
8739 if v0 >= 0 {
8740 let bucket_index0 = v0 as usize;
8741 induction_bucket[bucket_index0] -= 1;
8742 cache[ci].symbol = induction_bucket[bucket_index0];
8743 if cache[ci].symbol >= omp_block_start as SaSint {
8744 let ni = cache[ci].symbol as usize;
8745 let cni = ni - start;
8746 let mut np = cache[ci].index;
8747 cache[ci].index = np & SAINT_MAX;
8748 if np > 0 {
8749 np -= 1;
8750 let np_usize = np as usize;
8751 cache[cni].index = np
8752 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
8753 as SaSint)
8754 << (SAINT_BIT - 1));
8755 cache[cni].symbol = t[np_usize];
8756 }
8757 }
8758 }
8759
8760 let i1 = i - 1;
8761 let ci1 = i1 - start;
8762 let v1 = cache[ci1].symbol;
8763 if v1 >= 0 {
8764 let bucket_index1 = v1 as usize;
8765 induction_bucket[bucket_index1] -= 1;
8766 cache[ci1].symbol = induction_bucket[bucket_index1];
8767 if cache[ci1].symbol >= omp_block_start as SaSint {
8768 let ni = cache[ci1].symbol as usize;
8769 let cni = ni - start;
8770 let mut np = cache[ci1].index;
8771 cache[ci1].index = np & SAINT_MAX;
8772 if np > 0 {
8773 np -= 1;
8774 let np_usize = np as usize;
8775 cache[cni].index = np
8776 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
8777 as SaSint)
8778 << (SAINT_BIT - 1));
8779 cache[cni].symbol = t[np_usize];
8780 }
8781 }
8782 }
8783
8784 i -= 2;
8785 }
8786
8787 j -= prefetch_distance + 1;
8788 while i >= j {
8789 let ci = i - start;
8790 let v = cache[ci].symbol;
8791 if v >= 0 {
8792 let bucket_index = v as usize;
8793 induction_bucket[bucket_index] -= 1;
8794 cache[ci].symbol = induction_bucket[bucket_index];
8795 if cache[ci].symbol >= omp_block_start as SaSint {
8796 let ni = cache[ci].symbol as usize;
8797 let cni = ni - start;
8798 let mut np = cache[ci].index;
8799 cache[ci].index = np & SAINT_MAX;
8800 if np > 0 {
8801 np -= 1;
8802 let np_usize = np as usize;
8803 cache[cni].index = np
8804 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
8805 as SaSint)
8806 << (SAINT_BIT - 1));
8807 cache[cni].symbol = t[np_usize];
8808 }
8809 }
8810 }
8811
8812 if i == 0 {
8813 break;
8814 }
8815 i -= 1;
8816 }
8817}
8818
8819#[doc(hidden)]
8821pub fn final_bwt_scan_right_to_left_8u_block_omp(
8822 t: &[u8],
8823 sa: &mut [SaSint],
8824 k: SaSint,
8825 induction_bucket: &mut [SaSint],
8826 block_start: FastSint,
8827 block_size: FastSint,
8828 threads: SaSint,
8829 thread_state: &mut [ThreadState],
8830) {
8831 if block_size <= 0 {
8832 return;
8833 }
8834 let k_usize = usize::try_from(k).expect("k must be non-negative");
8835 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
8836 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
8837 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
8838 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
8839 let _ = final_bwt_scan_right_to_left_8u(t, sa, induction_bucket, block_start, block_size);
8840 return;
8841 }
8842
8843 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
8844 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
8845 let omp_block_start = omp_thread_num * omp_block_stride;
8846 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
8847 omp_block_stride
8848 } else {
8849 block_size_usize - omp_block_start
8850 };
8851 state.count = final_bwt_scan_right_to_left_8u_block_prepare(
8852 t,
8853 sa,
8854 k,
8855 &mut state.buckets,
8856 &mut state.cache,
8857 block_start + omp_block_start as FastSint,
8858 omp_block_size as FastSint,
8859 );
8860 }
8861 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
8862 for c in 0..k_usize {
8863 let a = induction_bucket[c];
8864 let b = state.buckets[c];
8865 induction_bucket[c] = a - b;
8866 state.buckets[c] = a;
8867 }
8868 }
8869 for state in thread_state.iter_mut().take(omp_num_threads) {
8870 final_order_scan_right_to_left_8u_block_place(
8871 sa,
8872 &mut state.buckets,
8873 &state.cache,
8874 state.count,
8875 );
8876 }
8877}
8878
8879#[doc(hidden)]
8881pub fn final_bwt_aux_scan_right_to_left_8u_block_omp(
8882 t: &[u8],
8883 sa: &mut [SaSint],
8884 k: SaSint,
8885 rm: SaSint,
8886 i_out: &mut [SaSint],
8887 induction_bucket: &mut [SaSint],
8888 block_start: FastSint,
8889 block_size: FastSint,
8890 threads: SaSint,
8891 thread_state: &mut [ThreadState],
8892) {
8893 if block_size <= 0 {
8894 return;
8895 }
8896 let k_usize = usize::try_from(k).expect("k must be non-negative");
8897 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
8898 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
8899 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
8900 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
8901 final_bwt_aux_scan_right_to_left_8u(
8902 t,
8903 sa,
8904 rm,
8905 i_out,
8906 induction_bucket,
8907 block_start,
8908 block_size,
8909 );
8910 return;
8911 }
8912
8913 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
8914 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
8915 let omp_block_start = omp_thread_num * omp_block_stride;
8916 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
8917 omp_block_stride
8918 } else {
8919 block_size_usize - omp_block_start
8920 };
8921 state.count = final_bwt_aux_scan_right_to_left_8u_block_prepare(
8922 t,
8923 sa,
8924 k,
8925 &mut state.buckets,
8926 &mut state.cache,
8927 block_start + omp_block_start as FastSint,
8928 omp_block_size as FastSint,
8929 );
8930 }
8931 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
8932 for c in 0..k_usize {
8933 let a = induction_bucket[c];
8934 let b = state.buckets[c];
8935 induction_bucket[c] = a - b;
8936 state.buckets[c] = a;
8937 }
8938 }
8939 for state in thread_state.iter_mut().take(omp_num_threads) {
8940 final_bwt_aux_scan_right_to_left_8u_block_place(
8941 sa,
8942 rm,
8943 i_out,
8944 &mut state.buckets,
8945 &state.cache,
8946 state.count,
8947 );
8948 }
8949}
8950
8951#[doc(hidden)]
8953pub fn final_sorting_scan_right_to_left_8u_block_omp(
8954 t: &[u8],
8955 sa: &mut [SaSint],
8956 k: SaSint,
8957 induction_bucket: &mut [SaSint],
8958 block_start: FastSint,
8959 block_size: FastSint,
8960 threads: SaSint,
8961 thread_state: &mut [ThreadState],
8962) {
8963 if block_size <= 0 {
8964 return;
8965 }
8966 let k_usize = usize::try_from(k).expect("k must be non-negative");
8967 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
8968 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
8969 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
8970 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
8971 final_sorting_scan_right_to_left_8u(t, sa, induction_bucket, block_start, block_size);
8972 return;
8973 }
8974
8975 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
8976 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
8977 let omp_block_start = omp_thread_num * omp_block_stride;
8978 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
8979 omp_block_stride
8980 } else {
8981 block_size_usize - omp_block_start
8982 };
8983 state.count = final_sorting_scan_right_to_left_8u_block_prepare(
8984 t,
8985 sa,
8986 k,
8987 &mut state.buckets,
8988 &mut state.cache,
8989 block_start + omp_block_start as FastSint,
8990 omp_block_size as FastSint,
8991 );
8992 }
8993 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
8994 for c in 0..k_usize {
8995 let a = induction_bucket[c];
8996 let b = state.buckets[c];
8997 induction_bucket[c] = a - b;
8998 state.buckets[c] = a;
8999 }
9000 }
9001 for state in thread_state.iter_mut().take(omp_num_threads) {
9002 final_order_scan_right_to_left_8u_block_place(
9003 sa,
9004 &mut state.buckets,
9005 &state.cache,
9006 state.count,
9007 );
9008 }
9009}
9010
9011#[doc(hidden)]
9013pub fn final_gsa_scan_right_to_left_8u_block_omp(
9014 t: &[u8],
9015 sa: &mut [SaSint],
9016 k: SaSint,
9017 induction_bucket: &mut [SaSint],
9018 block_start: FastSint,
9019 block_size: FastSint,
9020 threads: SaSint,
9021 thread_state: &mut [ThreadState],
9022) {
9023 if block_size <= 0 {
9024 return;
9025 }
9026 let k_usize = usize::try_from(k).expect("k must be non-negative");
9027 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
9028 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
9029 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
9030 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
9031 final_gsa_scan_right_to_left_8u(t, sa, induction_bucket, block_start, block_size);
9032 return;
9033 }
9034
9035 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
9036 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
9037 let omp_block_start = omp_thread_num * omp_block_stride;
9038 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9039 omp_block_stride
9040 } else {
9041 block_size_usize - omp_block_start
9042 };
9043 state.count = final_sorting_scan_right_to_left_8u_block_prepare(
9044 t,
9045 sa,
9046 k,
9047 &mut state.buckets,
9048 &mut state.cache,
9049 block_start + omp_block_start as FastSint,
9050 omp_block_size as FastSint,
9051 );
9052 }
9053 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
9054 for c in 0..k_usize {
9055 let a = induction_bucket[c];
9056 let b = state.buckets[c];
9057 induction_bucket[c] = a - b;
9058 state.buckets[c] = a;
9059 }
9060 }
9061 for state in thread_state.iter_mut().take(omp_num_threads) {
9062 final_gsa_scan_right_to_left_8u_block_place(
9063 sa,
9064 &mut state.buckets,
9065 &state.cache,
9066 state.count,
9067 );
9068 }
9069}
9070
9071#[doc(hidden)]
9073pub fn final_sorting_scan_right_to_left_32s_block_omp(
9074 t: &[SaSint],
9075 sa: &mut [SaSint],
9076 buckets: &mut [SaSint],
9077 cache: &mut [ThreadCache],
9078 block_start: FastSint,
9079 block_size: FastSint,
9080 threads: SaSint,
9081) {
9082 if threads <= 1 || block_size < 16_384 {
9083 final_sorting_scan_right_to_left_32s(t, sa, buckets, block_start, block_size);
9084 return;
9085 }
9086
9087 final_sorting_scan_right_to_left_32s_block_gather(t, sa, cache, block_start, block_size);
9088 final_sorting_scan_right_to_left_32s_block_sort(t, buckets, cache, block_start, block_size);
9089 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
9090 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
9091 let omp_num_threads = threads_usize.min(block_size_usize);
9092 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
9093 for omp_thread_num in 0..omp_num_threads {
9094 let omp_block_start = omp_thread_num * omp_block_stride;
9095 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9096 omp_block_stride
9097 } else {
9098 block_size_usize - omp_block_start
9099 };
9100 compact_and_place_cached_suffixes(
9101 sa,
9102 cache,
9103 omp_block_start as FastSint,
9104 omp_block_size as FastSint,
9105 );
9106 }
9107}
9108
9109#[doc(hidden)]
9111pub fn final_bwt_scan_right_to_left_8u_omp(
9112 t: &[u8],
9113 sa: &mut [SaSint],
9114 n: SaSint,
9115 k: SaSint,
9116 induction_bucket: &mut [SaSint],
9117 threads: SaSint,
9118 thread_state: &mut [ThreadState],
9119) -> SaSint {
9120 if threads == 1 || n < 65_536 {
9121 return final_bwt_scan_right_to_left_8u(t, sa, induction_bucket, 0, n as FastSint);
9122 }
9123 let mut index = -1;
9124 let mut block_start = usize::try_from(n).expect("n must be non-negative");
9125 while block_start > 0 {
9126 block_start -= 1;
9127 if sa[block_start] == 0 {
9128 index = block_start as SaSint;
9129 } else {
9130 let threads_usize = usize::try_from(threads)
9131 .expect("threads must be non-negative")
9132 .min(thread_state.len())
9133 .max(1);
9134 let max_back =
9135 threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize);
9136 let block_max_end = block_start.saturating_sub(max_back);
9137 let mut block_end = block_start;
9138 while block_end > block_max_end && sa[block_end - 1] != 0 {
9139 block_end -= 1;
9140 }
9141 let size = block_start - block_end + 1;
9142 if size < 32 {
9143 let res = final_bwt_scan_right_to_left_8u(
9144 t,
9145 sa,
9146 induction_bucket,
9147 block_end as FastSint,
9148 size as FastSint,
9149 );
9150 if res >= 0 {
9151 index = res;
9152 }
9153 } else {
9154 final_bwt_scan_right_to_left_8u_block_omp(
9155 t,
9156 sa,
9157 k,
9158 induction_bucket,
9159 block_end as FastSint,
9160 size as FastSint,
9161 threads,
9162 thread_state,
9163 );
9164 }
9165 block_start = block_end;
9166 }
9167 }
9168 index
9169}
9170
9171#[doc(hidden)]
9173pub fn final_bwt_aux_scan_right_to_left_8u_omp(
9174 t: &[u8],
9175 sa: &mut [SaSint],
9176 n: SaSint,
9177 k: SaSint,
9178 rm: SaSint,
9179 i_out: &mut [SaSint],
9180 induction_bucket: &mut [SaSint],
9181 threads: SaSint,
9182 thread_state: &mut [ThreadState],
9183) {
9184 if threads == 1 || n < 65_536 {
9185 final_bwt_aux_scan_right_to_left_8u(t, sa, rm, i_out, induction_bucket, 0, n as FastSint);
9186 return;
9187 }
9188 let mut block_start = usize::try_from(n).expect("n must be non-negative");
9189 while block_start > 0 {
9190 block_start -= 1;
9191 if sa[block_start] != 0 {
9192 let threads_usize = usize::try_from(threads)
9193 .expect("threads must be non-negative")
9194 .min(thread_state.len())
9195 .max(1);
9196 let max_back = threads_usize
9197 * (LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize) / 2);
9198 let block_max_end = block_start.saturating_sub(max_back);
9199 let mut block_end = block_start;
9200 while block_end > block_max_end && sa[block_end - 1] != 0 {
9201 block_end -= 1;
9202 }
9203 let size = block_start - block_end + 1;
9204 if size < 32 {
9205 final_bwt_aux_scan_right_to_left_8u(
9206 t,
9207 sa,
9208 rm,
9209 i_out,
9210 induction_bucket,
9211 block_end as FastSint,
9212 size as FastSint,
9213 );
9214 } else {
9215 final_bwt_aux_scan_right_to_left_8u_block_omp(
9216 t,
9217 sa,
9218 k,
9219 rm,
9220 i_out,
9221 induction_bucket,
9222 block_end as FastSint,
9223 size as FastSint,
9224 threads,
9225 thread_state,
9226 );
9227 }
9228 block_start = block_end;
9229 }
9230 }
9231}
9232
9233#[doc(hidden)]
9235pub fn final_sorting_scan_right_to_left_8u_omp(
9236 t: &[u8],
9237 sa: &mut [SaSint],
9238 omp_block_start: FastSint,
9239 omp_block_size: FastSint,
9240 k: SaSint,
9241 induction_bucket: &mut [SaSint],
9242 threads: SaSint,
9243 thread_state: &mut [ThreadState],
9244) {
9245 if threads == 1 || omp_block_size < 65_536 {
9246 final_sorting_scan_right_to_left_8u(
9247 t,
9248 sa,
9249 induction_bucket,
9250 omp_block_start,
9251 omp_block_size,
9252 );
9253 return;
9254 }
9255 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9256 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9257 let mut block_start = start + size;
9258 while block_start > start {
9259 block_start -= 1;
9260 if sa[block_start] != 0 {
9261 let threads_usize = usize::try_from(threads)
9262 .expect("threads must be non-negative")
9263 .min(thread_state.len())
9264 .max(1);
9265 let max_back =
9266 threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize);
9267 let block_max_end = block_start.saturating_sub(max_back).max(start);
9268 let mut block_end = block_start;
9269 while block_end > block_max_end && sa[block_end - 1] != 0 {
9270 block_end -= 1;
9271 }
9272 let span = block_start - block_end + 1;
9273 if span < 32 {
9274 final_sorting_scan_right_to_left_8u(
9275 t,
9276 sa,
9277 induction_bucket,
9278 block_end as FastSint,
9279 span as FastSint,
9280 );
9281 } else {
9282 final_sorting_scan_right_to_left_8u_block_omp(
9283 t,
9284 sa,
9285 k,
9286 induction_bucket,
9287 block_end as FastSint,
9288 span as FastSint,
9289 threads,
9290 thread_state,
9291 );
9292 }
9293 block_start = block_end;
9294 }
9295 }
9296}
9297
9298#[doc(hidden)]
9300pub fn final_gsa_scan_right_to_left_8u_omp(
9301 t: &[u8],
9302 sa: &mut [SaSint],
9303 omp_block_start: FastSint,
9304 omp_block_size: FastSint,
9305 k: SaSint,
9306 induction_bucket: &mut [SaSint],
9307 threads: SaSint,
9308 thread_state: &mut [ThreadState],
9309) {
9310 if threads == 1 || omp_block_size < 65_536 {
9311 final_gsa_scan_right_to_left_8u(t, sa, induction_bucket, omp_block_start, omp_block_size);
9312 return;
9313 }
9314 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9315 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9316 let mut block_start = start + size;
9317 while block_start > start {
9318 block_start -= 1;
9319 if sa[block_start] != 0 {
9320 let threads_usize = usize::try_from(threads)
9321 .expect("threads must be non-negative")
9322 .min(thread_state.len())
9323 .max(1);
9324 let max_back =
9325 threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize);
9326 let block_max_end = block_start.saturating_sub(max_back).max(start);
9327 let mut block_end = block_start;
9328 while block_end > block_max_end && sa[block_end - 1] != 0 {
9329 block_end -= 1;
9330 }
9331 let span = block_start - block_end + 1;
9332 if span < 32 {
9333 final_gsa_scan_right_to_left_8u(
9334 t,
9335 sa,
9336 induction_bucket,
9337 block_end as FastSint,
9338 span as FastSint,
9339 );
9340 } else {
9341 final_gsa_scan_right_to_left_8u_block_omp(
9342 t,
9343 sa,
9344 k,
9345 induction_bucket,
9346 block_end as FastSint,
9347 span as FastSint,
9348 threads,
9349 thread_state,
9350 );
9351 }
9352 block_start = block_end;
9353 }
9354 }
9355}
9356
9357#[doc(hidden)]
9359pub fn final_sorting_scan_right_to_left_32s_omp(
9360 t: &[SaSint],
9361 sa: &mut [SaSint],
9362 n: SaSint,
9363 induction_bucket: &mut [SaSint],
9364 threads: SaSint,
9365 thread_state: &mut [ThreadState],
9366) {
9367 if threads == 1 || n < 65_536 {
9368 final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n as FastSint);
9369 return;
9370 }
9371 if thread_state.is_empty() {
9372 final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n as FastSint);
9373 return;
9374 }
9375 let threads_usize = usize::try_from(threads)
9376 .expect("threads must be non-negative")
9377 .max(1);
9378 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
9379 let mut block_start = isize::try_from(n).expect("n must fit isize") - 1;
9380 while block_start >= 0 {
9381 let block_end = (block_start
9382 - isize::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
9383 .expect("block span must fit isize"))
9384 .max(-1);
9385 final_sorting_scan_right_to_left_32s_block_omp(
9386 t,
9387 sa,
9388 induction_bucket,
9389 &mut cache,
9390 (block_end + 1) as FastSint,
9391 (block_start - block_end) as FastSint,
9392 threads,
9393 );
9394 block_start = block_end;
9395 }
9396}
9397
9398#[doc(hidden)]
9400pub fn clear_lms_suffixes_omp(
9401 sa: &mut [SaSint],
9402 n: SaSint,
9403 k: SaSint,
9404 bucket_start: &[SaSint],
9405 bucket_end: &[SaSint],
9406 threads: SaSint,
9407) {
9408 let k_usize = usize::try_from(k).expect("k must be non-negative");
9409 let thread_count = if threads > 1 && n >= 65536 {
9410 usize::try_from(threads).expect("threads must be positive")
9411 } else {
9412 1
9413 };
9414 for t in 0..thread_count {
9415 let mut c = t;
9416 while c < k_usize {
9417 if bucket_end[c] > bucket_start[c] {
9418 let start =
9419 usize::try_from(bucket_start[c]).expect("bucket start must be non-negative");
9420 let end = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
9421 sa[start..end].fill(0);
9422 }
9423 c += thread_count;
9424 }
9425 }
9426}
9427
9428#[doc(hidden)]
9430pub fn induce_final_order_8u_omp(
9431 t: &[u8],
9432 sa: &mut [SaSint],
9433 n: SaSint,
9434 k: SaSint,
9435 flags: SaSint,
9436 r: SaSint,
9437 i_out: Option<&mut [SaSint]>,
9438 buckets: &mut [SaSint],
9439 threads: SaSint,
9440 thread_state: &mut [ThreadState],
9441) -> SaSint {
9442 if (flags & LIBSAIS_FLAGS_BWT) == 0 {
9443 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9444 buckets[6 * ALPHABET_SIZE] = buckets[7 * ALPHABET_SIZE] - 1;
9445 }
9446
9447 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9448 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9449 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9450
9451 final_sorting_scan_left_to_right_8u_omp(
9452 t,
9453 sa,
9454 n as FastSint,
9455 k,
9456 bucket_start,
9457 threads,
9458 thread_state,
9459 );
9460 if threads > 1 && n >= 65_536 {
9461 clear_lms_suffixes_omp(
9462 sa,
9463 n,
9464 ALPHABET_SIZE as SaSint,
9465 bucket_start,
9466 bucket_end,
9467 threads,
9468 );
9469 }
9470
9471 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9472 flip_suffix_markers_omp(sa, bucket_end[0], threads);
9473 final_gsa_scan_right_to_left_8u_omp(
9474 t,
9475 sa,
9476 bucket_end[0] as FastSint,
9477 n as FastSint - bucket_end[0] as FastSint,
9478 k,
9479 bucket_end,
9480 threads,
9481 thread_state,
9482 );
9483 } else {
9484 final_sorting_scan_right_to_left_8u_omp(
9485 t,
9486 sa,
9487 0,
9488 n as FastSint,
9489 k,
9490 bucket_end,
9491 threads,
9492 thread_state,
9493 );
9494 }
9495
9496 0
9497 } else if let Some(i_out) = i_out {
9498 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9499 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9500 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9501
9502 final_bwt_aux_scan_left_to_right_8u_omp(
9503 t,
9504 sa,
9505 n as FastSint,
9506 k,
9507 r - 1,
9508 i_out,
9509 bucket_start,
9510 threads,
9511 thread_state,
9512 );
9513 if threads > 1 && n >= 65_536 {
9514 clear_lms_suffixes_omp(
9515 sa,
9516 n,
9517 ALPHABET_SIZE as SaSint,
9518 bucket_start,
9519 bucket_end,
9520 threads,
9521 );
9522 }
9523 final_bwt_aux_scan_right_to_left_8u_omp(
9524 t,
9525 sa,
9526 n,
9527 k,
9528 r - 1,
9529 i_out,
9530 bucket_end,
9531 threads,
9532 thread_state,
9533 );
9534 0
9535 } else {
9536 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9537 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9538 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9539
9540 final_bwt_scan_left_to_right_8u_omp(
9541 t,
9542 sa,
9543 n as FastSint,
9544 k,
9545 bucket_start,
9546 threads,
9547 thread_state,
9548 );
9549 if threads > 1 && n >= 65_536 {
9550 clear_lms_suffixes_omp(
9551 sa,
9552 n,
9553 ALPHABET_SIZE as SaSint,
9554 bucket_start,
9555 bucket_end,
9556 threads,
9557 );
9558 }
9559 final_bwt_scan_right_to_left_8u_omp(t, sa, n, k, bucket_end, threads, thread_state)
9560 }
9561}
9562
9563#[doc(hidden)]
9565pub fn induce_final_order_32s_6k(
9566 t: &[SaSint],
9567 sa: &mut [SaSint],
9568 n: SaSint,
9569 k: SaSint,
9570 buckets: &mut [SaSint],
9571 threads: SaSint,
9572 thread_state: &mut [ThreadState],
9573) {
9574 let k_usize = usize::try_from(k).expect("k must be non-negative");
9575 let (_head, tail) = buckets.split_at_mut(4 * k_usize);
9576 let (left, right) = tail.split_at_mut(k_usize);
9577 final_sorting_scan_left_to_right_32s_omp(t, sa, n, left, threads, thread_state);
9578 final_sorting_scan_right_to_left_32s_omp(t, sa, n, right, threads, thread_state);
9579}
9580
9581#[doc(hidden)]
9583pub fn induce_final_order_32s_4k(
9584 t: &[SaSint],
9585 sa: &mut [SaSint],
9586 n: SaSint,
9587 k: SaSint,
9588 buckets: &mut [SaSint],
9589 threads: SaSint,
9590 thread_state: &mut [ThreadState],
9591) {
9592 let k_usize = usize::try_from(k).expect("k must be non-negative");
9593 let (_head, tail) = buckets.split_at_mut(2 * k_usize);
9594 let (left, right) = tail.split_at_mut(k_usize);
9595 final_sorting_scan_left_to_right_32s_omp(t, sa, n, left, threads, thread_state);
9596 final_sorting_scan_right_to_left_32s_omp(t, sa, n, right, threads, thread_state);
9597}
9598
9599#[doc(hidden)]
9601pub fn induce_final_order_32s_2k(
9602 t: &[SaSint],
9603 sa: &mut [SaSint],
9604 n: SaSint,
9605 k: SaSint,
9606 buckets: &mut [SaSint],
9607 threads: SaSint,
9608 thread_state: &mut [ThreadState],
9609) {
9610 let k_usize = usize::try_from(k).expect("k must be non-negative");
9611 let (right, left) = buckets.split_at_mut(k_usize);
9612 final_sorting_scan_left_to_right_32s_omp(t, sa, n, left, threads, thread_state);
9613 final_sorting_scan_right_to_left_32s_omp(t, sa, n, right, threads, thread_state);
9614}
9615
9616#[doc(hidden)]
9618pub fn induce_final_order_32s_1k(
9619 t: &[SaSint],
9620 sa: &mut [SaSint],
9621 n: SaSint,
9622 k: SaSint,
9623 buckets: &mut [SaSint],
9624 threads: SaSint,
9625 thread_state: &mut [ThreadState],
9626) {
9627 count_suffixes_32s(t, n, k, buckets);
9628 initialize_buckets_start_32s_1k(k, buckets);
9629 final_sorting_scan_left_to_right_32s_omp(t, sa, n, buckets, threads, thread_state);
9630
9631 count_suffixes_32s(t, n, k, buckets);
9632 initialize_buckets_end_32s_1k(k, buckets);
9633 final_sorting_scan_right_to_left_32s_omp(t, sa, n, buckets, threads, thread_state);
9634}
9635
9636#[doc(hidden)]
9638pub fn renumber_unique_and_nonunique_lms_suffixes_32s(
9639 t: &mut [SaSint],
9640 sa: &mut [SaSint],
9641 m: SaSint,
9642 mut f: SaSint,
9643 omp_block_start: FastSint,
9644 omp_block_size: FastSint,
9645) -> SaSint {
9646 if omp_block_size <= 0 {
9647 return f;
9648 }
9649
9650 let prefetch_distance = 64 as SaSint;
9651 let m_usize = usize::try_from(m).expect("m must be non-negative");
9652 let (sa_head, sam) = sa.split_at_mut(m_usize);
9653 let mut i = omp_block_start as SaSint;
9654 let mut j = omp_block_start as SaSint + omp_block_size as SaSint - 2 * prefetch_distance - 3;
9655
9656 while i < j {
9657 let p0 = sa_head[i as usize] as SaUint;
9658 let p0_half = (p0 >> 1) as usize;
9659 let mut s0 = sam[p0_half];
9660 if s0 < 0 {
9661 t[p0 as usize] |= SAINT_MIN;
9662 f += 1;
9663 s0 = i + SAINT_MIN + f;
9664 }
9665 sam[p0_half] = s0 - f;
9666
9667 let p1 = sa_head[(i + 1) as usize] as SaUint;
9668 let p1_half = (p1 >> 1) as usize;
9669 let mut s1 = sam[p1_half];
9670 if s1 < 0 {
9671 t[p1 as usize] |= SAINT_MIN;
9672 f += 1;
9673 s1 = i + 1 + SAINT_MIN + f;
9674 }
9675 sam[p1_half] = s1 - f;
9676
9677 let p2 = sa_head[(i + 2) as usize] as SaUint;
9678 let p2_half = (p2 >> 1) as usize;
9679 let mut s2 = sam[p2_half];
9680 if s2 < 0 {
9681 t[p2 as usize] |= SAINT_MIN;
9682 f += 1;
9683 s2 = i + 2 + SAINT_MIN + f;
9684 }
9685 sam[p2_half] = s2 - f;
9686
9687 let p3 = sa_head[(i + 3) as usize] as SaUint;
9688 let p3_half = (p3 >> 1) as usize;
9689 let mut s3 = sam[p3_half];
9690 if s3 < 0 {
9691 t[p3 as usize] |= SAINT_MIN;
9692 f += 1;
9693 s3 = i + 3 + SAINT_MIN + f;
9694 }
9695 sam[p3_half] = s3 - f;
9696
9697 i += 4;
9698 }
9699
9700 j += 2 * prefetch_distance + 3;
9701 while i < j {
9702 let p = sa_head[i as usize] as SaUint;
9703 let p_half = (p >> 1) as usize;
9704 let mut s = sam[p_half];
9705 if s < 0 {
9706 t[p as usize] |= SAINT_MIN;
9707 f += 1;
9708 s = i + SAINT_MIN + f;
9709 }
9710 sam[p_half] = s - f;
9711 i += 1;
9712 }
9713
9714 f
9715}
9716
9717#[doc(hidden)]
9719pub fn compact_unique_and_nonunique_lms_suffixes_32s(
9720 sa: &mut [SaSint],
9721 m: SaSint,
9722 pl: &mut FastSint,
9723 pr: &mut FastSint,
9724 omp_block_start: FastSint,
9725 omp_block_size: FastSint,
9726) {
9727 if omp_block_size <= 0 {
9728 return;
9729 }
9730
9731 let m_usize = usize::try_from(m).expect("m must be non-negative");
9732 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9733 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9734
9735 let source: Vec<SaSint> = sa[m_usize + start..m_usize + start + size].to_vec();
9736 let mut l = usize::try_from(*pl - 1).expect("left position must be positive");
9737 let mut r = usize::try_from(*pr - 1).expect("right position must be positive");
9738
9739 for &p in source.iter().rev() {
9740 let pu = p as SaUint;
9741 sa[l] = (pu & SAINT_MAX as SaUint) as SaSint;
9742 l = l.saturating_sub(usize::from((pu as SaSint) < 0));
9743
9744 sa[r] = pu.wrapping_sub(1) as SaSint;
9745 r = r.saturating_sub(usize::from((pu as SaSint) > 0));
9746 }
9747
9748 *pl = l as FastSint + 1;
9749 *pr = r as FastSint + 1;
9750}
9751
9752#[doc(hidden)]
9754pub fn count_unique_suffixes(
9755 sa: &[SaSint],
9756 m: SaSint,
9757 omp_block_start: FastSint,
9758 omp_block_size: FastSint,
9759) -> SaSint {
9760 if omp_block_size <= 0 {
9761 return 0;
9762 }
9763
9764 let m_usize = usize::try_from(m).expect("m must be non-negative");
9765 let sam = &sa[m_usize..];
9766 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9767 let block_end =
9768 i + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9769 let j = block_end.saturating_sub(67);
9770 let mut f0 = 0;
9771 let mut f1 = 0;
9772 let mut f2 = 0;
9773 let mut f3 = 0;
9774
9775 while i < j {
9776 f0 += SaSint::from(
9777 sam[usize::try_from((sa[i] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9778 );
9779 f1 += SaSint::from(
9780 sam[usize::try_from((sa[i + 1] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9781 );
9782 f2 += SaSint::from(
9783 sam[usize::try_from((sa[i + 2] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9784 );
9785 f3 += SaSint::from(
9786 sam[usize::try_from((sa[i + 3] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9787 );
9788 i += 4;
9789 }
9790
9791 while i < block_end {
9792 f0 += SaSint::from(
9793 sam[usize::try_from((sa[i] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9794 );
9795 i += 1;
9796 }
9797
9798 f0 + f1 + f2 + f3
9799}
9800
9801#[doc(hidden)]
9803pub fn renumber_unique_and_nonunique_lms_suffixes_32s_omp(
9804 t: &mut [SaSint],
9805 sa: &mut [SaSint],
9806 m: SaSint,
9807 threads: SaSint,
9808 thread_state: &mut [ThreadState],
9809) -> SaSint {
9810 let mut f = 0;
9811 if threads == 1 || m < 65_536 {
9812 f = renumber_unique_and_nonunique_lms_suffixes_32s(t, sa, m, 0, 0, m as FastSint);
9813 } else {
9814 let threads_usize = usize::try_from(threads)
9815 .expect("threads must be non-negative")
9816 .max(1);
9817 let m_usize = usize::try_from(m).expect("m must be non-negative");
9818 let omp_num_threads = threads_usize.min(m_usize.max(1));
9819 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
9820
9821 for omp_thread_num in 0..omp_num_threads {
9822 let omp_block_start = omp_thread_num * omp_block_stride;
9823 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9824 omp_block_stride
9825 } else {
9826 m_usize - omp_block_start
9827 };
9828
9829 thread_state[omp_thread_num].count = count_unique_suffixes(
9830 sa,
9831 m,
9832 omp_block_start as FastSint,
9833 omp_block_size as FastSint,
9834 ) as FastSint;
9835 }
9836
9837 let mut count = 0 as FastSint;
9838 for omp_thread_num in 0..omp_num_threads {
9839 let omp_block_start = omp_thread_num * omp_block_stride;
9840 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9841 omp_block_stride
9842 } else {
9843 m_usize - omp_block_start
9844 };
9845
9846 if omp_thread_num + 1 == omp_num_threads {
9847 f = (count + thread_state[omp_thread_num].count) as SaSint;
9848 }
9849
9850 renumber_unique_and_nonunique_lms_suffixes_32s(
9851 t,
9852 sa,
9853 m,
9854 count as SaSint,
9855 omp_block_start as FastSint,
9856 omp_block_size as FastSint,
9857 );
9858 count += thread_state[omp_thread_num].count;
9859 }
9860 }
9861
9862 f
9863}
9864
9865#[doc(hidden)]
9867pub fn compact_unique_and_nonunique_lms_suffixes_32s_omp(
9868 sa: &mut [SaSint],
9869 n: SaSint,
9870 m: SaSint,
9871 fs: SaSint,
9872 f: SaSint,
9873 threads: SaSint,
9874 thread_state: &mut [ThreadState],
9875) {
9876 let half_n = (n as FastSint) >> 1;
9877 if threads == 1 || n < 131_072 || m >= fs {
9878 let mut l = m as FastSint;
9879 let mut r = n as FastSint + fs as FastSint;
9880 compact_unique_and_nonunique_lms_suffixes_32s(sa, m, &mut l, &mut r, 0, half_n);
9881 } else {
9882 let threads_usize = usize::try_from(threads)
9883 .expect("threads must be non-negative")
9884 .max(1);
9885 let half_n_usize = usize::try_from(half_n).expect("half_n must be non-negative");
9886 let omp_num_threads = threads_usize.min(half_n_usize.max(1));
9887 let omp_block_stride = (half_n_usize / omp_num_threads) & !15usize;
9888
9889 for omp_thread_num in 0..omp_num_threads {
9890 let omp_block_start = omp_thread_num * omp_block_stride;
9891 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9892 omp_block_stride
9893 } else {
9894 half_n_usize - omp_block_start
9895 };
9896
9897 thread_state[omp_thread_num].position =
9898 m as FastSint + half_n + omp_block_start as FastSint + omp_block_size as FastSint;
9899 thread_state[omp_thread_num].count =
9900 m as FastSint + omp_block_start as FastSint + omp_block_size as FastSint;
9901
9902 let mut position = thread_state[omp_thread_num].position;
9903 let mut count = thread_state[omp_thread_num].count;
9904 compact_unique_and_nonunique_lms_suffixes_32s(
9905 sa,
9906 m,
9907 &mut position,
9908 &mut count,
9909 omp_block_start as FastSint,
9910 omp_block_size as FastSint,
9911 );
9912 thread_state[omp_thread_num].position = position;
9913 thread_state[omp_thread_num].count = count;
9914 }
9915
9916 let mut position = m as FastSint;
9917 for t in (0..omp_num_threads).rev() {
9918 let omp_block_end = if t + 1 < omp_num_threads {
9919 omp_block_stride * (t + 1)
9920 } else {
9921 half_n_usize
9922 };
9923 let count =
9924 m as FastSint + half_n + omp_block_end as FastSint - thread_state[t].position;
9925 if count > 0 {
9926 position -= count;
9927 let dst = usize::try_from(position).expect("destination must be non-negative");
9928 let src =
9929 usize::try_from(thread_state[t].position).expect("source must be non-negative");
9930 let len = usize::try_from(count).expect("length must be non-negative");
9931 sa.copy_within(src..src + len, dst);
9932 }
9933 }
9934
9935 let mut position = n as FastSint + fs as FastSint;
9936 for t in (0..omp_num_threads).rev() {
9937 let omp_block_end = if t + 1 < omp_num_threads {
9938 omp_block_stride * (t + 1)
9939 } else {
9940 half_n_usize
9941 };
9942 let count = m as FastSint + omp_block_end as FastSint - thread_state[t].count;
9943 if count > 0 {
9944 position -= count;
9945 let dst = usize::try_from(position).expect("destination must be non-negative");
9946 let src =
9947 usize::try_from(thread_state[t].count).expect("source must be non-negative");
9948 let len = usize::try_from(count).expect("length must be non-negative");
9949 sa.copy_within(src..src + len, dst);
9950 }
9951 }
9952 }
9953
9954 let copy_dst = usize::try_from(n + fs - m).expect("copy destination must be non-negative");
9955 let copy_src = usize::try_from(m - f).expect("copy source must be non-negative");
9956 let copy_len = usize::try_from(f).expect("copy length must be non-negative");
9957 sa.copy_within(copy_src..copy_src + copy_len, copy_dst);
9958}
9959
9960#[doc(hidden)]
9962pub fn compact_lms_suffixes_32s_omp(
9963 t: &mut [SaSint],
9964 sa: &mut [SaSint],
9965 n: SaSint,
9966 m: SaSint,
9967 fs: SaSint,
9968 threads: SaSint,
9969 thread_state: &mut [ThreadState],
9970) -> SaSint {
9971 let f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(t, sa, m, threads, thread_state);
9972 compact_unique_and_nonunique_lms_suffixes_32s_omp(sa, n, m, fs, f, threads, thread_state);
9973 f
9974}
9975
9976#[doc(hidden)]
9978pub fn merge_unique_lms_suffixes_32s(
9979 t: &mut [SaSint],
9980 sa: &mut [SaSint],
9981 n: SaSint,
9982 m: SaSint,
9983 l: FastSint,
9984 omp_block_start: FastSint,
9985 omp_block_size: FastSint,
9986) {
9987 if omp_block_size <= 0 {
9988 return;
9989 }
9990
9991 let n_usize = usize::try_from(n).expect("n must be non-negative");
9992 let m_usize = usize::try_from(m).expect("m must be non-negative");
9993 let mut src_index = n_usize - m_usize - 1 + usize::try_from(l).expect("l must be non-negative");
9994 let mut tmp = sa[src_index] as FastSint;
9995 src_index += 1;
9996
9997 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9998 let block_end =
9999 i + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
10000 let j = block_end.saturating_sub(6);
10001 while i < j {
10002 let c0 = t[i];
10003 if c0 < 0 {
10004 t[i] = c0 & SAINT_MAX;
10005 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint;
10006 i += 1;
10007 tmp = sa[src_index] as FastSint;
10008 src_index += 1;
10009 }
10010
10011 let c1 = t[i + 1];
10012 if c1 < 0 {
10013 t[i + 1] = c1 & SAINT_MAX;
10014 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint + 1;
10015 i += 1;
10016 tmp = sa[src_index] as FastSint;
10017 src_index += 1;
10018 }
10019
10020 let c2 = t[i + 2];
10021 if c2 < 0 {
10022 t[i + 2] = c2 & SAINT_MAX;
10023 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint + 2;
10024 i += 1;
10025 tmp = sa[src_index] as FastSint;
10026 src_index += 1;
10027 }
10028
10029 let c3 = t[i + 3];
10030 if c3 < 0 {
10031 t[i + 3] = c3 & SAINT_MAX;
10032 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint + 3;
10033 i += 1;
10034 tmp = sa[src_index] as FastSint;
10035 src_index += 1;
10036 }
10037
10038 i += 4;
10039 }
10040
10041 while i < block_end {
10042 let c = t[i];
10043 if c < 0 {
10044 t[i] = c & SAINT_MAX;
10045 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint;
10046 i += 1;
10047 tmp = sa[src_index] as FastSint;
10048 src_index += 1;
10049 }
10050 i += 1;
10051 }
10052}
10053
10054#[doc(hidden)]
10056pub fn merge_nonunique_lms_suffixes_32s(
10057 sa: &mut [SaSint],
10058 n: SaSint,
10059 m: SaSint,
10060 l: FastSint,
10061 omp_block_start: FastSint,
10062 omp_block_size: FastSint,
10063) {
10064 if omp_block_size <= 0 {
10065 return;
10066 }
10067
10068 let n_usize = usize::try_from(n).expect("n must be non-negative");
10069 let m_usize = usize::try_from(m).expect("m must be non-negative");
10070 let mut src_index = n_usize - m_usize - 1 + usize::try_from(l).expect("l must be non-negative");
10071 let mut tmp = sa[src_index];
10072 src_index += 1;
10073
10074 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
10075 let block_end =
10076 i + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
10077 let j = block_end.saturating_sub(3);
10078 while i < j {
10079 if sa[i] == 0 {
10080 sa[i] = tmp;
10081 tmp = sa[src_index];
10082 src_index += 1;
10083 }
10084 if sa[i + 1] == 0 {
10085 sa[i + 1] = tmp;
10086 tmp = sa[src_index];
10087 src_index += 1;
10088 }
10089 if sa[i + 2] == 0 {
10090 sa[i + 2] = tmp;
10091 tmp = sa[src_index];
10092 src_index += 1;
10093 }
10094 if sa[i + 3] == 0 {
10095 sa[i + 3] = tmp;
10096 tmp = sa[src_index];
10097 src_index += 1;
10098 }
10099 i += 4;
10100 }
10101
10102 while i < block_end {
10103 if sa[i] == 0 {
10104 sa[i] = tmp;
10105 tmp = sa[src_index];
10106 src_index += 1;
10107 }
10108 i += 1;
10109 }
10110}
10111
10112#[doc(hidden)]
10114pub fn merge_unique_lms_suffixes_32s_omp(
10115 t: &mut [SaSint],
10116 sa: &mut [SaSint],
10117 n: SaSint,
10118 m: SaSint,
10119 threads: SaSint,
10120 thread_state: &mut [ThreadState],
10121) {
10122 if threads == 1 || n < 65_536 {
10123 merge_unique_lms_suffixes_32s(t, sa, n, m, 0, 0, n as FastSint);
10124 return;
10125 }
10126
10127 let threads_usize = usize::try_from(threads)
10128 .expect("threads must be non-negative")
10129 .max(1);
10130 let n_usize = usize::try_from(n).expect("n must be non-negative");
10131 let omp_num_threads = threads_usize.min(n_usize.max(1));
10132 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
10133
10134 for omp_thread_num in 0..omp_num_threads {
10135 let omp_block_start = omp_thread_num * omp_block_stride;
10136 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10137 omp_block_stride
10138 } else {
10139 n_usize - omp_block_start
10140 };
10141
10142 thread_state[omp_thread_num].count = count_negative_marked_suffixes(
10143 t,
10144 omp_block_start as FastSint,
10145 omp_block_size as FastSint,
10146 ) as FastSint;
10147 }
10148
10149 let mut count = 0 as FastSint;
10150 for omp_thread_num in 0..omp_num_threads {
10151 let omp_block_start = omp_thread_num * omp_block_stride;
10152 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10153 omp_block_stride
10154 } else {
10155 n_usize - omp_block_start
10156 };
10157
10158 merge_unique_lms_suffixes_32s(
10159 t,
10160 sa,
10161 n,
10162 m,
10163 count,
10164 omp_block_start as FastSint,
10165 omp_block_size as FastSint,
10166 );
10167 count += thread_state[omp_thread_num].count;
10168 }
10169}
10170
10171#[doc(hidden)]
10173pub fn merge_nonunique_lms_suffixes_32s_omp(
10174 sa: &mut [SaSint],
10175 n: SaSint,
10176 m: SaSint,
10177 f: SaSint,
10178 threads: SaSint,
10179 thread_state: &mut [ThreadState],
10180) {
10181 if threads == 1 || m < 65_536 {
10182 merge_nonunique_lms_suffixes_32s(sa, n, m, f as FastSint, 0, m as FastSint);
10183 return;
10184 }
10185
10186 let threads_usize = usize::try_from(threads)
10187 .expect("threads must be non-negative")
10188 .max(1);
10189 let m_usize = usize::try_from(m).expect("m must be non-negative");
10190 let omp_num_threads = threads_usize.min(m_usize.max(1));
10191 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
10192
10193 for omp_thread_num in 0..omp_num_threads {
10194 let omp_block_start = omp_thread_num * omp_block_stride;
10195 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10196 omp_block_stride
10197 } else {
10198 m_usize - omp_block_start
10199 };
10200
10201 thread_state[omp_thread_num].count =
10202 count_zero_marked_suffixes(sa, omp_block_start as FastSint, omp_block_size as FastSint)
10203 as FastSint;
10204 }
10205
10206 let mut count = f as FastSint;
10207 for omp_thread_num in 0..omp_num_threads {
10208 let omp_block_start = omp_thread_num * omp_block_stride;
10209 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10210 omp_block_stride
10211 } else {
10212 m_usize - omp_block_start
10213 };
10214
10215 merge_nonunique_lms_suffixes_32s(
10216 sa,
10217 n,
10218 m,
10219 count,
10220 omp_block_start as FastSint,
10221 omp_block_size as FastSint,
10222 );
10223 count += thread_state[omp_thread_num].count;
10224 }
10225}
10226
/// Merges compacted LMS suffixes back into `sa` in two steps.
///
/// First runs `merge_unique_lms_suffixes_32s_omp` (which also rewrites `t`), then
/// `merge_nonunique_lms_suffixes_32s_omp` with `f` as its starting offset. The same
/// `thread_state` scratch is handed to both steps, one after the other.
#[doc(hidden)]
pub fn merge_compacted_lms_suffixes_32s_omp(
    t: &mut [SaSint],
    sa: &mut [SaSint],
    n: SaSint,
    m: SaSint,
    f: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
) {
    merge_unique_lms_suffixes_32s_omp(t, sa, n, m, threads, thread_state);
    merge_nonunique_lms_suffixes_32s_omp(sa, n, m, f, threads, thread_state);
}
10241
10242#[doc(hidden)]
10244pub fn reconstruct_compacted_lms_suffixes_32s_2k_omp(
10245 t: &mut [SaSint],
10246 sa: &mut [SaSint],
10247 n: SaSint,
10248 k: SaSint,
10249 m: SaSint,
10250 fs: SaSint,
10251 f: SaSint,
10252 buckets: &mut [SaSint],
10253 local_buckets: SaSint,
10254 threads: SaSint,
10255 thread_state: &mut [ThreadState],
10256) {
10257 if f > 0 {
10258 let dst = usize::try_from(n - m - 1).expect("destination must be non-negative");
10259 let src = usize::try_from(n + fs - m).expect("source must be non-negative");
10260 let len = usize::try_from(f).expect("length must be non-negative");
10261 sa.copy_within(src..src + len, dst);
10262
10263 let _ = count_and_gather_compacted_lms_suffixes_32s_2k_omp(
10264 t,
10265 sa,
10266 n,
10267 k,
10268 buckets,
10269 local_buckets,
10270 threads,
10271 thread_state,
10272 );
10273 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
10274
10275 let src_copy = 0usize;
10276 let dst_copy = usize::try_from(n - m - 1 + f).expect("destination must be non-negative");
10277 let copy_len = usize::try_from(m - f).expect("copy length must be non-negative");
10278 sa.copy_within(src_copy..src_copy + copy_len, dst_copy);
10279 sa[..usize::try_from(m).expect("m must be non-negative")].fill(0);
10280
10281 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads, thread_state);
10282 } else {
10283 let _ = count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
10284 reconstruct_lms_suffixes_omp(sa, n, m, threads);
10285 }
10286}
10287
10288#[doc(hidden)]
10290pub fn reconstruct_compacted_lms_suffixes_32s_1k_omp(
10291 t: &mut [SaSint],
10292 sa: &mut [SaSint],
10293 n: SaSint,
10294 m: SaSint,
10295 fs: SaSint,
10296 f: SaSint,
10297 threads: SaSint,
10298 thread_state: &mut [ThreadState],
10299) {
10300 if f > 0 {
10301 let dst = usize::try_from(n - m - 1).expect("destination must be non-negative");
10302 let src = usize::try_from(n + fs - m).expect("source must be non-negative");
10303 let len = usize::try_from(f).expect("length must be non-negative");
10304 sa.copy_within(src..src + len, dst);
10305
10306 let _ = gather_compacted_lms_suffixes_32s(t, sa, n);
10307 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
10308
10309 let dst_copy = usize::try_from(n - m - 1 + f).expect("destination must be non-negative");
10310 let copy_len = usize::try_from(m - f).expect("copy length must be non-negative");
10311 sa.copy_within(0..copy_len, dst_copy);
10312 sa[..usize::try_from(m).expect("m must be non-negative")].fill(0);
10313
10314 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads, thread_state);
10315 } else {
10316 let _ = gather_lms_suffixes_32s(t, sa, n);
10317 reconstruct_lms_suffixes_omp(sa, n, m, threads);
10318 }
10319}
10320
10321fn normalize_omp_threads(threads: SaSint) -> SaSint {
10322 if threads > 0 {
10323 threads
10324 } else {
10325 std::thread::available_parallelism()
10326 .map(|value| value.get() as SaSint)
10327 .unwrap_or(1)
10328 .max(1)
10329 }
10330}
10331
/// Recursive driver that suffix-sorts an integer (`SaSint`) text.
///
/// `t_ptr` is read and written as `n` symbols and `sa_ptr` as `n + fs` entries, where `fs` is
/// first clamped to `SAINT_MAX - n`; the function asserts `n + fs <= sa_capacity`
/// (`sa_capacity` is presumably the number of entries reachable through `sa_ptr` — confirm
/// against callers).
///
/// One of five strategies is selected, in this order:
///
/// 1. if `k > 0`, `n` fits in `i32` and the free-space test passes, the problem is copied into
///    temporary `i32` vectors and handed to `crate::libsais_int_omp`;
/// 2. "6k": `6 * k` bucket entries are available in `fs` or in the local buffer;
/// 3. "4k": `4 * k` bucket entries are available and `n <= SAINT_MAX / 2`;
/// 4. "2k": `2 * k` bucket entries are available;
/// 5. "1k": `k` bucket entries, taken from the tail of `sa` or from a heap `Vec` when `fs < k`.
///
/// In strategies 2–5, when renumbering yields `names < m`, the function calls itself on a
/// reduced problem whose text is stored at the tail of the `sa` region.
///
/// Returns `0` on success, `-2` when a nested call returns non-zero (or a bucket pointer is
/// null), and on the `i32` path whatever `crate::libsais_int_omp` returned.
///
/// NOTE(review): the slices rebuilt from `t_ptr`, `sa_ptr` and `buckets_ptr` inside the `unsafe`
/// blocks can overlap (the bucket area may be carved out of `sa`, and in nested calls the text
/// lives in the same allocation as `sa`). Whether that is sound depends on how the callees
/// access them — confirm.
///
/// # Panics
///
/// Panics if `n`, the clamped `fs`, or (outside the `i32` path) `k` is negative, or if
/// `n + fs` exceeds `sa_capacity`.
fn libsais64_main_32s_recursion(
    t_ptr: *mut SaSint,
    sa_ptr: *mut SaSint,
    sa_capacity: usize,
    n: SaSint,
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
    _local_buffer: &mut [SaSint],
) -> SaSint {
    // Clamp the free space so that `n + fs` cannot exceed SAINT_MAX.
    let fs = fs.min(SAINT_MAX - n);
    let local_buffer_size = SaSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("fits");
    let n_usize = usize::try_from(n).expect("n must be non-negative");
    let fs_usize = usize::try_from(fs).expect("fs must be non-negative");
    let total_len = n_usize + fs_usize;
    assert!(total_len <= sa_capacity);

    // Strategy 1: delegate to the 32-bit implementation when the input is small enough.
    if k > 0 && n <= i32::MAX as SaSint {
        let int32_max = i32::MAX as SaSint;
        // Computed in i128 so the sum itself cannot overflow.
        let expanded_space = fs as i128 + fs as i128 + n as i128 + n as i128;
        let new_fs = if expanded_space <= int32_max as i128 {
            fs + fs + n
        } else {
            int32_max - n
        };

        if (new_fs / k >= 6)
            || (new_fs / k >= 4 && n <= int32_max / 2)
            || (new_fs / k < 4 && new_fs >= fs)
        {
            // Narrow the text into a temporary i32 copy; the suffix array is a fresh vector.
            let mut t32 = unsafe { std::slice::from_raw_parts(t_ptr, n_usize) }
                .iter()
                .map(|&value| (value as u64 as u32) as i32)
                .collect::<Vec<_>>();
            let mut sa32 = vec![0i32; n_usize + new_fs as usize];

            let index = crate::libsais_int_omp(
                &mut t32,
                &mut sa32,
                k as i32,
                new_fs as i32,
                threads as i32,
            );
            if index >= 0 {
                // Widen the text and the first `n` suffix-array entries back (zero-extended).
                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    for (dst, src) in t.iter_mut().zip(t32.iter()) {
                        *dst = (*src as u32) as SaSint;
                    }

                    let sa = std::slice::from_raw_parts_mut(sa_ptr, n_usize);
                    for (dst, src) in sa.iter_mut().zip(sa32.iter()) {
                        *dst = (*src as u32) as SaSint;
                    }
                }
            }

            return index as SaSint;
        }
    }

    // Strategy 2: 6k buckets.
    if k > 0 && ((fs / k) >= 6 || (local_buffer_size / k) >= 6) {
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 6 {
            1024usize
        } else {
            16usize
        };
        let need = 6 * k_usize;
        // Buckets live in the local buffer when it is larger than the free space, otherwise at
        // the (optionally aligned) tail of `sa`.
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            _local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 6 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_4k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            // Clear everything in front of the gathered block `sa[n - m .. n]`.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[..n_usize - m_usize].fill(0);
            }

            let first_lms_suffix = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[n_usize - m_usize]
            };
            let left_suffixes_count = unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
                    std::slice::from_raw_parts_mut(t_ptr, n_usize),
                    k,
                    buckets,
                    first_lms_suffix,
                )
            };

            unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                // The radix sort works on the part of the buckets that starts at `4 * k`.
                let (_, induction_bucket) = buckets.split_at_mut(4 * k_usize);
                radix_sort_lms_suffixes_32s_6k_omp(
                    t,
                    sa,
                    n,
                    m,
                    induction_bucket,
                    threads,
                    thread_state,
                );
                if (n / 8192) < k {
                    radix_sort_set_markers_32s_6k_omp(sa, k, induction_bucket, threads);
                }
                if threads > 1 && n >= 65_536 {
                    sa[n_usize - m_usize..n_usize].fill(0);
                }
                initialize_buckets_for_partial_sorting_32s_6k(
                    t,
                    k,
                    buckets,
                    first_lms_suffix,
                    left_suffixes_count,
                );
                induce_partial_order_32s_6k_omp(
                    t,
                    sa,
                    n,
                    k,
                    buckets,
                    first_lms_suffix,
                    left_suffixes_count,
                    threads,
                    thread_state,
                );
            }

            // Which renumbering routine runs depends on `(n / 8192) < k`; the same test below
            // decides whether compaction is performed.
            let names = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                if (n / 8192) < k {
                    renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
                        sa,
                        n,
                        m,
                        threads,
                        thread_state,
                    )
                } else {
                    renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state)
                }
            };

            if names < m {
                let f = if (n / 8192) < k {
                    unsafe {
                        let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                        let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                        compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
                    }
                } else {
                    0
                };

                // The reduced text occupies the last `m - f` entries of the `sa` region.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if libsais64_main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    _local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }

            // Final induction for `m > 1` uses the 4k routines.
            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_4k(k, buckets);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
            }
        } else {
            // `m <= 1`: move the last entry of `sa[..n]` to the front and induce with the 6k
            // routines.
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }

            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_6k(k, buckets);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                place_lms_suffixes_histogram_32s_6k(sa, n, k, m, buckets);
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                induce_final_order_32s_6k(t, sa, n, k, buckets, threads, thread_state);
            }
        }

        return 0;
    } else if k > 0 && n <= SAINT_MAX / 2 && ((fs / k) >= 4 || (local_buffer_size / k) >= 4) {
        // Strategy 3: 4k buckets.
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 4 {
            1024usize
        } else {
            16usize
        };
        let need = 4 * k_usize;
        // Same bucket placement rule as the 6k path, with a factor of 4.
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            _local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 4 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_2k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                initialize_buckets_for_radix_and_partial_sorting_32s_4k(
                    t,
                    k,
                    buckets,
                    sa[n_usize - m_usize],
                );
                // The radix sort works on the buckets starting at index 1.
                let (_, induction_bucket) = buckets.split_at_mut(1);
                radix_sort_lms_suffixes_32s_2k_omp(
                    t,
                    sa,
                    n,
                    m,
                    induction_bucket,
                    threads,
                    thread_state,
                );
                radix_sort_set_markers_32s_4k_omp(sa, k, induction_bucket, threads);
                place_lms_suffixes_interval_32s_4k(sa, n, k, m - 1, buckets);
                induce_partial_order_32s_4k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa, n, m, threads, thread_state)
            };
            if names < m {
                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
                };

                // The reduced text occupies the last `m - f` entries of the `sa` region.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if libsais64_main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    _local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }
        }

        // Final induction is shared by both `m` cases on this path.
        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_start_and_end_32s_4k(k, buckets);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
        }

        return 0;
    } else if k > 0 && ((fs / k) >= 2 || (local_buffer_size / k) >= 2) {
        // Strategy 4: 2k buckets.
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 2 {
            1024usize
        } else {
            16usize
        };
        let need = 2 * k_usize;
        // Same bucket placement rule as the 6k path, with a factor of 2.
        let use_local_buffer = local_buffer_size > fs;
        let buckets_ptr = if use_local_buffer {
            _local_buffer.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start =
                    if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 2 {
                        let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
                        let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                        (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                    } else {
                        total_len - need
                    };
                sa[start..].as_mut_ptr()
            }
        };

        let m = unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            count_and_gather_lms_suffixes_32s_2k_omp(
                t,
                sa,
                n,
                k,
                buckets,
                SaSint::from(use_local_buffer),
                threads,
                thread_state,
            )
        };
        if m > 1 {
            let m_usize = usize::try_from(m).expect("m must be non-negative");
            unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
                    t,
                    k,
                    buckets,
                    sa[n_usize - m_usize],
                );
                // The radix sort works on the buckets starting at index 1.
                let (_, induction_bucket) = buckets.split_at_mut(1);
                radix_sort_lms_suffixes_32s_2k_omp(
                    t,
                    sa,
                    n,
                    m,
                    induction_bucket,
                    threads,
                    thread_state,
                );
                place_lms_suffixes_interval_32s_2k(sa, n, k, m - 1, buckets);
            }

            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                initialize_buckets_start_and_end_32s_2k(k, buckets);
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                induce_partial_order_32s_2k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
            };
            if names < m {
                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
                };

                // The reduced text occupies the last `m - f` entries of the `sa` region.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if libsais64_main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    _local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    reconstruct_compacted_lms_suffixes_32s_2k_omp(
                        t,
                        sa,
                        n,
                        k,
                        m,
                        fs,
                        f,
                        buckets,
                        SaSint::from(use_local_buffer),
                        threads,
                        thread_state,
                    );
                }
            } else {
                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
                    count_lms_suffixes_32s_2k(t, n, k, buckets);
                }
            }
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                sa[0] = sa[n_usize - 1];
            }
        }

        // Place the LMS suffixes using bucket ends, then re-initialise the buckets for the
        // final induction.
        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_end_32s_2k(k, buckets);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            place_lms_suffixes_histogram_32s_2k(sa, n, k, m, buckets);
        }

        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
            initialize_buckets_start_and_end_32s_2k(k, buckets);
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            induce_final_order_32s_2k(t, sa, n, k, buckets, threads, thread_state);
        }

        return 0;
    } else {
        // Strategy 5: 1k buckets. A heap vector is used only when the free space cannot hold
        // `k` entries.
        let k_usize = usize::try_from(k).expect("k must be non-negative");
        let mut heap_buckets = if fs < k { Some(vec![0; k_usize]) } else { None };
        let alignment = if fs >= 1024 && (fs - 1024) >= k {
            1024usize
        } else {
            16usize
        };
        let mut buckets_ptr = if let Some(ref mut heap) = heap_buckets {
            heap.as_mut_ptr()
        } else {
            unsafe {
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let start = if fs_usize >= k_usize + alignment {
                    let byte_ptr = sa[total_len - k_usize - alignment..].as_mut_ptr() as usize;
                    let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
                    (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
                } else {
                    total_len - k_usize
                };
                sa[start..].as_mut_ptr()
            }
        };

        // NOTE(review): both pointer sources above come from live slices/vectors, so this
        // check looks like it can never fire — confirm before relying on the -2 path.
        if buckets_ptr.is_null() {
            return -2;
        }

        unsafe {
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            sa[..n_usize].fill(0);
        }

        unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            count_suffixes_32s(t, n, k, buckets);
        }
        unsafe {
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            initialize_buckets_end_32s_1k(k, buckets);
        }

        let m = unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            radix_sort_lms_suffixes_32s_1k(t, sa, n, buckets)
        };
        if m > 1 {
            unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                induce_partial_order_32s_1k_omp(t, sa, n, k, buckets, threads, thread_state);
            }

            let names = unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
            };
            if names < m {
                // Drop the heap buckets for the duration of the nested call; they are
                // re-created afterwards.
                if heap_buckets.is_some() {
                    let _ = heap_buckets.take();
                    buckets_ptr = std::ptr::null_mut();
                }

                let f = unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
                };

                // The reduced text occupies the last `m - f` entries of the `sa` region.
                let new_t_start =
                    total_len - usize::try_from(m - f).expect("m - f must be non-negative");
                if libsais64_main_32s_recursion(
                    unsafe {
                        std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
                            .as_mut_ptr()
                    },
                    sa_ptr,
                    sa_capacity,
                    m - f,
                    names - f,
                    fs + n - 2 * m + f,
                    threads,
                    thread_state,
                    _local_buffer,
                ) != 0
                {
                    return -2;
                }

                unsafe {
                    let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                    let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                    reconstruct_compacted_lms_suffixes_32s_1k_omp(
                        t,
                        sa,
                        n,
                        m,
                        fs,
                        f,
                        threads,
                        thread_state,
                    );
                }

                // Re-create the heap buckets that were dropped before the nested call.
                if buckets_ptr.is_null() {
                    heap_buckets = Some(vec![0; k_usize]);
                    buckets_ptr = heap_buckets
                        .as_mut()
                        .expect("heap buckets must exist")
                        .as_mut_ptr();
                    if buckets_ptr.is_null() {
                        return -2;
                    }
                }
            }

            // The bucket contents are recomputed from `t` before placing the LMS suffixes.
            unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                count_suffixes_32s(t, n, k, buckets);
            }
            unsafe {
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                initialize_buckets_end_32s_1k(k, buckets);
            }
            unsafe {
                let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
                let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
                let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
                place_lms_suffixes_interval_32s_1k(t, sa, k, m, buckets);
            }
        }

        unsafe {
            let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
            let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
            let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
            induce_final_order_32s_1k(t, sa, n, k, buckets, threads, thread_state);
        }

        0
    }
}
11036
11037fn libsais64_main_32s_entry(
11038 t: &mut [SaSint],
11039 sa: &mut [SaSint],
11040 n: SaSint,
11041 k: SaSint,
11042 fs: SaSint,
11043 threads: SaSint,
11044 thread_state: &mut [ThreadState],
11045) -> SaSint {
11046 let mut local_buffer = [0; 2 * LIBSAIS_LOCAL_BUFFER_SIZE];
11047 libsais64_main_32s_recursion(
11048 t.as_mut_ptr(),
11049 sa.as_mut_ptr(),
11050 sa.len(),
11051 n,
11052 k,
11053 fs,
11054 threads,
11055 thread_state,
11056 &mut local_buffer[LIBSAIS_LOCAL_BUFFER_SIZE..],
11057 )
11058}
11059
11060fn libsais64_main_8u(
11061 t: &[u8],
11062 sa: &mut [SaSint],
11063 buckets: &mut [SaSint],
11064 flags: SaSint,
11065 r: SaSint,
11066 i: Option<&mut [SaSint]>,
11067 fs: SaSint,
11068 freq: Option<&mut [SaSint]>,
11069 threads: SaSint,
11070 thread_state: &mut [ThreadState],
11071) -> SaSint {
11072 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
11073 let n_usize = usize::try_from(n).expect("n must be non-negative");
11074 let fs = fs.min(SAINT_MAX - n);
11075
11076 let m = count_and_gather_lms_suffixes_8u_omp(t, sa, n, buckets, threads, thread_state);
11077 let k = initialize_buckets_start_and_end_8u(buckets, freq);
11078
11079 if (flags & LIBSAIS_FLAGS_GSA) != 0 && (buckets[0] != 0 || buckets[2] != 0 || buckets[3] != 1) {
11080 return -1;
11081 }
11082
11083 if m > 0 {
11084 let m_usize = usize::try_from(m).expect("m must be non-negative");
11085 let first_lms_suffix = sa[n_usize - m_usize];
11086 let left_suffixes_count =
11087 initialize_buckets_for_lms_suffixes_radix_sort_8u(t, buckets, first_lms_suffix);
11088
11089 if threads > 1 && n >= 65_536 {
11090 sa[..n_usize - m_usize].fill(0);
11091 }
11092 radix_sort_lms_suffixes_8u_omp(t, sa, n, m, flags, buckets, threads, thread_state);
11093 if threads > 1 && n >= 65_536 {
11094 sa[n_usize - m_usize..n_usize].fill(0);
11095 }
11096
11097 initialize_buckets_for_partial_sorting_8u(
11098 t,
11099 buckets,
11100 first_lms_suffix,
11101 left_suffixes_count,
11102 );
11103 induce_partial_order_8u_omp(
11104 t,
11105 sa,
11106 n,
11107 k,
11108 flags,
11109 buckets,
11110 first_lms_suffix,
11111 left_suffixes_count,
11112 threads,
11113 thread_state,
11114 );
11115
11116 let names = renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
11117 if names < m {
11118 if libsais64_main_32s_entry(
11119 unsafe {
11120 std::slice::from_raw_parts_mut(
11121 sa[n_usize + usize::try_from(fs).expect("fs must be non-negative")
11122 - m_usize..]
11123 .as_mut_ptr(),
11124 m_usize,
11125 )
11126 },
11127 sa,
11128 m,
11129 names,
11130 fs + n - 2 * m,
11131 threads,
11132 thread_state,
11133 ) != 0
11134 {
11135 return -2;
11136 }
11137
11138 gather_lms_suffixes_8u_omp(t, sa, n, threads, thread_state);
11139 reconstruct_lms_suffixes_omp(sa, n, m, threads);
11140 }
11141
11142 place_lms_suffixes_interval_8u(sa, n, m, flags, buckets);
11143 } else {
11144 sa[..n_usize].fill(0);
11145 }
11146
11147 induce_final_order_8u_omp(t, sa, n, k, flags, r, i, buckets, threads, thread_state)
11148}
11149
11150fn libsais64_main(
11151 t: &[u8],
11152 sa: &mut [SaSint],
11153 flags: SaSint,
11154 r: SaSint,
11155 i: Option<&mut [SaSint]>,
11156 fs: SaSint,
11157 freq: Option<&mut [SaSint]>,
11158 threads: SaSint,
11159) -> SaSint {
11160 let threads = normalize_omp_threads(threads);
11161 if threads > 1 {
11162 let mut thread_state = match alloc_thread_state(threads) {
11163 Some(thread_state) => thread_state,
11164 None => return -2,
11165 };
11166 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
11167
11168 libsais64_main_8u(
11169 t,
11170 sa,
11171 &mut buckets,
11172 flags,
11173 r,
11174 i,
11175 fs,
11176 freq,
11177 threads,
11178 &mut thread_state,
11179 )
11180 } else {
11181 let mut thread_state = [];
11182 let mut buckets = [0; 8 * ALPHABET_SIZE];
11183
11184 libsais64_main_8u(
11185 t,
11186 sa,
11187 &mut buckets,
11188 flags,
11189 r,
11190 i,
11191 fs,
11192 freq,
11193 threads,
11194 &mut thread_state,
11195 )
11196 }
11197}
11198
11199fn libsais64_main_int(
11200 t: &mut [SaSint],
11201 sa: &mut [SaSint],
11202 k: SaSint,
11203 fs: SaSint,
11204 threads: SaSint,
11205) -> SaSint {
11206 let threads = normalize_omp_threads(threads);
11207 let mut thread_state = if threads > 1 {
11208 match alloc_thread_state(threads) {
11209 Some(thread_state) => thread_state,
11210 None => return -2,
11211 }
11212 } else {
11213 Vec::new()
11214 };
11215
11216 libsais64_main_32s_entry(
11217 t,
11218 sa,
11219 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
11220 k,
11221 fs,
11222 threads,
11223 &mut thread_state,
11224 )
11225}
11226
/// Unprefixed alias of `libsais64_main_32s_recursion`.
///
/// Forwards every argument unchanged and returns the callee's result.
// `dead_code` is allowed because nothing in the visible code calls this alias.
#[allow(dead_code)]
fn main_32s_recursion(
    t_ptr: *mut SaSint,
    sa_ptr: *mut SaSint,
    sa_capacity: usize,
    n: SaSint,
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
    local_buffer: &mut [SaSint],
) -> SaSint {
    libsais64_main_32s_recursion(
        t_ptr,
        sa_ptr,
        sa_capacity,
        n,
        k,
        fs,
        threads,
        thread_state,
        local_buffer,
    )
}
11251
/// Unprefixed alias of `libsais64_main_32s_entry`.
///
/// Forwards every argument unchanged and returns the callee's result.
// `dead_code` is allowed because nothing in the visible code calls this alias.
#[allow(dead_code)]
fn main_32s_entry(
    t: &mut [SaSint],
    sa: &mut [SaSint],
    n: SaSint,
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
) -> SaSint {
    libsais64_main_32s_entry(t, sa, n, k, fs, threads, thread_state)
}
11264
/// Unprefixed alias of `libsais64_main_8u`.
///
/// Forwards every argument unchanged and returns the callee's result.
// `dead_code` is allowed because nothing in the visible code calls this alias.
#[allow(dead_code)]
fn main_8u(
    t: &[u8],
    sa: &mut [SaSint],
    buckets: &mut [SaSint],
    flags: SaSint,
    r: SaSint,
    i: Option<&mut [SaSint]>,
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
    threads: SaSint,
    thread_state: &mut [ThreadState],
) -> SaSint {
    libsais64_main_8u(t, sa, buckets, flags, r, i, fs, freq, threads, thread_state)
}
11280
/// Unprefixed alias of `libsais64_main_int`.
///
/// Forwards `t`, `sa`, `k`, `fs` and `threads` unchanged and returns the callee's result.
// `dead_code` is allowed because nothing in the visible code calls this alias.
#[allow(dead_code)]
fn main_long(
    t: &mut [SaSint],
    sa: &mut [SaSint],
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
) -> SaSint {
    libsais64_main_int(t, sa, k, fs, threads)
}
11291
/// Widens `s[block_start..block_start + block_size]` into the same index range of `d`.
///
/// Each `u32` is zero-extended to `u64`. Elements of `d` outside the block are left untouched.
///
/// # Panics
///
/// Panics if the block does not fit inside both `s` and `d`. The range is checked once, up
/// front, so no element of `d` is modified when the block is out of bounds.
#[allow(dead_code)]
fn convert_32u_to_64u(s: &[u32], d: &mut [u64], block_start: usize, block_size: usize) {
    let block_end = block_start + block_size;
    let source = &s[block_start..block_end];
    let target = &mut d[block_start..block_end];

    for (dst, &src) in target.iter_mut().zip(source) {
        // `u64::from` is the lossless widening; no truncating `as` cast is needed.
        *dst = u64::from(src);
    }
}
11298
/// Expands 32-bit values to pairs of 32-bit words in place, within a single `u32` buffer.
///
/// For every index `i` in `block_start..block_start + block_size`, processed from the highest
/// index down, `v[i]` is copied to `v[2 * i]` and `v[2 * i + 1]` is set to zero. Walking
/// backwards means a source value is read before the expansion of a lower index can
/// overwrite it.
///
/// NOTE(review): storing the value in the first word of each pair presumably targets a
/// little-endian `u64` layout — confirm.
///
/// # Panics
///
/// Panics if `2 * i + 1` is out of bounds for any processed index.
#[allow(dead_code)]
fn convert_inplace_32u_to_64u(v: &mut [u32], block_start: usize, block_size: usize) {
    let mut idx = block_start + block_size;
    while idx > block_start {
        idx -= 1;
        let low_word = v[idx];
        let slot = 2 * idx;
        v[slot] = low_word;
        v[slot + 1] = 0;
    }
}
11307
/// Compacts word pairs back to single 32-bit values in place.
///
/// For every index `i` in `block_start..block_start + block_size`, in ascending order,
/// `v[i]` is overwritten with `v[2 * i]`. Words beyond the compacted block are not cleared.
///
/// # Panics
///
/// Panics if `2 * i` is out of bounds for any processed index.
#[allow(dead_code)]
fn convert_inplace_64u_to_32u(v: &mut [u32], block_start: usize, block_size: usize) {
    let stop = block_start + block_size;
    let mut idx = block_start;
    while idx < stop {
        v[idx] = v[2 * idx];
        idx += 1;
    }
}
11314
11315#[allow(dead_code)]
11316fn convert_inplace_32u_to_64u_omp(v: &mut [u32], n: SaSint, threads: SaSint) {
11317 let mut n = usize::try_from(n).expect("n must be non-negative");
11318 let threads = usize::try_from(threads.max(1)).expect("threads must be non-negative");
11319
11320 while n >= 65_536 {
11321 let block_size = n >> 1;
11322 n -= block_size;
11323
11324 let omp_block_stride = (block_size / threads) & !15usize;
11325 for thread in 0..threads {
11326 let block_start = thread * omp_block_stride;
11327 let size = if thread + 1 < threads {
11328 omp_block_stride
11329 } else {
11330 block_size - block_start
11331 };
11332 convert_inplace_32u_to_64u(v, n + block_start, size);
11333 }
11334 }
11335
11336 convert_inplace_32u_to_64u(v, 0, n);
11337}
11338
11339fn libsais64_main_ctx(
11340 ctx: &mut Context,
11341 t: &[u8],
11342 sa: &mut [SaSint],
11343 flags: SaSint,
11344 r: SaSint,
11345 i: Option<&mut [SaSint]>,
11346 fs: SaSint,
11347 freq: Option<&mut [SaSint]>,
11348) -> SaSint {
11349 if ctx.threads <= 0 || ctx.buckets.len() != 8 * ALPHABET_SIZE {
11350 return -2;
11351 }
11352
11353 let mut empty_thread_state = [];
11354 let thread_state = if ctx.threads > 1 {
11355 match ctx.thread_state.as_deref_mut() {
11356 Some(thread_state) if thread_state.len() >= ctx.threads as usize => thread_state,
11357 None => return -2,
11358 Some(_) => return -2,
11359 }
11360 } else {
11361 &mut empty_thread_state
11362 };
11363
11364 libsais64_main_8u(
11365 t,
11366 sa,
11367 &mut ctx.buckets,
11368 flags,
11369 r,
11370 i,
11371 fs,
11372 freq,
11373 ctx.threads as SaSint,
11374 thread_state,
11375 )
11376}
11377
11378pub fn libsais64(t: &[u8], sa: &mut [SaSint], fs: SaSint, freq: Option<&mut [SaSint]>) -> SaSint {
11387 if fs < 0
11388 || sa.len()
11389 < t.len()
11390 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11391 {
11392 return -1;
11393 }
11394 if let Some(freq) = freq.as_ref() {
11395 if freq.len() < ALPHABET_SIZE {
11396 return -1;
11397 }
11398 }
11399
11400 let n = t.len();
11401 if n <= 1 {
11402 if let Some(freq) = freq {
11403 freq[..ALPHABET_SIZE].fill(0);
11404 if n == 1 {
11405 freq[t[0] as usize] += 1;
11406 }
11407 }
11408 if n == 1 {
11409 sa[0] = 0;
11410 }
11411 return 0;
11412 }
11413
11414 if n <= i32::MAX as usize {
11415 return libsais64_run_32bit_omp(t, sa, fs, freq, 1, false)
11416 .expect("n <= INT32_MAX must have 32-bit workspace");
11417 }
11418
11419 libsais64_main(t, sa, LIBSAIS_FLAGS_NONE, 0, None, fs, freq, 1)
11420}
11421
// Foreign declaration compiled only with the `upstream-c` feature.
#[cfg(feature = "upstream-c")]
unsafe extern "C" {
    // Called by the `libsais64_upstream_c_omp*` wrappers with: input pointer, output pointer,
    // input length `n`, free space `fs`, a possibly-null `freq` pointer and a thread count.
    // NOTE(review): presumably forwards to the upstream C `libsais64_omp` — confirm against
    // the C shim that defines this symbol.
    fn probe_public_libsais64_omp_freq(
        t: *const u8,
        sa: *mut SaSint,
        n: SaSint,
        fs: SaSint,
        freq: *mut SaSint,
        threads: SaSint,
    ) -> SaSint;
}
11433
/// Calls the C probe `probe_public_libsais64_omp_freq` after validating the arguments.
///
/// Returns `-1` without calling into C when `threads` or `fs` is negative, when `t.len()`
/// exceeds `SaSint::MAX`, when `sa` holds fewer than `t.len() + fs` elements, or when `freq`
/// is present but shorter than `ALPHABET_SIZE`. Otherwise returns the C function's result.
/// A missing `freq` is passed as a null pointer and the thread count as `threads.max(1)`.
#[cfg(feature = "upstream-c")]
pub fn libsais64_upstream_c_omp(
    t: &[u8],
    sa: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
    threads: SaSint,
) -> SaSint {
    let required = t
        .len()
        .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX));
    let invalid = threads < 0
        || fs < 0
        || t.len() > SaSint::MAX as usize
        || sa.len() < required
        || freq.as_deref().is_some_and(|table| table.len() < ALPHABET_SIZE);
    if invalid {
        return -1;
    }

    let n = t.len() as SaSint;
    let freq_ptr = match freq {
        Some(table) => table.as_mut_ptr(),
        None => std::ptr::null_mut(),
    };

    // SAFETY: the pointers come from live slices; `sa` holds at least `n + fs` elements and
    // `freq`, when non-null, at least `ALPHABET_SIZE` elements (both checked above). Any
    // further requirements of the C side are not visible here.
    unsafe {
        probe_public_libsais64_omp_freq(
            t.as_ptr(),
            sa.as_mut_ptr(),
            n,
            fs,
            freq_ptr,
            threads.max(1),
        )
    }
}
11483
/// Variant of `libsais64_upstream_c_omp` whose output buffer may be uninitialised.
///
/// Performs the same argument checks (returning `-1` on failure) and then calls
/// `probe_public_libsais64_omp_freq` with `sa` cast to `*mut SaSint`. A missing `freq` is
/// passed as a null pointer and the thread count as `threads.max(1)`.
///
/// NOTE(review): which elements of `sa` the C side initialises is not visible here — confirm
/// before reading from the buffer.
#[cfg(feature = "upstream-c")]
pub fn libsais64_upstream_c_omp_uninit(
    t: &[u8],
    sa: &mut [MaybeUninit<SaSint>],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
    threads: SaSint,
) -> SaSint {
    let required = t
        .len()
        .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX));
    let invalid = threads < 0
        || fs < 0
        || t.len() > SaSint::MAX as usize
        || sa.len() < required
        || freq.as_deref().is_some_and(|table| table.len() < ALPHABET_SIZE);
    if invalid {
        return -1;
    }

    let n = t.len() as SaSint;
    let freq_ptr = match freq {
        Some(table) => table.as_mut_ptr(),
        None => std::ptr::null_mut(),
    };
    let sa_ptr = sa.as_mut_ptr().cast::<SaSint>();

    // SAFETY: the pointers come from live slices; `sa` holds at least `n + fs` (possibly
    // uninitialised) elements and `freq`, when non-null, at least `ALPHABET_SIZE` elements
    // (both checked above). Any further requirements of the C side are not visible here.
    unsafe { probe_public_libsais64_omp_freq(t.as_ptr(), sa_ptr, n, fs, freq_ptr, threads.max(1)) }
}
11525
11526pub fn libsais64_gsa(
11535 t: &[u8],
11536 sa: &mut [SaSint],
11537 fs: SaSint,
11538 freq: Option<&mut [SaSint]>,
11539) -> SaSint {
11540 if fs < 0
11541 || sa.len()
11542 < t.len()
11543 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11544 {
11545 return -1;
11546 }
11547 if let Some(freq) = freq.as_ref() {
11548 if freq.len() < ALPHABET_SIZE {
11549 return -1;
11550 }
11551 }
11552
11553 let n = t.len();
11554 if n > 0 && t[n - 1] != 0 {
11555 return -1;
11556 }
11557
11558 if n <= 1 {
11559 if let Some(freq) = freq {
11560 freq[..ALPHABET_SIZE].fill(0);
11561 if n == 1 {
11562 freq[t[0] as usize] += 1;
11563 }
11564 }
11565 if n == 1 {
11566 sa[0] = 0;
11567 }
11568 return 0;
11569 }
11570
11571 if n <= i32::MAX as usize {
11572 return libsais64_run_32bit_omp(t, sa, fs, freq, 1, true)
11573 .expect("n <= INT32_MAX must have 32-bit workspace");
11574 }
11575
11576 libsais64_main(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, 1)
11577}
11578
11579pub fn libsais64_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
11581 if fs < 0
11582 || sa.len()
11583 < t.len()
11584 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11585 {
11586 return -1;
11587 }
11588
11589 if t.len() <= 1 {
11590 if t.len() == 1 {
11591 sa[0] = 0;
11592 }
11593 return 0;
11594 }
11595
11596 libsais64_main_int(t, sa, k, fs, 1)
11597}
11598
/// Alias of [`libsais64_int`]: forwards `t`, `sa`, `k` and `fs` unchanged and returns its
/// result.
pub fn libsais64_long(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
    libsais64_int(t, sa, k, fs)
}
11612
11613pub fn libsais64_ctx(
11623 ctx: &mut Context,
11624 t: &[u8],
11625 sa: &mut [SaSint],
11626 fs: SaSint,
11627 freq: Option<&mut [SaSint]>,
11628) -> SaSint {
11629 if fs < 0
11630 || sa.len()
11631 < t.len()
11632 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11633 {
11634 return -1;
11635 }
11636 if let Some(freq) = freq.as_ref() {
11637 if freq.len() < ALPHABET_SIZE {
11638 return -1;
11639 }
11640 }
11641
11642 let n = t.len();
11643 if n <= 1 {
11644 if let Some(freq) = freq {
11645 freq[..ALPHABET_SIZE].fill(0);
11646 if n == 1 {
11647 freq[t[0] as usize] += 1;
11648 }
11649 }
11650 if n == 1 {
11651 sa[0] = 0;
11652 }
11653 return 0;
11654 }
11655
11656 libsais64_main_ctx(ctx, t, sa, LIBSAIS_FLAGS_NONE, 0, None, fs, freq)
11657}
11658
11659pub fn libsais64_gsa_ctx(
11669 ctx: &mut Context,
11670 t: &[u8],
11671 sa: &mut [SaSint],
11672 fs: SaSint,
11673 freq: Option<&mut [SaSint]>,
11674) -> SaSint {
11675 if fs < 0
11676 || sa.len()
11677 < t.len()
11678 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11679 {
11680 return -1;
11681 }
11682 if let Some(freq) = freq.as_ref() {
11683 if freq.len() < ALPHABET_SIZE {
11684 return -1;
11685 }
11686 }
11687
11688 let n = t.len();
11689 if n > 0 && t[n - 1] != 0 {
11690 return -1;
11691 }
11692
11693 if n <= 1 {
11694 if let Some(freq) = freq {
11695 freq[..ALPHABET_SIZE].fill(0);
11696 if n == 1 {
11697 freq[t[0] as usize] += 1;
11698 }
11699 }
11700 if n == 1 {
11701 sa[0] = 0;
11702 }
11703 return 0;
11704 }
11705
11706 libsais64_main_ctx(ctx, t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq)
11707}
11708
11709pub fn libsais64_bwt(
11719 t: &[u8],
11720 u: &mut [u8],
11721 a: &mut [SaSint],
11722 fs: SaSint,
11723 freq: Option<&mut [SaSint]>,
11724) -> SaSint {
11725 if fs < 0
11726 || u.len() < t.len()
11727 || a.len()
11728 < t.len()
11729 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11730 {
11731 return -1;
11732 }
11733 if let Some(freq) = freq.as_ref() {
11734 if freq.len() < ALPHABET_SIZE {
11735 return -1;
11736 }
11737 }
11738
11739 let n = t.len();
11740 if n <= 1 {
11741 if let Some(freq) = freq {
11742 freq[..ALPHABET_SIZE].fill(0);
11743 if n == 1 {
11744 u[0] = t[0];
11745 freq[t[0] as usize] += 1;
11746 }
11747 } else if n == 1 {
11748 u[0] = t[0];
11749 }
11750 return n as SaSint;
11751 }
11752
11753 if n <= i32::MAX as usize {
11754 return libsais64_bwt_run_32bit_omp(t, u, fs, freq, 1)
11755 .expect("n <= INT32_MAX must have 32-bit workspace");
11756 }
11757
11758 let mut index = libsais64_main(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, 1);
11759 if index >= 0 {
11760 index += 1;
11761 let split = usize::try_from(index).expect("index must be non-negative");
11762 u[0] = t[n - 1];
11763 bwt_copy_8u_omp(&mut u[1..split], &a[..split - 1], index - 1, 1);
11764 bwt_copy_8u_omp(
11765 &mut u[split..n],
11766 &a[split..n],
11767 SaSint::try_from(n - split).expect("fits"),
11768 1,
11769 );
11770 }
11771 index
11772}
11773
11774pub fn libsais64_bwt_aux(
11786 t: &[u8],
11787 u: &mut [u8],
11788 a: &mut [SaSint],
11789 fs: SaSint,
11790 freq: Option<&mut [SaSint]>,
11791 r: SaSint,
11792 i: &mut [SaSint],
11793) -> SaSint {
11794 let n = t.len();
11795 if fs < 0
11796 || r < 2
11797 || (r & (r - 1)) != 0
11798 || u.len() < n
11799 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11800 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
11801 {
11802 return -1;
11803 }
11804 let sample_count = if n == 0 {
11805 1
11806 } else {
11807 usize::try_from((SaSint::try_from(n).expect("input length must fit SaSint") - 1) / r)
11808 .expect("sample count must be non-negative")
11809 + 1
11810 };
11811 if i.len() < sample_count {
11812 return -1;
11813 }
11814
11815 if n <= 1 {
11816 if let Some(freq) = freq {
11817 freq[..ALPHABET_SIZE].fill(0);
11818 if n == 1 {
11819 u[0] = t[0];
11820 freq[t[0] as usize] += 1;
11821 }
11822 } else if n == 1 {
11823 u[0] = t[0];
11824 }
11825 i[0] = n as SaSint;
11826 return 0;
11827 }
11828
11829 if n <= i32::MAX as usize && r <= i32::MAX as SaSint {
11830 return libsais64_bwt_aux_run_32bit_omp(t, u, fs, freq, r, i, 1)
11831 .expect("n/r <= INT32_MAX must have 32-bit workspace");
11832 }
11833
11834 let index = libsais64_main(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, 1);
11835 if index == 0 {
11836 let split = usize::try_from(i[0]).expect("primary index must be non-negative");
11837 u[0] = t[n - 1];
11838 bwt_copy_8u_omp(&mut u[1..split], &a[..split - 1], i[0] - 1, 1);
11839 bwt_copy_8u_omp(
11840 &mut u[split..n],
11841 &a[split..n],
11842 SaSint::try_from(n - split).expect("fits"),
11843 1,
11844 );
11845 }
11846 index
11847}
11848
11849pub fn libsais64_bwt_ctx(
11860 ctx: &mut Context,
11861 t: &[u8],
11862 u: &mut [u8],
11863 a: &mut [SaSint],
11864 fs: SaSint,
11865 freq: Option<&mut [SaSint]>,
11866) -> SaSint {
11867 if fs < 0
11868 || u.len() < t.len()
11869 || a.len()
11870 < t.len()
11871 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11872 {
11873 return -1;
11874 }
11875 if let Some(freq) = freq.as_ref() {
11876 if freq.len() < ALPHABET_SIZE {
11877 return -1;
11878 }
11879 }
11880
11881 let n = t.len();
11882 if n <= 1 {
11883 if let Some(freq) = freq {
11884 freq[..ALPHABET_SIZE].fill(0);
11885 if n == 1 {
11886 u[0] = t[0];
11887 freq[t[0] as usize] += 1;
11888 }
11889 } else if n == 1 {
11890 u[0] = t[0];
11891 }
11892 return n as SaSint;
11893 }
11894
11895 let mut index = libsais64_main_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq);
11896 if index >= 0 {
11897 index += 1;
11898 let split = usize::try_from(index).expect("index must be non-negative");
11899 u[0] = t[n - 1];
11900 bwt_copy_8u_omp(
11901 &mut u[1..split],
11902 &a[..split - 1],
11903 index - 1,
11904 ctx.threads as SaSint,
11905 );
11906 bwt_copy_8u_omp(
11907 &mut u[split..n],
11908 &a[split..n],
11909 SaSint::try_from(n - split).expect("fits"),
11910 ctx.threads as SaSint,
11911 );
11912 }
11913 index
11914}
11915
11916pub fn libsais64_bwt_aux_ctx(
11929 ctx: &mut Context,
11930 t: &[u8],
11931 u: &mut [u8],
11932 a: &mut [SaSint],
11933 fs: SaSint,
11934 freq: Option<&mut [SaSint]>,
11935 r: SaSint,
11936 i: &mut [SaSint],
11937) -> SaSint {
11938 let n = t.len();
11939 if fs < 0
11940 || r < 2
11941 || (r & (r - 1)) != 0
11942 || u.len() < n
11943 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11944 {
11945 return -1;
11946 }
11947 if let Some(freq) = freq.as_ref() {
11948 if freq.len() < ALPHABET_SIZE {
11949 return -1;
11950 }
11951 }
11952 let sample_count = if n == 0 {
11953 1
11954 } else {
11955 usize::try_from((SaSint::try_from(n).expect("input length must fit SaSint") - 1) / r)
11956 .expect("sample count must be non-negative")
11957 + 1
11958 };
11959 if i.len() < sample_count {
11960 return -1;
11961 }
11962
11963 if n <= 1 {
11964 if let Some(freq) = freq {
11965 freq[..ALPHABET_SIZE].fill(0);
11966 if n == 1 {
11967 u[0] = t[0];
11968 freq[t[0] as usize] += 1;
11969 }
11970 } else if n == 1 {
11971 u[0] = t[0];
11972 }
11973 i[0] = n as SaSint;
11974 return 0;
11975 }
11976
11977 let index = libsais64_main_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq);
11978 if index == 0 {
11979 let split = usize::try_from(i[0]).expect("primary index must be non-negative");
11980 u[0] = t[n - 1];
11981 bwt_copy_8u_omp(
11982 &mut u[1..split],
11983 &a[..split - 1],
11984 i[0] - 1,
11985 ctx.threads as SaSint,
11986 );
11987 bwt_copy_8u_omp(
11988 &mut u[split..n],
11989 &a[split..n],
11990 SaSint::try_from(n - split).expect("fits"),
11991 ctx.threads as SaSint,
11992 );
11993 }
11994 index
11995}
11996
11997pub fn create_ctx_omp(threads: SaSint) -> Option<Context> {
12005 if threads < 0 {
12006 return None;
12007 }
12008
12009 create_ctx_main(normalize_omp_threads(threads))
12010}
12011
12012fn libsais64_new_32bit_fs(n: usize, fs: SaSint) -> Option<i32> {
12013 if n > i32::MAX as usize {
12014 return None;
12015 }
12016
12017 let n = n as SaSint;
12018 let int32_max = i32::MAX as SaSint;
12019 let expanded_space = fs as i128 + fs as i128 + n as i128 + n as i128;
12020 let new_fs = if expanded_space <= int32_max as i128 {
12021 fs + fs + n
12022 } else {
12023 int32_max - n
12024 };
12025
12026 i32::try_from(new_fs).ok()
12027}
12028
12029fn libsais64_run_32bit_omp(
12030 t: &[u8],
12031 sa: &mut [SaSint],
12032 fs: SaSint,
12033 freq: Option<&mut [SaSint]>,
12034 threads: SaSint,
12035 gsa: bool,
12036) -> Option<SaSint> {
12037 let new_fs = libsais64_new_32bit_fs(t.len(), fs)?;
12038 let mut sa32 = vec![0i32; t.len() + usize::try_from(new_fs).expect("new_fs is non-negative")];
12039
12040 let index = if let Some(freq) = freq {
12041 let mut freq32 = vec![0i32; ALPHABET_SIZE];
12042 let index = if gsa {
12043 crate::libsais_gsa_omp(t, &mut sa32, new_fs, Some(&mut freq32), threads as i32)
12044 } else {
12045 crate::libsais_omp(t, &mut sa32, new_fs, Some(&mut freq32), threads as i32)
12046 };
12047 if index >= 0 {
12048 for (dst, src) in freq.iter_mut().zip(freq32.iter()) {
12049 *dst = SaSint::from(*src);
12050 }
12051 }
12052 index
12053 } else if gsa {
12054 crate::libsais_gsa_omp(t, &mut sa32, new_fs, None, threads as i32)
12055 } else {
12056 crate::libsais_omp(t, &mut sa32, new_fs, None, threads as i32)
12057 };
12058
12059 if index >= 0 {
12060 for (dst, src) in sa.iter_mut().zip(sa32.iter()).take(t.len()) {
12061 *dst = SaSint::from(*src as u32);
12062 }
12063 }
12064
12065 Some(SaSint::from(index))
12066}
12067
12068fn copy_freq32_to_64(freq: &mut [SaSint], freq32: &[i32]) {
12069 for (dst, src) in freq.iter_mut().zip(freq32.iter()).take(ALPHABET_SIZE) {
12070 *dst = SaSint::from(*src);
12071 }
12072}
12073
12074fn libsais64_bwt_run_32bit_omp(
12075 t: &[u8],
12076 u: &mut [u8],
12077 fs: SaSint,
12078 freq: Option<&mut [SaSint]>,
12079 threads: SaSint,
12080) -> Option<SaSint> {
12081 let new_fs = libsais64_new_32bit_fs(t.len(), fs)?;
12082 let mut a32 = vec![0i32; t.len() + usize::try_from(new_fs).expect("new_fs is non-negative")];
12083
12084 let index = if let Some(freq) = freq {
12085 let mut freq32 = vec![0i32; ALPHABET_SIZE];
12086 let index =
12087 crate::libsais_bwt_omp(t, u, &mut a32, new_fs, Some(&mut freq32), threads as i32);
12088 if index >= 0 {
12089 copy_freq32_to_64(freq, &freq32);
12090 }
12091 index
12092 } else {
12093 crate::libsais_bwt_omp(t, u, &mut a32, new_fs, None, threads as i32)
12094 };
12095
12096 Some(SaSint::from(index))
12097}
12098
12099fn libsais64_bwt_aux_run_32bit_omp(
12100 t: &[u8],
12101 u: &mut [u8],
12102 fs: SaSint,
12103 freq: Option<&mut [SaSint]>,
12104 r: SaSint,
12105 i: &mut [SaSint],
12106 threads: SaSint,
12107) -> Option<SaSint> {
12108 if r > i32::MAX as SaSint {
12109 return None;
12110 }
12111
12112 let new_fs = libsais64_new_32bit_fs(t.len(), fs)?;
12113 let mut a32 = vec![0i32; t.len() + usize::try_from(new_fs).expect("new_fs is non-negative")];
12114 let sample_count = if t.is_empty() {
12115 1
12116 } else {
12117 (t.len() - 1) / usize::try_from(r).expect("r must be positive") + 1
12118 };
12119 let mut i32_out = vec![0i32; sample_count];
12120
12121 let index = if let Some(freq) = freq {
12122 let mut freq32 = vec![0i32; ALPHABET_SIZE];
12123 let index = crate::libsais_bwt_aux_omp(
12124 t,
12125 u,
12126 &mut a32,
12127 new_fs,
12128 Some(&mut freq32),
12129 r as i32,
12130 &mut i32_out,
12131 threads as i32,
12132 );
12133 if index >= 0 {
12134 copy_freq32_to_64(freq, &freq32);
12135 }
12136 index
12137 } else {
12138 crate::libsais_bwt_aux_omp(
12139 t,
12140 u,
12141 &mut a32,
12142 new_fs,
12143 None,
12144 r as i32,
12145 &mut i32_out,
12146 threads as i32,
12147 )
12148 };
12149
12150 if index >= 0 {
12151 for (dst, src) in i.iter_mut().zip(i32_out.iter()).take(sample_count) {
12152 *dst = SaSint::from(*src);
12153 }
12154 }
12155
12156 Some(SaSint::from(index))
12157}
12158
12159pub fn libsais64_omp(
12169 t: &[u8],
12170 sa: &mut [SaSint],
12171 fs: SaSint,
12172 freq: Option<&mut [SaSint]>,
12173 threads: SaSint,
12174) -> SaSint {
12175 if threads < 0 {
12176 return -1;
12177 }
12178 if fs < 0
12179 || sa.len()
12180 < t.len()
12181 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
12182 {
12183 return -1;
12184 }
12185 if let Some(freq) = freq.as_ref() {
12186 if freq.len() < ALPHABET_SIZE {
12187 return -1;
12188 }
12189 }
12190 let n = t.len();
12191 if n <= 1 {
12192 if let Some(freq) = freq {
12193 freq[..ALPHABET_SIZE].fill(0);
12194 if n == 1 {
12195 sa[0] = 0;
12196 freq[t[0] as usize] += 1;
12197 }
12198 } else if n == 1 {
12199 sa[0] = 0;
12200 }
12201 return 0;
12202 }
12203
12204 let threads = normalize_omp_threads(threads);
12205 if n <= i32::MAX as usize {
12206 return libsais64_run_32bit_omp(t, sa, fs, freq, threads, false)
12207 .expect("n <= INT32_MAX must have 32-bit workspace");
12208 }
12209
12210 libsais64_main(t, sa, LIBSAIS_FLAGS_NONE, 0, None, fs, freq, threads)
12211}
12212
12213pub fn libsais64_gsa_omp(
12223 t: &[u8],
12224 sa: &mut [SaSint],
12225 fs: SaSint,
12226 freq: Option<&mut [SaSint]>,
12227 threads: SaSint,
12228) -> SaSint {
12229 if threads < 0
12230 || t.last().copied().unwrap_or(0) != 0
12231 || fs < 0
12232 || sa.len()
12233 < t.len()
12234 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
12235 {
12236 return -1;
12237 }
12238 if let Some(freq) = freq.as_ref() {
12239 if freq.len() < ALPHABET_SIZE {
12240 return -1;
12241 }
12242 }
12243 let n = t.len();
12244 if n <= 1 {
12245 if let Some(freq) = freq {
12246 freq[..ALPHABET_SIZE].fill(0);
12247 if n == 1 {
12248 sa[0] = 0;
12249 freq[t[0] as usize] += 1;
12250 }
12251 } else if n == 1 {
12252 sa[0] = 0;
12253 }
12254 return 0;
12255 }
12256
12257 let threads = normalize_omp_threads(threads);
12258 if n <= i32::MAX as usize {
12259 return libsais64_run_32bit_omp(t, sa, fs, freq, threads, true)
12260 .expect("n <= INT32_MAX must have 32-bit workspace");
12261 }
12262
12263 libsais64_main(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, threads)
12264}
12265
12266pub fn libsais64_int_omp(
12268 t: &mut [SaSint],
12269 sa: &mut [SaSint],
12270 k: SaSint,
12271 fs: SaSint,
12272 threads: SaSint,
12273) -> SaSint {
12274 if threads < 0 {
12275 return -1;
12276 }
12277 if fs < 0
12278 || sa.len()
12279 < t.len()
12280 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
12281 {
12282 return -1;
12283 }
12284 if t.len() <= 1 {
12285 if t.len() == 1 {
12286 sa[0] = 0;
12287 }
12288 return 0;
12289 }
12290
12291 libsais64_main_int(t, sa, k, fs, normalize_omp_threads(threads))
12292}
12293
/// Alias of [`libsais64_int_omp`]: forwards `t`, `sa`, `k`, `fs` and `threads` unchanged and
/// returns its result.
pub fn libsais64_long_omp(
    t: &mut [SaSint],
    sa: &mut [SaSint],
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
) -> SaSint {
    libsais64_int_omp(t, sa, k, fs, threads)
}
12314
12315pub fn libsais64_bwt_omp(
12326 t: &[u8],
12327 u: &mut [u8],
12328 a: &mut [SaSint],
12329 fs: SaSint,
12330 freq: Option<&mut [SaSint]>,
12331 threads: SaSint,
12332) -> SaSint {
12333 let n = t.len();
12334 if threads < 0
12335 || fs < 0
12336 || u.len() < n
12337 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
12338 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
12339 {
12340 return -1;
12341 }
12342
12343 if n <= 1 {
12344 if let Some(freq) = freq {
12345 freq[..ALPHABET_SIZE].fill(0);
12346 if n == 1 {
12347 u[0] = t[0];
12348 freq[t[0] as usize] += 1;
12349 }
12350 } else if n == 1 {
12351 u[0] = t[0];
12352 }
12353 return n as SaSint;
12354 }
12355
12356 let threads = normalize_omp_threads(threads);
12357 if n <= i32::MAX as usize {
12358 return libsais64_bwt_run_32bit_omp(t, u, fs, freq, threads)
12359 .expect("n <= INT32_MAX must have 32-bit workspace");
12360 }
12361
12362 let mut index = libsais64_main(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, threads);
12363 if index >= 0 {
12364 index += 1;
12365 let index_usize = usize::try_from(index).expect("index must be non-negative");
12366 u[0] = t[n - 1];
12367 bwt_copy_8u_omp(
12368 &mut u[1..index_usize],
12369 &a[..index_usize - 1],
12370 index - 1,
12371 threads,
12372 );
12373 bwt_copy_8u_omp(
12374 &mut u[index_usize..n],
12375 &a[index_usize..n],
12376 SaSint::try_from(n - index_usize).expect("fits"),
12377 threads,
12378 );
12379 }
12380 index
12381}
12382
12383pub fn libsais64_bwt_aux_omp(
12396 t: &[u8],
12397 u: &mut [u8],
12398 a: &mut [SaSint],
12399 fs: SaSint,
12400 freq: Option<&mut [SaSint]>,
12401 r: SaSint,
12402 i: &mut [SaSint],
12403 threads: SaSint,
12404) -> SaSint {
12405 let n = t.len();
12406 if threads < 0
12407 || fs < 0
12408 || r < 2
12409 || (r & (r - 1)) != 0
12410 || u.len() < n
12411 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
12412 {
12413 return -1;
12414 }
12415 if let Some(freq) = freq.as_ref() {
12416 if freq.len() < ALPHABET_SIZE {
12417 return -1;
12418 }
12419 }
12420 let sample_count = if n == 0 {
12421 1
12422 } else {
12423 usize::try_from((SaSint::try_from(n).expect("input length must fit SaSint") - 1) / r)
12424 .expect("sample count must be non-negative")
12425 + 1
12426 };
12427 if i.len() < sample_count {
12428 return -1;
12429 }
12430 if n <= 1 {
12431 if let Some(freq) = freq {
12432 freq[..ALPHABET_SIZE].fill(0);
12433 if n == 1 {
12434 u[0] = t[0];
12435 freq[t[0] as usize] += 1;
12436 }
12437 } else if n == 1 {
12438 u[0] = t[0];
12439 }
12440 i[0] = n as SaSint;
12441 return 0;
12442 }
12443
12444 let threads = normalize_omp_threads(threads);
12445 if n <= i32::MAX as usize && r <= i32::MAX as SaSint {
12446 return libsais64_bwt_aux_run_32bit_omp(t, u, fs, freq, r, i, threads)
12447 .expect("n/r <= INT32_MAX must have 32-bit workspace");
12448 }
12449
12450 let index = libsais64_main(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, threads);
12451 if index == 0 {
12452 let split = usize::try_from(i[0]).expect("primary index must be non-negative");
12453 u[0] = t[n - 1];
12454 bwt_copy_8u_omp(&mut u[1..split], &a[..split - 1], i[0] - 1, threads);
12455 bwt_copy_8u_omp(
12456 &mut u[split..n],
12457 &a[split..n],
12458 SaSint::try_from(n - split).expect("fits"),
12459 threads,
12460 );
12461 }
12462 index
12463}
12464
12465#[doc(hidden)]
12467pub fn compute_phi(
12468 sa: &[SaSint],
12469 plcp: &mut [SaSint],
12470 n: SaSint,
12471 omp_block_start: FastSint,
12472 omp_block_size: FastSint,
12473) {
12474 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12475 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12476 let end = start + size;
12477 let n_usize = usize::try_from(n).expect("n must be non-negative");
12478 let mut i = start;
12479 let mut k = if omp_block_start > 0 {
12480 sa[start - 1]
12481 } else {
12482 n
12483 };
12484
12485 let fast_end = omp_block_start + omp_block_size - 64 - 3;
12486 while (i as FastSint) < fast_end {
12487 plcp[usize::try_from(sa[i]).expect("suffix index must be non-negative")] = k;
12488 k = sa[i];
12489 plcp[usize::try_from(sa[i + 1]).expect("suffix index must be non-negative")] = k;
12490 k = sa[i + 1];
12491 plcp[usize::try_from(sa[i + 2]).expect("suffix index must be non-negative")] = k;
12492 k = sa[i + 2];
12493 plcp[usize::try_from(sa[i + 3]).expect("suffix index must be non-negative")] = k;
12494 k = sa[i + 3];
12495 i += 4;
12496 }
12497
12498 while i < end.min(n_usize) {
12499 plcp[usize::try_from(sa[i]).expect("suffix index must be non-negative")] = k;
12500 k = sa[i];
12501 i += 1;
12502 }
12503}
12504
12505#[doc(hidden)]
12507pub fn compute_phi_omp(sa: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12508 if threads == 1 || n < 65_536 {
12509 compute_phi(sa, plcp, n, 0, n as FastSint);
12510 return;
12511 }
12512
12513 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12514 let block_stride = ((n as FastSint) / (threads as FastSint)) & !15;
12515 let plcp_addr = plcp.as_mut_ptr() as usize;
12516 let n_usize = usize::try_from(n).expect("n must be non-negative");
12517
12518 run_rayon_with_threads(threads_usize, || {
12519 (0..threads_usize).into_par_iter().for_each(|thread| {
12520 let block_start = thread as FastSint * block_stride;
12521 let block_size = if thread + 1 < threads_usize {
12522 block_stride
12523 } else {
12524 n as FastSint - block_start
12525 };
12526 let start = usize::try_from(block_start).expect("omp_block_start must be non-negative");
12527 let size = usize::try_from(block_size).expect("omp_block_size must be non-negative");
12528 let end = start + size;
12529 let mut i = start;
12530 let mut k = if block_start > 0 { sa[start - 1] } else { n };
12531 let plcp_ptr = plcp_addr as *mut SaSint;
12532
12533 let fast_end = block_start + block_size - 64 - 3;
12534 while (i as FastSint) < fast_end {
12535 unsafe {
12536 *plcp_ptr
12538 .add(usize::try_from(sa[i]).expect("suffix index must be non-negative")) =
12539 k;
12540 k = sa[i];
12541 *plcp_ptr.add(
12542 usize::try_from(sa[i + 1]).expect("suffix index must be non-negative"),
12543 ) = k;
12544 k = sa[i + 1];
12545 *plcp_ptr.add(
12546 usize::try_from(sa[i + 2]).expect("suffix index must be non-negative"),
12547 ) = k;
12548 k = sa[i + 2];
12549 *plcp_ptr.add(
12550 usize::try_from(sa[i + 3]).expect("suffix index must be non-negative"),
12551 ) = k;
12552 k = sa[i + 3];
12553 }
12554 i += 4;
12555 }
12556
12557 while i < end.min(n_usize) {
12558 unsafe {
12559 *plcp_ptr
12561 .add(usize::try_from(sa[i]).expect("suffix index must be non-negative")) =
12562 k;
12563 }
12564 k = sa[i];
12565 i += 1;
12566 }
12567 });
12568 });
12569}
12570
12571#[doc(hidden)]
12573pub fn compute_plcp(
12574 t: &[u8],
12575 plcp: &mut [SaSint],
12576 n: FastSint,
12577 omp_block_start: FastSint,
12578 omp_block_size: FastSint,
12579) {
12580 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12581 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12582 let end = start + size;
12583 let n_usize = usize::try_from(n).expect("n must be non-negative");
12584 let mut l = 0usize;
12585
12586 for i in start..end.min(n_usize) {
12587 let k = usize::try_from(plcp[i]).expect("phi entry must be non-negative");
12588 let m = n_usize - i.max(k);
12589 while l < m && t[i + l] == t[k + l] {
12590 l += 1;
12591 }
12592 plcp[i] = SaSint::try_from(l).expect("LCP length must fit SaSint");
12593 l = l.saturating_sub(1);
12594 }
12595}
12596
12597#[doc(hidden)]
12599pub fn compute_plcp_omp(t: &[u8], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12600 if threads == 1 || n < 65_536 {
12601 compute_plcp(t, plcp, n as FastSint, 0, n as FastSint);
12602 return;
12603 }
12604
12605 let n_usize = usize::try_from(n).expect("n must be non-negative");
12606 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12607 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12608 run_rayon_with_threads(threads_usize, || {
12609 plcp[..n_usize]
12610 .par_chunks_mut(chunk_size)
12611 .enumerate()
12612 .for_each(|(chunk_index, chunk)| {
12613 let start = chunk_index * chunk_size;
12614 let mut l = 0usize;
12615 for (offset, value) in chunk.iter_mut().enumerate() {
12616 let i = start + offset;
12617 let k = usize::try_from(*value).expect("phi entry must be non-negative");
12618 let m = n_usize - i.max(k);
12619 while l < m && t[i + l] == t[k + l] {
12620 l += 1;
12621 }
12622 *value = SaSint::try_from(l).expect("LCP length must fit SaSint");
12623 l = l.saturating_sub(1);
12624 }
12625 });
12626 });
12627}
12628
12629#[doc(hidden)]
12631pub fn compute_plcp_gsa(
12632 t: &[u8],
12633 plcp: &mut [SaSint],
12634 omp_block_start: FastSint,
12635 omp_block_size: FastSint,
12636) {
12637 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12638 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12639 let end = start + size;
12640 let mut l = 0usize;
12641
12642 for i in start..end.min(t.len()) {
12643 let k = usize::try_from(plcp[i]).expect("phi entry must be non-negative");
12644 while t[i + l] > 0 && t[i + l] == t[k + l] {
12645 l += 1;
12646 }
12647 plcp[i] = SaSint::try_from(l).expect("LCP length must fit SaSint");
12648 l = l.saturating_sub(1);
12649 }
12650}
12651
12652#[doc(hidden)]
12654pub fn compute_plcp_gsa_omp(t: &[u8], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12655 if threads == 1 || n < 65_536 {
12656 compute_plcp_gsa(t, plcp, 0, n as FastSint);
12657 return;
12658 }
12659
12660 let n_usize = usize::try_from(n).expect("n must be non-negative");
12661 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12662 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12663 run_rayon_with_threads(threads_usize, || {
12664 plcp[..n_usize]
12665 .par_chunks_mut(chunk_size)
12666 .enumerate()
12667 .for_each(|(chunk_index, chunk)| {
12668 let start = chunk_index * chunk_size;
12669 let mut l = 0usize;
12670 for (offset, value) in chunk.iter_mut().enumerate() {
12671 let i = start + offset;
12672 let k = usize::try_from(*value).expect("phi entry must be non-negative");
12673 while t[i + l] > 0 && t[i + l] == t[k + l] {
12674 l += 1;
12675 }
12676 *value = SaSint::try_from(l).expect("LCP length must fit SaSint");
12677 l = l.saturating_sub(1);
12678 }
12679 });
12680 });
12681}
12682
12683#[doc(hidden)]
12685pub fn compute_plcp_int(
12686 t: &[SaSint],
12687 plcp: &mut [SaSint],
12688 n: FastSint,
12689 omp_block_start: FastSint,
12690 omp_block_size: FastSint,
12691) {
12692 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12693 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12694 let end = start + size;
12695 let n_usize = usize::try_from(n).expect("n must be non-negative");
12696 let mut l = 0usize;
12697
12698 for i in start..end.min(n_usize) {
12699 let k = usize::try_from(plcp[i]).expect("phi entry must be non-negative");
12700 let m = n_usize - i.max(k);
12701 while l < m && t[i + l] == t[k + l] {
12702 l += 1;
12703 }
12704 plcp[i] = SaSint::try_from(l).expect("LCP length must fit SaSint");
12705 l = l.saturating_sub(1);
12706 }
12707}
12708
12709#[doc(hidden)]
12711pub fn compute_plcp_int_omp(t: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12712 if threads == 1 || n < 65_536 {
12713 compute_plcp_int(t, plcp, n as FastSint, 0, n as FastSint);
12714 return;
12715 }
12716
12717 let n_usize = usize::try_from(n).expect("n must be non-negative");
12718 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12719 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12720 run_rayon_with_threads(threads_usize, || {
12721 plcp[..n_usize]
12722 .par_chunks_mut(chunk_size)
12723 .enumerate()
12724 .for_each(|(chunk_index, chunk)| {
12725 let start = chunk_index * chunk_size;
12726 let mut l = 0usize;
12727 for (offset, value) in chunk.iter_mut().enumerate() {
12728 let i = start + offset;
12729 let k = usize::try_from(*value).expect("phi entry must be non-negative");
12730 let m = n_usize - i.max(k);
12731 while l < m && t[i + l] == t[k + l] {
12732 l += 1;
12733 }
12734 *value = SaSint::try_from(l).expect("LCP length must fit SaSint");
12735 l = l.saturating_sub(1);
12736 }
12737 });
12738 });
12739}
12740
12741#[doc(hidden)]
12743pub fn compute_lcp(
12744 plcp: &[SaSint],
12745 sa: &[SaSint],
12746 lcp: &mut [SaSint],
12747 omp_block_start: FastSint,
12748 omp_block_size: FastSint,
12749) {
12750 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12751 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12752 let end = start + size;
12753
12754 for i in start..end.min(sa.len()) {
12755 lcp[i] = plcp[usize::try_from(sa[i]).expect("suffix index must be non-negative")];
12756 }
12757}
12758
12759#[doc(hidden)]
12761pub fn compute_lcp_omp(
12762 plcp: &[SaSint],
12763 sa: &[SaSint],
12764 lcp: &mut [SaSint],
12765 n: SaSint,
12766 threads: SaSint,
12767) {
12768 if threads == 1 || n < 65_536 {
12769 compute_lcp(plcp, sa, lcp, 0, n as FastSint);
12770 return;
12771 }
12772
12773 let n_usize = usize::try_from(n).expect("n must be non-negative");
12774 assert!(plcp.len() >= n_usize);
12775 assert!(sa.len() >= n_usize);
12776 assert!(lcp.len() >= n_usize);
12777 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12778 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12779 let plcp_ptr = plcp.as_ptr() as usize;
12780 let sa_ptr = sa.as_ptr() as usize;
12781 run_rayon_with_threads(threads_usize, || {
12782 lcp[..n_usize]
12783 .par_chunks_mut(chunk_size)
12784 .enumerate()
12785 .for_each(|(chunk_index, chunk)| {
12786 let start = chunk_index * chunk_size;
12787 let dst_ptr = chunk.as_mut_ptr();
12788 let sa_ptr = sa_ptr as *const SaSint;
12789 let plcp_ptr = plcp_ptr as *const SaSint;
12790 for offset in 0..chunk.len() {
12791 let i = start + offset;
12792 let suffix = unsafe { *sa_ptr.add(i) };
12793 let suffix =
12794 usize::try_from(suffix).expect("suffix index must be non-negative");
12795 assert!(suffix < plcp.len());
12796 unsafe {
12797 *dst_ptr.add(offset) = *plcp_ptr.add(suffix);
12798 }
12799 }
12800 });
12801 });
12802}
12803
12804pub fn libsais64_plcp(t: &[u8], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
12812 if sa.len() != t.len() || plcp.len() != t.len() {
12813 return -1;
12814 }
12815 if !suffix_entries_in_bounds(sa, t.len()) {
12816 return -1;
12817 }
12818 if t.len() <= 1 {
12819 if t.len() == 1 {
12820 plcp[0] = 0;
12821 }
12822 return 0;
12823 }
12824
12825 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12826 compute_phi_omp(sa, plcp, n, 1);
12827 compute_plcp_omp(t, plcp, n, 1);
12828 0
12829}
12830
12831pub fn libsais64_plcp_gsa(t: &[u8], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
12839 if t.last().copied().unwrap_or(0) != 0 {
12840 return -1;
12841 }
12842 if sa.len() != t.len() || plcp.len() != t.len() {
12843 return -1;
12844 }
12845 if !suffix_entries_in_bounds(sa, t.len()) {
12846 return -1;
12847 }
12848 if t.len() <= 1 {
12849 if t.len() == 1 {
12850 plcp[0] = 0;
12851 }
12852 return 0;
12853 }
12854
12855 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12856 compute_phi_omp(sa, plcp, n, 1);
12857 compute_plcp_gsa_omp(t, plcp, n, 1);
12858 0
12859}
12860
12861pub fn libsais64_plcp_int(t: &[SaSint], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
12869 if sa.len() != t.len() || plcp.len() != t.len() {
12870 return -1;
12871 }
12872 if !suffix_entries_in_bounds(sa, t.len()) {
12873 return -1;
12874 }
12875 if t.len() <= 1 {
12876 if t.len() == 1 {
12877 plcp[0] = 0;
12878 }
12879 return 0;
12880 }
12881
12882 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12883 compute_phi_omp(sa, plcp, n, 1);
12884 compute_plcp_int_omp(t, plcp, n, 1);
12885 0
12886}
12887
12888pub fn libsais64_lcp(plcp: &[SaSint], sa: &[SaSint], lcp: &mut [SaSint]) -> SaSint {
12896 if plcp.len() != sa.len() || lcp.len() != sa.len() {
12897 return -1;
12898 }
12899 if !suffix_entries_in_bounds(sa, plcp.len()) {
12900 return -1;
12901 }
12902 if sa.len() <= 1 {
12903 if sa.len() == 1 {
12904 lcp[0] = plcp[usize::try_from(sa[0]).expect("suffix index must be non-negative")];
12905 }
12906 return 0;
12907 }
12908
12909 compute_lcp_omp(
12910 plcp,
12911 sa,
12912 lcp,
12913 SaSint::try_from(sa.len()).expect("suffix array length must fit SaSint"),
12914 1,
12915 );
12916 0
12917}
12918
12919pub fn libsais64_plcp_omp(t: &[u8], sa: &[SaSint], plcp: &mut [SaSint], threads: SaSint) -> SaSint {
12928 if threads < 0 {
12929 return -1;
12930 }
12931 if sa.len() != t.len() || plcp.len() != t.len() {
12932 return -1;
12933 }
12934 if !suffix_entries_in_bounds(sa, t.len()) {
12935 return -1;
12936 }
12937 if t.len() <= 1 {
12938 if t.len() == 1 {
12939 plcp[0] = 0;
12940 }
12941 return 0;
12942 }
12943
12944 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12945 let threads = normalize_omp_threads(threads);
12946 compute_phi_omp(sa, plcp, n, threads);
12947 compute_plcp_omp(t, plcp, n, threads);
12948 0
12949}
12950
12951pub fn libsais64_plcp_gsa_omp(
12960 t: &[u8],
12961 sa: &[SaSint],
12962 plcp: &mut [SaSint],
12963 threads: SaSint,
12964) -> SaSint {
12965 if threads < 0 || t.last().copied().unwrap_or(0) != 0 {
12966 return -1;
12967 }
12968 if sa.len() != t.len() || plcp.len() != t.len() {
12969 return -1;
12970 }
12971 if !suffix_entries_in_bounds(sa, t.len()) {
12972 return -1;
12973 }
12974 if t.len() <= 1 {
12975 if t.len() == 1 {
12976 plcp[0] = 0;
12977 }
12978 return 0;
12979 }
12980
12981 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12982 let threads = normalize_omp_threads(threads);
12983 compute_phi_omp(sa, plcp, n, threads);
12984 compute_plcp_gsa_omp(t, plcp, n, threads);
12985 0
12986}
12987
12988pub fn libsais64_plcp_int_omp(
12997 t: &[SaSint],
12998 sa: &[SaSint],
12999 plcp: &mut [SaSint],
13000 threads: SaSint,
13001) -> SaSint {
13002 if threads < 0 {
13003 return -1;
13004 }
13005 if sa.len() != t.len() || plcp.len() != t.len() {
13006 return -1;
13007 }
13008 if !suffix_entries_in_bounds(sa, t.len()) {
13009 return -1;
13010 }
13011 if t.len() <= 1 {
13012 if t.len() == 1 {
13013 plcp[0] = 0;
13014 }
13015 return 0;
13016 }
13017
13018 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
13019 let threads = normalize_omp_threads(threads);
13020 compute_phi_omp(sa, plcp, n, threads);
13021 compute_plcp_int_omp(t, plcp, n, threads);
13022 0
13023}
13024
13025pub fn libsais64_lcp_omp(
13034 plcp: &[SaSint],
13035 sa: &[SaSint],
13036 lcp: &mut [SaSint],
13037 threads: SaSint,
13038) -> SaSint {
13039 if threads < 0 {
13040 return -1;
13041 }
13042 if plcp.len() != sa.len() || lcp.len() != sa.len() {
13043 return -1;
13044 }
13045 if !suffix_entries_in_bounds(sa, plcp.len()) {
13046 return -1;
13047 }
13048 if sa.len() <= 1 {
13049 if sa.len() == 1 {
13050 lcp[0] = plcp[usize::try_from(sa[0]).expect("suffix index must be non-negative")];
13051 }
13052 return 0;
13053 }
13054
13055 compute_lcp_omp(
13056 plcp,
13057 sa,
13058 lcp,
13059 SaSint::try_from(sa.len()).expect("suffix array length must fit SaSint"),
13060 normalize_omp_threads(threads),
13061 );
13062 0
13063}
13064
13065fn suffix_entries_in_bounds(sa: &[SaSint], len: usize) -> bool {
13066 sa.iter()
13067 .all(|&value| usize::try_from(value).is_ok_and(|index| index < len))
13068}
13069
13070#[doc(hidden)]
13072pub fn unbwt_compute_histogram(t: &[u8], n: FastSint, count: &mut [SaUint]) {
13073 let n = usize::try_from(n).expect("n must be non-negative");
13074 assert!(count.len() >= ALPHABET_SIZE);
13075 for &byte in &t[..n] {
13076 count[byte as usize] += 1;
13077 }
13078}
13079
13080#[doc(hidden)]
13082pub fn unbwt_transpose_bucket2(bucket2: &mut [SaUint]) {
13083 assert!(bucket2.len() >= ALPHABET_SIZE * ALPHABET_SIZE);
13084 for x in 0..ALPHABET_SIZE {
13085 for y in x + 1..ALPHABET_SIZE {
13086 bucket2.swap((y << 8) + x, (x << 8) + y);
13087 }
13088 }
13089}
13090
13091#[doc(hidden)]
13093pub fn unbwt_compute_bigram_histogram_single(
13094 t: &[u8],
13095 bucket1: &mut [SaUint],
13096 bucket2: &mut [SaUint],
13097 index: FastUint,
13098) {
13099 let mut sum = 1usize;
13100 for c in 0..ALPHABET_SIZE {
13101 let prev = sum;
13102 sum += bucket1[c] as usize;
13103 bucket1[c] = prev as SaUint;
13104 if prev != sum {
13105 let bucket2_p = &mut bucket2[c << 8..(c + 1) << 8];
13106
13107 let hi = sum.min(index);
13108 if hi > prev {
13109 unbwt_compute_histogram(&t[prev..], (hi - prev) as FastSint, bucket2_p);
13110 }
13111
13112 let lo = prev.max(index + 1);
13113 if sum > lo {
13114 unbwt_compute_histogram(&t[lo - 1..], (sum - lo) as FastSint, bucket2_p);
13115 }
13116 }
13117 }
13118
13119 unbwt_transpose_bucket2(bucket2);
13120}
13121
13122#[doc(hidden)]
13124pub fn unbwt_calculate_fastbits(
13125 bucket2: &mut [SaUint],
13126 fastbits: &mut [u16],
13127 lastc: FastUint,
13128 shift: FastUint,
13129) {
13130 let mut v = 0usize;
13131 let mut w = 0usize;
13132 let mut sum = 1usize;
13133
13134 for c in 0..ALPHABET_SIZE {
13135 if c == lastc {
13136 sum += 1;
13137 }
13138
13139 for _d in 0..ALPHABET_SIZE {
13140 let prev = sum;
13141 sum += bucket2[w] as usize;
13142 bucket2[w] = prev as SaUint;
13143 if prev != sum {
13144 while v <= ((sum - 1) >> shift) {
13145 fastbits[v] = w as u16;
13146 v += 1;
13147 }
13148 }
13149 w += 1;
13150 }
13151 }
13152}
13153
13154#[doc(hidden)]
13156pub fn unbwt_calculate_bi_psi(
13157 t: &[u8],
13158 p: &mut [SaUint],
13159 bucket1: &mut [SaUint],
13160 bucket2: &mut [SaUint],
13161 index: FastUint,
13162 omp_block_start: FastSint,
13163 omp_block_end: FastSint,
13164) {
13165 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
13166 let mut j = index;
13167 let block_end = usize::try_from(omp_block_end).expect("omp_block_end must be non-negative");
13168 if block_end < j {
13169 j = block_end;
13170 }
13171 while i < j {
13172 let c = t[i] as usize;
13173 let pidx = bucket1[c] as usize;
13174 bucket1[c] += 1;
13175 let tidx = index.wrapping_sub(pidx) as i64;
13176 if tidx != 0 {
13177 let src = pidx.wrapping_add((tidx >> 63) as usize);
13178 let w = ((t[src] as usize) << 8) + c;
13179 let dst = bucket2[w] as usize;
13180 p[dst] = i as SaUint;
13181 bucket2[w] += 1;
13182 }
13183 i += 1;
13184 }
13185
13186 let mut i = index;
13187 if usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") > i {
13188 i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
13189 }
13190 i += 1;
13191 while i <= block_end {
13192 let c = t[i - 1] as usize;
13193 let pidx = bucket1[c] as usize;
13194 bucket1[c] += 1;
13195 let tidx = index.wrapping_sub(pidx) as i64;
13196 if tidx != 0 {
13197 let src = pidx.wrapping_add((tidx >> 63) as usize);
13198 let w = ((t[src] as usize) << 8) + c;
13199 let dst = bucket2[w] as usize;
13200 p[dst] = i as SaUint;
13201 bucket2[w] += 1;
13202 }
13203 i += 1;
13204 }
13205}
13206
#[doc(hidden)]
#[allow(dead_code, non_snake_case)]
/// Alternate-spelling alias of [`unbwt_calculate_bi_psi`].
///
/// Forwards every argument unchanged and has no behavior of its own. The
/// `non_snake_case` allowance exists only for this name.
pub fn unbwt_calculate_biPSI(
    t: &[u8],
    p: &mut [SaUint],
    bucket1: &mut [SaUint],
    bucket2: &mut [SaUint],
    index: FastUint,
    omp_block_start: FastSint,
    omp_block_end: FastSint,
) {
    unbwt_calculate_bi_psi(
        t,
        p,
        bucket1,
        bucket2,
        index,
        omp_block_start,
        omp_block_end,
    );
}
13229
13230#[doc(hidden)]
13232pub fn unbwt_init_single(
13233 t: &[u8],
13234 p: &mut [SaUint],
13235 n: SaSint,
13236 freq: Option<&[SaSint]>,
13237 i: &[SaUint],
13238 bucket2: &mut [SaUint],
13239 fastbits: &mut [u16],
13240) {
13241 let mut bucket1 = vec![0u64; ALPHABET_SIZE];
13242 let index = i[0] as usize;
13243 let lastc = t[0] as usize;
13244 let mut shift = 0usize;
13245 while (usize::try_from(n).expect("n must be non-negative") >> shift)
13246 > (1usize << UNBWT_FASTBITS)
13247 {
13248 shift += 1;
13249 }
13250
13251 if let Some(freq) = freq {
13252 for c in 0..ALPHABET_SIZE {
13253 bucket1[c] = freq[c] as SaUint;
13254 }
13255 } else {
13256 unbwt_compute_histogram(t, n as FastSint, &mut bucket1);
13257 }
13258
13259 bucket2.fill(0);
13260 unbwt_compute_bigram_histogram_single(t, &mut bucket1, bucket2, index);
13261 unbwt_calculate_fastbits(bucket2, fastbits, lastc, shift);
13262 unbwt_calculate_bi_psi(t, p, &mut bucket1, bucket2, index, 0, n as FastSint);
13263}
13264
13265#[doc(hidden)]
13267pub fn unbwt_compute_bigram_histogram_parallel(
13268 t: &[u8],
13269 index: FastUint,
13270 bucket1: &mut [SaUint],
13271 bucket2: &mut [SaUint],
13272 omp_block_start: FastSint,
13273 omp_block_size: FastSint,
13274) {
13275 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
13276 let end = start + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
13277 for &c_u8 in &t[start..end] {
13278 let c = c_u8 as usize;
13279 let p = bucket1[c] as usize;
13280 bucket1[c] += 1;
13281 let tidx = index.wrapping_sub(p) as i64;
13282 if tidx != 0 {
13283 let src = p.wrapping_add((tidx >> 63) as usize);
13284 let w = ((t[src] as usize) << 8) + c;
13285 bucket2[w] += 1;
13286 }
13287 }
13288}
13289
#[doc(hidden)]
/// Block-partitioned preparation of the inverse-BWT tables.
///
/// Produces the same kind of output as `unbwt_init_single` (`p`, `bucket2`,
/// `fastbits`) but splits `t[..n]` into `threads` blocks and keeps one
/// scratch segment per block inside `buckets`. Each segment holds
/// `ALPHABET_SIZE` symbol slots followed by `ALPHABET_SIZE * ALPHABET_SIZE`
/// bigram slots.
///
/// Falls back to `unbwt_init_single` when at most one thread is requested,
/// when `n < 65_536`, or when no scratch `buckets` slice is supplied.
///
/// NOTE(review): the blocks are visited by ordinary sequential `for` loops in
/// this function; no parallel iterator is used here. Confirm whether that is
/// intentional.
///
/// `freq` is only forwarded to the single-block fallback; the block path
/// always recounts the symbols itself.
///
/// # Panics
///
/// Panics if `n` is negative, if `buckets` is shorter than
/// `threads * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)`, if `i` or `t`
/// is empty, or if `bucket2` does not hold exactly
/// `ALPHABET_SIZE * ALPHABET_SIZE` entries (required by the final
/// `copy_from_slice`).
pub fn unbwt_init_parallel(
    t: &[u8],
    p: &mut [SaUint],
    n: SaSint,
    freq: Option<&[SaSint]>,
    i: &[SaUint],
    bucket2: &mut [SaUint],
    fastbits: &mut [u16],
    buckets: Option<&mut [SaUint]>,
    threads: SaSint,
) {
    // Non-positive thread counts are treated as one.
    let num_threads = usize::try_from(threads.max(1)).expect("threads must be non-negative");
    if num_threads <= 1 || usize::try_from(n).expect("n must be non-negative") < 65_536 {
        unbwt_init_single(t, p, n, freq, i, bucket2, fastbits);
        return;
    }

    // Without scratch space the block scheme cannot run.
    let buckets = match buckets {
        Some(buckets) => buckets,
        None => {
            unbwt_init_single(t, p, n, freq, i, bucket2, fastbits);
            return;
        }
    };

    // One segment per block: symbol cursors first, bigram table after.
    let segment_len = ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE;
    assert!(buckets.len() >= num_threads * segment_len);

    let index = i[0] as usize;
    let lastc = t[0] as usize;
    // Smallest shift that brings `n` down to at most 2^UNBWT_FASTBITS.
    let mut shift = 0usize;
    while (usize::try_from(n).expect("n must be non-negative") >> shift)
        > (1usize << UNBWT_FASTBITS)
    {
        shift += 1;
    }

    let mut bucket1 = vec![0u64; ALPHABET_SIZE];
    bucket2.fill(0);

    // Block length is rounded down to a multiple of 16; the last block takes the rest.
    let n_fast = n as FastSint;
    let block_stride = (n_fast / num_threads as FastSint) & (-16);
    let mut block_starts = vec![0usize; num_threads];
    let mut block_sizes = vec![0usize; num_threads];

    // Pass 1: record each block's bounds and count its symbols into its own segment.
    for thread in 0..num_threads {
        let start = usize::try_from(thread as FastSint * block_stride)
            .expect("block start must be non-negative");
        let size = if thread + 1 < num_threads {
            usize::try_from(block_stride).expect("block stride must be non-negative")
        } else {
            usize::try_from(n_fast - thread as FastSint * block_stride)
                .expect("block size must be non-negative")
        };
        block_starts[thread] = start;
        block_sizes[thread] = size;

        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_local, _) = segment.split_at_mut(ALPHABET_SIZE);
        bucket1_local.fill(0);
        unbwt_compute_histogram(&t[start..], size as FastSint, bucket1_local);
    }

    // Pass 2: `bucket1` accumulates the global counts while each segment is
    // overwritten with the total contributed by the blocks before it.
    for thread in 0..num_threads {
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_temp, _) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE {
            let a = bucket1[c];
            let b = bucket1_temp[c];
            bucket1[c] = a + b;
            bucket1_temp[c] = a;
        }
    }

    // Global counts become global start positions (running sum starting at 1).
    let mut sum = 1usize;
    for c in 0..ALPHABET_SIZE {
        let prev = sum;
        sum += bucket1[c] as usize;
        bucket1[c] = prev as SaUint;
    }

    // Pass 3: turn each segment's offsets into absolute cursors and count the
    // block's bigrams. The helper advances the cursors past the block.
    for thread in 0..num_threads {
        let start = block_starts[thread];
        let size = block_sizes[thread];
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_local, bucket2_local) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE {
            bucket1_local[c] += bucket1[c];
        }
        bucket2_local.fill(0);
        unbwt_compute_bigram_histogram_parallel(
            t,
            index,
            bucket1_local,
            bucket2_local,
            start as FastSint,
            size as FastSint,
        );
    }

    // Pass 4: same accumulate-and-replace scheme as pass 2, now for bigram counts.
    for thread in 0..num_threads {
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (_, bucket2_temp) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE * ALPHABET_SIZE {
            let a = bucket2[c];
            let b = bucket2_temp[c];
            bucket2[c] = a + b;
            bucket2_temp[c] = a;
        }
    }

    // Converts the global bigram counts in `bucket2` to start positions in place.
    unbwt_calculate_fastbits(bucket2, fastbits, lastc, shift);

    // Pass 3 left every segment's symbol cursors at the end of its block.
    // Shift them down by one block (last to first) so each block starts where
    // its predecessor ended; block 0 gets the global start positions.
    for thread in (1..num_threads).rev() {
        let src_start = (thread - 1) * segment_len;
        let dst_start = thread * segment_len;
        let (head, tail) = buckets.split_at_mut(dst_start);
        let src = &head[src_start..src_start + ALPHABET_SIZE];
        let dst = &mut tail[..ALPHABET_SIZE];
        dst.copy_from_slice(src);
    }
    buckets[..ALPHABET_SIZE].copy_from_slice(&bucket1);

    // Pass 5: make each block's bigram offsets absolute, then scatter its positions into `p`.
    for thread in 0..num_threads {
        let start = block_starts[thread];
        let size = block_sizes[thread];
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_local, bucket2_local) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE * ALPHABET_SIZE {
            bucket2_local[c] += bucket2[c];
        }
        unbwt_calculate_bi_psi(
            t,
            p,
            bucket1_local,
            bucket2_local,
            index,
            start as FastSint,
            (start + size) as FastSint,
        );
    }

    // The last block's bigram cursors, advanced by pass 5, become the final `bucket2`.
    let last_segment = &buckets[(num_threads - 1) * segment_len..num_threads * segment_len];
    let (_, last_bucket2) = last_segment.split_at(ALPHABET_SIZE);
    bucket2.copy_from_slice(last_bucket2);
}
13438
/// Puts a 16-bit symbol pair into big-endian byte order.
///
/// Callers write the result with native-endian stores (`to_ne_bytes`), so the
/// high byte must end up first in memory on every host. `to_be` swaps the
/// bytes on little-endian targets and leaves them alone on big-endian ones.
/// An unconditional swap would reverse the two output bytes on big-endian
/// hosts.
fn bswap16(value: u16) -> u16 {
    value.to_be()
}
13442
13443fn unbwt_resolve_symbol(bucket2: &[SaUint], fastbits: &[u16], shift: FastUint, p: SaUint) -> u16 {
13444 let mut c = fastbits[(p as usize) >> shift];
13445 while bucket2[c as usize] <= p {
13446 c += 1;
13447 }
13448 c
13449}
13450
13451#[doc(hidden)]
13453pub fn unbwt_decode_1(
13454 u: &mut [u8],
13455 p: &[SaUint],
13456 bucket2: &[SaUint],
13457 fastbits: &[u16],
13458 shift: FastUint,
13459 i0: &mut FastUint,
13460 k: FastUint,
13461) {
13462 let words = &mut u[..2 * k];
13463 let mut p0 = *i0 as SaUint;
13464
13465 for i in 0..k {
13466 let c0 = unbwt_resolve_symbol(bucket2, fastbits, shift, p0);
13467 p0 = p[p0 as usize];
13468 let bytes = bswap16(c0).to_ne_bytes();
13469 words[2 * i] = bytes[0];
13470 words[2 * i + 1] = bytes[1];
13471 }
13472
13473 *i0 = p0 as FastUint;
13474}
13475
13476#[doc(hidden)]
13478pub fn unbwt_decode_2(
13479 u: &mut [u8],
13480 p: &[SaUint],
13481 bucket2: &[SaUint],
13482 fastbits: &[u16],
13483 shift: FastUint,
13484 r: FastUint,
13485 i0: &mut FastUint,
13486 i1: &mut FastUint,
13487 k: FastUint,
13488) {
13489 let width = 2 * k;
13490 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13491 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13492}
13493
13494#[doc(hidden)]
13496pub fn unbwt_decode_3(
13497 u: &mut [u8],
13498 p: &[SaUint],
13499 bucket2: &[SaUint],
13500 fastbits: &[u16],
13501 shift: FastUint,
13502 r: FastUint,
13503 i0: &mut FastUint,
13504 i1: &mut FastUint,
13505 i2: &mut FastUint,
13506 k: FastUint,
13507) {
13508 let width = 2 * k;
13509 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13510 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13511 unbwt_decode_1(
13512 &mut u[2 * r..2 * r + width],
13513 p,
13514 bucket2,
13515 fastbits,
13516 shift,
13517 i2,
13518 k,
13519 );
13520}
13521
13522#[doc(hidden)]
13524pub fn unbwt_decode_4(
13525 u: &mut [u8],
13526 p: &[SaUint],
13527 bucket2: &[SaUint],
13528 fastbits: &[u16],
13529 shift: FastUint,
13530 r: FastUint,
13531 i0: &mut FastUint,
13532 i1: &mut FastUint,
13533 i2: &mut FastUint,
13534 i3: &mut FastUint,
13535 k: FastUint,
13536) {
13537 let width = 2 * k;
13538 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13539 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13540 unbwt_decode_1(
13541 &mut u[2 * r..2 * r + width],
13542 p,
13543 bucket2,
13544 fastbits,
13545 shift,
13546 i2,
13547 k,
13548 );
13549 unbwt_decode_1(
13550 &mut u[3 * r..3 * r + width],
13551 p,
13552 bucket2,
13553 fastbits,
13554 shift,
13555 i3,
13556 k,
13557 );
13558}
13559
13560#[doc(hidden)]
13562pub fn unbwt_decode_5(
13563 u: &mut [u8],
13564 p: &[SaUint],
13565 bucket2: &[SaUint],
13566 fastbits: &[u16],
13567 shift: FastUint,
13568 r: FastUint,
13569 i0: &mut FastUint,
13570 i1: &mut FastUint,
13571 i2: &mut FastUint,
13572 i3: &mut FastUint,
13573 i4: &mut FastUint,
13574 k: FastUint,
13575) {
13576 let width = 2 * k;
13577 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13578 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13579 unbwt_decode_1(
13580 &mut u[2 * r..2 * r + width],
13581 p,
13582 bucket2,
13583 fastbits,
13584 shift,
13585 i2,
13586 k,
13587 );
13588 unbwt_decode_1(
13589 &mut u[3 * r..3 * r + width],
13590 p,
13591 bucket2,
13592 fastbits,
13593 shift,
13594 i3,
13595 k,
13596 );
13597 unbwt_decode_1(
13598 &mut u[4 * r..4 * r + width],
13599 p,
13600 bucket2,
13601 fastbits,
13602 shift,
13603 i4,
13604 k,
13605 );
13606}
13607
13608#[doc(hidden)]
13610pub fn unbwt_decode_6(
13611 u: &mut [u8],
13612 p: &[SaUint],
13613 bucket2: &[SaUint],
13614 fastbits: &[u16],
13615 shift: FastUint,
13616 r: FastUint,
13617 i0: &mut FastUint,
13618 i1: &mut FastUint,
13619 i2: &mut FastUint,
13620 i3: &mut FastUint,
13621 i4: &mut FastUint,
13622 i5: &mut FastUint,
13623 k: FastUint,
13624) {
13625 let width = 2 * k;
13626 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13627 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13628 unbwt_decode_1(
13629 &mut u[2 * r..2 * r + width],
13630 p,
13631 bucket2,
13632 fastbits,
13633 shift,
13634 i2,
13635 k,
13636 );
13637 unbwt_decode_1(
13638 &mut u[3 * r..3 * r + width],
13639 p,
13640 bucket2,
13641 fastbits,
13642 shift,
13643 i3,
13644 k,
13645 );
13646 unbwt_decode_1(
13647 &mut u[4 * r..4 * r + width],
13648 p,
13649 bucket2,
13650 fastbits,
13651 shift,
13652 i4,
13653 k,
13654 );
13655 unbwt_decode_1(
13656 &mut u[5 * r..5 * r + width],
13657 p,
13658 bucket2,
13659 fastbits,
13660 shift,
13661 i5,
13662 k,
13663 );
13664}
13665
13666#[doc(hidden)]
13668pub fn unbwt_decode_7(
13669 u: &mut [u8],
13670 p: &[SaUint],
13671 bucket2: &[SaUint],
13672 fastbits: &[u16],
13673 shift: FastUint,
13674 r: FastUint,
13675 i0: &mut FastUint,
13676 i1: &mut FastUint,
13677 i2: &mut FastUint,
13678 i3: &mut FastUint,
13679 i4: &mut FastUint,
13680 i5: &mut FastUint,
13681 i6: &mut FastUint,
13682 k: FastUint,
13683) {
13684 let width = 2 * k;
13685 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13686 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13687 unbwt_decode_1(
13688 &mut u[2 * r..2 * r + width],
13689 p,
13690 bucket2,
13691 fastbits,
13692 shift,
13693 i2,
13694 k,
13695 );
13696 unbwt_decode_1(
13697 &mut u[3 * r..3 * r + width],
13698 p,
13699 bucket2,
13700 fastbits,
13701 shift,
13702 i3,
13703 k,
13704 );
13705 unbwt_decode_1(
13706 &mut u[4 * r..4 * r + width],
13707 p,
13708 bucket2,
13709 fastbits,
13710 shift,
13711 i4,
13712 k,
13713 );
13714 unbwt_decode_1(
13715 &mut u[5 * r..5 * r + width],
13716 p,
13717 bucket2,
13718 fastbits,
13719 shift,
13720 i5,
13721 k,
13722 );
13723 unbwt_decode_1(
13724 &mut u[6 * r..6 * r + width],
13725 p,
13726 bucket2,
13727 fastbits,
13728 shift,
13729 i6,
13730 k,
13731 );
13732}
13733
13734#[doc(hidden)]
13736pub fn unbwt_decode_8(
13737 u: &mut [u8],
13738 p: &[SaUint],
13739 bucket2: &[SaUint],
13740 fastbits: &[u16],
13741 shift: FastUint,
13742 r: FastUint,
13743 i0: &mut FastUint,
13744 i1: &mut FastUint,
13745 i2: &mut FastUint,
13746 i3: &mut FastUint,
13747 i4: &mut FastUint,
13748 i5: &mut FastUint,
13749 i6: &mut FastUint,
13750 i7: &mut FastUint,
13751 k: FastUint,
13752) {
13753 let width = 2 * k;
13754 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13755 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13756 unbwt_decode_1(
13757 &mut u[2 * r..2 * r + width],
13758 p,
13759 bucket2,
13760 fastbits,
13761 shift,
13762 i2,
13763 k,
13764 );
13765 unbwt_decode_1(
13766 &mut u[3 * r..3 * r + width],
13767 p,
13768 bucket2,
13769 fastbits,
13770 shift,
13771 i3,
13772 k,
13773 );
13774 unbwt_decode_1(
13775 &mut u[4 * r..4 * r + width],
13776 p,
13777 bucket2,
13778 fastbits,
13779 shift,
13780 i4,
13781 k,
13782 );
13783 unbwt_decode_1(
13784 &mut u[5 * r..5 * r + width],
13785 p,
13786 bucket2,
13787 fastbits,
13788 shift,
13789 i5,
13790 k,
13791 );
13792 unbwt_decode_1(
13793 &mut u[6 * r..6 * r + width],
13794 p,
13795 bucket2,
13796 fastbits,
13797 shift,
13798 i6,
13799 k,
13800 );
13801 unbwt_decode_1(
13802 &mut u[7 * r..7 * r + width],
13803 p,
13804 bucket2,
13805 fastbits,
13806 shift,
13807 i7,
13808 k,
13809 );
13810}
13811
13812#[doc(hidden)]
13814pub fn unbwt_decode(
13815 u: &mut [u8],
13816 p: &[SaUint],
13817 n: SaSint,
13818 r: SaSint,
13819 i: &[SaUint],
13820 bucket2: &[SaUint],
13821 fastbits: &[u16],
13822 mut blocks: FastSint,
13823 remainder: FastUint,
13824) {
13825 let mut shift = 0usize;
13826 while (usize::try_from(n).expect("n must be non-negative") >> shift)
13827 > (1usize << UNBWT_FASTBITS)
13828 {
13829 shift += 1;
13830 }
13831 let mut offset = 0usize;
13832 let mut i_index = 0usize;
13833 let r_usize = usize::try_from(r).expect("r must be non-negative");
13834
13835 while blocks > 8 {
13836 let mut i0 = i[i_index] as FastUint;
13837 let mut i1 = i[i_index + 1] as FastUint;
13838 let mut i2 = i[i_index + 2] as FastUint;
13839 let mut i3 = i[i_index + 3] as FastUint;
13840 let mut i4 = i[i_index + 4] as FastUint;
13841 let mut i5 = i[i_index + 5] as FastUint;
13842 let mut i6 = i[i_index + 6] as FastUint;
13843 let mut i7 = i[i_index + 7] as FastUint;
13844 unbwt_decode_8(
13845 &mut u[offset..],
13846 p,
13847 bucket2,
13848 fastbits,
13849 shift,
13850 r_usize,
13851 &mut i0,
13852 &mut i1,
13853 &mut i2,
13854 &mut i3,
13855 &mut i4,
13856 &mut i5,
13857 &mut i6,
13858 &mut i7,
13859 r_usize >> 1,
13860 );
13861 i_index += 8;
13862 blocks -= 8;
13863 offset += 8 * r_usize;
13864 }
13865
13866 match blocks {
13867 1 => {
13868 let mut i0 = i[i_index] as FastUint;
13869 unbwt_decode_1(
13870 &mut u[offset..],
13871 p,
13872 bucket2,
13873 fastbits,
13874 shift,
13875 &mut i0,
13876 remainder >> 1,
13877 );
13878 }
13879 2 => {
13880 let mut i0 = i[i_index] as FastUint;
13881 let mut i1 = i[i_index + 1] as FastUint;
13882 unbwt_decode_2(
13883 &mut u[offset..],
13884 p,
13885 bucket2,
13886 fastbits,
13887 shift,
13888 r_usize,
13889 &mut i0,
13890 &mut i1,
13891 remainder >> 1,
13892 );
13893 unbwt_decode_1(
13894 &mut u[offset + 2 * (remainder >> 1)..],
13895 p,
13896 bucket2,
13897 fastbits,
13898 shift,
13899 &mut i0,
13900 (r_usize >> 1) - (remainder >> 1),
13901 );
13902 }
13903 3 => {
13904 let mut i0 = i[i_index] as FastUint;
13905 let mut i1 = i[i_index + 1] as FastUint;
13906 let mut i2 = i[i_index + 2] as FastUint;
13907 unbwt_decode_3(
13908 &mut u[offset..],
13909 p,
13910 bucket2,
13911 fastbits,
13912 shift,
13913 r_usize,
13914 &mut i0,
13915 &mut i1,
13916 &mut i2,
13917 remainder >> 1,
13918 );
13919 unbwt_decode_2(
13920 &mut u[offset + 2 * (remainder >> 1)..],
13921 p,
13922 bucket2,
13923 fastbits,
13924 shift,
13925 r_usize,
13926 &mut i0,
13927 &mut i1,
13928 (r_usize >> 1) - (remainder >> 1),
13929 );
13930 }
13931 4 => {
13932 let mut i0 = i[i_index] as FastUint;
13933 let mut i1 = i[i_index + 1] as FastUint;
13934 let mut i2 = i[i_index + 2] as FastUint;
13935 let mut i3 = i[i_index + 3] as FastUint;
13936 unbwt_decode_4(
13937 &mut u[offset..],
13938 p,
13939 bucket2,
13940 fastbits,
13941 shift,
13942 r_usize,
13943 &mut i0,
13944 &mut i1,
13945 &mut i2,
13946 &mut i3,
13947 remainder >> 1,
13948 );
13949 unbwt_decode_3(
13950 &mut u[offset + 2 * (remainder >> 1)..],
13951 p,
13952 bucket2,
13953 fastbits,
13954 shift,
13955 r_usize,
13956 &mut i0,
13957 &mut i1,
13958 &mut i2,
13959 (r_usize >> 1) - (remainder >> 1),
13960 );
13961 }
13962 5 => {
13963 let mut i0 = i[i_index] as FastUint;
13964 let mut i1 = i[i_index + 1] as FastUint;
13965 let mut i2 = i[i_index + 2] as FastUint;
13966 let mut i3 = i[i_index + 3] as FastUint;
13967 let mut i4 = i[i_index + 4] as FastUint;
13968 unbwt_decode_5(
13969 &mut u[offset..],
13970 p,
13971 bucket2,
13972 fastbits,
13973 shift,
13974 r_usize,
13975 &mut i0,
13976 &mut i1,
13977 &mut i2,
13978 &mut i3,
13979 &mut i4,
13980 remainder >> 1,
13981 );
13982 unbwt_decode_4(
13983 &mut u[offset + 2 * (remainder >> 1)..],
13984 p,
13985 bucket2,
13986 fastbits,
13987 shift,
13988 r_usize,
13989 &mut i0,
13990 &mut i1,
13991 &mut i2,
13992 &mut i3,
13993 (r_usize >> 1) - (remainder >> 1),
13994 );
13995 }
13996 6 => {
13997 let mut i0 = i[i_index] as FastUint;
13998 let mut i1 = i[i_index + 1] as FastUint;
13999 let mut i2 = i[i_index + 2] as FastUint;
14000 let mut i3 = i[i_index + 3] as FastUint;
14001 let mut i4 = i[i_index + 4] as FastUint;
14002 let mut i5 = i[i_index + 5] as FastUint;
14003 unbwt_decode_6(
14004 &mut u[offset..],
14005 p,
14006 bucket2,
14007 fastbits,
14008 shift,
14009 r_usize,
14010 &mut i0,
14011 &mut i1,
14012 &mut i2,
14013 &mut i3,
14014 &mut i4,
14015 &mut i5,
14016 remainder >> 1,
14017 );
14018 unbwt_decode_5(
14019 &mut u[offset + 2 * (remainder >> 1)..],
14020 p,
14021 bucket2,
14022 fastbits,
14023 shift,
14024 r_usize,
14025 &mut i0,
14026 &mut i1,
14027 &mut i2,
14028 &mut i3,
14029 &mut i4,
14030 (r_usize >> 1) - (remainder >> 1),
14031 );
14032 }
14033 7 => {
14034 let mut i0 = i[i_index] as FastUint;
14035 let mut i1 = i[i_index + 1] as FastUint;
14036 let mut i2 = i[i_index + 2] as FastUint;
14037 let mut i3 = i[i_index + 3] as FastUint;
14038 let mut i4 = i[i_index + 4] as FastUint;
14039 let mut i5 = i[i_index + 5] as FastUint;
14040 let mut i6 = i[i_index + 6] as FastUint;
14041 unbwt_decode_7(
14042 &mut u[offset..],
14043 p,
14044 bucket2,
14045 fastbits,
14046 shift,
14047 r_usize,
14048 &mut i0,
14049 &mut i1,
14050 &mut i2,
14051 &mut i3,
14052 &mut i4,
14053 &mut i5,
14054 &mut i6,
14055 remainder >> 1,
14056 );
14057 unbwt_decode_6(
14058 &mut u[offset + 2 * (remainder >> 1)..],
14059 p,
14060 bucket2,
14061 fastbits,
14062 shift,
14063 r_usize,
14064 &mut i0,
14065 &mut i1,
14066 &mut i2,
14067 &mut i3,
14068 &mut i4,
14069 &mut i5,
14070 (r_usize >> 1) - (remainder >> 1),
14071 );
14072 }
14073 8 => {
14074 let mut i0 = i[i_index] as FastUint;
14075 let mut i1 = i[i_index + 1] as FastUint;
14076 let mut i2 = i[i_index + 2] as FastUint;
14077 let mut i3 = i[i_index + 3] as FastUint;
14078 let mut i4 = i[i_index + 4] as FastUint;
14079 let mut i5 = i[i_index + 5] as FastUint;
14080 let mut i6 = i[i_index + 6] as FastUint;
14081 let mut i7 = i[i_index + 7] as FastUint;
14082 unbwt_decode_8(
14083 &mut u[offset..],
14084 p,
14085 bucket2,
14086 fastbits,
14087 shift,
14088 r_usize,
14089 &mut i0,
14090 &mut i1,
14091 &mut i2,
14092 &mut i3,
14093 &mut i4,
14094 &mut i5,
14095 &mut i6,
14096 &mut i7,
14097 remainder >> 1,
14098 );
14099 unbwt_decode_7(
14100 &mut u[offset + 2 * (remainder >> 1)..],
14101 p,
14102 bucket2,
14103 fastbits,
14104 shift,
14105 r_usize,
14106 &mut i0,
14107 &mut i1,
14108 &mut i2,
14109 &mut i3,
14110 &mut i4,
14111 &mut i5,
14112 &mut i6,
14113 (r_usize >> 1) - (remainder >> 1),
14114 );
14115 }
14116 _ => {}
14117 }
14118}
14119
14120#[doc(hidden)]
14122pub fn unbwt_decode_omp(
14123 t: &[u8],
14124 u: &mut [u8],
14125 p: &[SaUint],
14126 n: SaSint,
14127 r: SaSint,
14128 i: &[SaUint],
14129 bucket2: &[SaUint],
14130 fastbits: &[u16],
14131 threads: SaSint,
14132) {
14133 let lastc = t[0];
14134 let blocks = 1 + ((n as FastSint - 1) / r as FastSint);
14135 let remainder = usize::try_from(n).expect("n must be non-negative")
14136 - usize::try_from(r).expect("r must be non-negative")
14137 * (usize::try_from(blocks).expect("blocks") - 1);
14138 let max_threads = usize::try_from(blocks.min(threads.max(1) as FastSint))
14139 .expect("thread count must fit usize");
14140 let block_stride = usize::try_from(blocks).expect("blocks must be non-negative") / max_threads;
14141 let block_remainder =
14142 usize::try_from(blocks).expect("blocks must be non-negative") % max_threads;
14143 let r_usize = usize::try_from(r).expect("r must be non-negative");
14144
14145 for thread in 0..max_threads {
14146 let block_size = block_stride + usize::from(thread < block_remainder);
14147 let block_start = block_stride * thread + thread.min(block_remainder);
14148 unbwt_decode(
14149 &mut u[r_usize * block_start..],
14150 p,
14151 n,
14152 r,
14153 &i[block_start..],
14154 bucket2,
14155 fastbits,
14156 block_size as FastSint,
14157 if thread + 1 < max_threads {
14158 r_usize
14159 } else {
14160 remainder
14161 },
14162 );
14163 }
14164 u[usize::try_from(n).expect("n must be non-negative") - 1] = lastc;
14165}
14166
14167#[doc(hidden)]
14169pub fn unbwt_core(
14170 t: &[u8],
14171 u: &mut [u8],
14172 p: &mut [SaUint],
14173 n: SaSint,
14174 freq: Option<&[SaSint]>,
14175 r: SaSint,
14176 i: &[SaUint],
14177 bucket2: &mut [SaUint],
14178 fastbits: &mut [u16],
14179 buckets: Option<&mut [SaUint]>,
14180 threads: SaSint,
14181) -> SaSint {
14182 if threads > 1 && n >= 262_144 {
14183 unbwt_init_parallel(t, p, n, freq, i, bucket2, fastbits, buckets, threads);
14184 } else {
14185 unbwt_init_single(t, p, n, freq, i, bucket2, fastbits);
14186 }
14187
14188 unbwt_decode_omp(t, u, p, n, r, i, bucket2, fastbits, threads);
14189 0
14190}
14191
14192#[doc(hidden)]
14194pub fn unbwt_main(
14195 t: &[u8],
14196 u: &mut [u8],
14197 p: &mut [SaUint],
14198 n: SaSint,
14199 freq: Option<&[SaSint]>,
14200 r: SaSint,
14201 i: &[SaUint],
14202 threads: SaSint,
14203) -> SaSint {
14204 let mut shift = 0usize;
14205 while (usize::try_from(n).expect("n must be non-negative") >> shift)
14206 > (1usize << UNBWT_FASTBITS)
14207 {
14208 shift += 1;
14209 }
14210
14211 let mut bucket2 = vec![0u64; ALPHABET_SIZE * ALPHABET_SIZE];
14212 let mut fastbits =
14213 vec![0u16; 1 + (usize::try_from(n).expect("n must be non-negative") >> shift)];
14214 let mut buckets = if threads > 1 && n >= 262_144 {
14215 Some(vec![
14216 0u64;
14217 usize::try_from(threads)
14218 .expect("threads must be non-negative")
14219 * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)
14220 ])
14221 } else {
14222 None
14223 };
14224
14225 unbwt_core(
14226 t,
14227 u,
14228 p,
14229 n,
14230 freq,
14231 r,
14232 i,
14233 &mut bucket2,
14234 &mut fastbits,
14235 buckets.as_deref_mut(),
14236 threads,
14237 )
14238}
14239
14240#[doc(hidden)]
14242pub fn unbwt_main_ctx(
14243 ctx: &mut UnbwtContext,
14244 t: &[u8],
14245 u: &mut [u8],
14246 p: &mut [SaUint],
14247 n: SaSint,
14248 freq: Option<&[SaSint]>,
14249 r: SaSint,
14250 i: &[SaUint],
14251) -> SaSint {
14252 if ctx.threads <= 0 {
14253 return -2;
14254 }
14255 let mut shift = 0usize;
14256 while (usize::try_from(n).expect("n must be non-negative") >> shift)
14257 > (1usize << UNBWT_FASTBITS)
14258 {
14259 shift += 1;
14260 }
14261 let required_fastbits = 1 + (usize::try_from(n).expect("n must be non-negative") >> shift);
14262 if ctx.bucket2.len() < ALPHABET_SIZE * ALPHABET_SIZE
14263 || ctx.fastbits.len() < required_fastbits
14264 || (ctx.threads > 1 && ctx.buckets.is_none())
14265 {
14266 return -2;
14267 }
14268
14269 unbwt_core(
14270 t,
14271 u,
14272 p,
14273 n,
14274 freq,
14275 r,
14276 i,
14277 &mut ctx.bucket2,
14278 &mut ctx.fastbits,
14279 ctx.buckets.as_deref_mut(),
14280 ctx.threads as SaSint,
14281 )
14282}
14283
14284pub fn libsais64_unbwt(
14294 t: &[u8],
14295 u: &mut [u8],
14296 a: &mut [SaSint],
14297 freq: Option<&[SaSint]>,
14298 i: SaSint,
14299) -> SaSint {
14300 libsais64_unbwt_aux(
14301 t,
14302 u,
14303 a,
14304 freq,
14305 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
14306 &[i],
14307 )
14308}
14309
14310pub fn libsais64_unbwt_ctx(
14321 ctx: &mut UnbwtContext,
14322 t: &[u8],
14323 u: &mut [u8],
14324 a: &mut [SaSint],
14325 freq: Option<&[SaSint]>,
14326 i: SaSint,
14327) -> SaSint {
14328 libsais64_unbwt_aux_ctx(
14329 ctx,
14330 t,
14331 u,
14332 a,
14333 freq,
14334 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
14335 &[i],
14336 )
14337}
14338
14339pub fn libsais64_unbwt_aux(
14350 t: &[u8],
14351 u: &mut [u8],
14352 a: &mut [SaSint],
14353 freq: Option<&[SaSint]>,
14354 r: SaSint,
14355 i: &[SaSint],
14356) -> SaSint {
14357 let t_len = t.len();
14358 let n = SaSint::try_from(t_len).expect("input length must fit SaSint");
14359 if u.len() < t_len
14360 || a.len() < t_len
14361 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
14362 || (r != n && (r < 2 || (r & (r - 1)) != 0))
14363 {
14364 return -1;
14365 }
14366 let sample_count = if n == 0 {
14367 1
14368 } else {
14369 ((n - 1) / r + 1) as usize
14370 };
14371 if i.len() < sample_count {
14372 return -1;
14373 }
14374
14375 if n <= 1 {
14376 if i[0] != n {
14377 return -1;
14378 }
14379 if n == 1 {
14380 u[0] = t[0];
14381 }
14382 return 0;
14383 }
14384
14385 for t in 0..sample_count {
14386 let sample = i[t];
14387 if sample <= 0 || sample > n {
14388 return -1;
14389 }
14390 }
14391
14392 let i_uint: Vec<SaUint> = i
14393 .iter()
14394 .take(sample_count)
14395 .map(|&sample| SaUint::try_from(sample).expect("sample was validated positive"))
14396 .collect();
14397 let mut p = vec![0u64; t_len + 1];
14398 let result = unbwt_main(t, u, &mut p, n, freq, r, &i_uint, 1);
14399 for t in 0..t_len {
14400 a[t] = p[t] as SaSint;
14401 }
14402 result
14403}
14404
14405pub fn libsais64_unbwt_aux_ctx(
14417 ctx: &mut UnbwtContext,
14418 t: &[u8],
14419 u: &mut [u8],
14420 a: &mut [SaSint],
14421 freq: Option<&[SaSint]>,
14422 r: SaSint,
14423 i: &[SaSint],
14424) -> SaSint {
14425 let t_len = t.len();
14426 let n = SaSint::try_from(t_len).expect("input length must fit SaSint");
14427 if u.len() < t_len
14428 || a.len() < t_len
14429 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
14430 || (r != n && (r < 2 || (r & (r - 1)) != 0))
14431 {
14432 return -1;
14433 }
14434 let sample_count = if n == 0 {
14435 1
14436 } else {
14437 ((n - 1) / r + 1) as usize
14438 };
14439 if i.len() < sample_count {
14440 return -1;
14441 }
14442
14443 if n <= 1 {
14444 if i[0] != n {
14445 return -1;
14446 }
14447 if n == 1 {
14448 u[0] = t[0];
14449 }
14450 return 0;
14451 }
14452
14453 for t in 0..sample_count {
14454 let sample = i[t];
14455 if sample <= 0 || sample > n {
14456 return -1;
14457 }
14458 }
14459
14460 let i_uint: Vec<SaUint> = i
14461 .iter()
14462 .take(sample_count)
14463 .map(|&sample| SaUint::try_from(sample).expect("sample was validated positive"))
14464 .collect();
14465 let mut p = vec![0u64; t_len + 1];
14466 let result = unbwt_main_ctx(ctx, t, u, &mut p, n, freq, r, &i_uint);
14467 for t in 0..t_len {
14468 a[t] = p[t] as SaSint;
14469 }
14470 result
14471}
14472
14473pub fn unbwt_create_ctx_omp(threads: SaSint) -> Option<UnbwtContext> {
14481 if threads < 0 {
14482 return None;
14483 }
14484 unbwt_create_ctx_main(normalize_omp_threads(threads))
14485}
14486
14487pub fn libsais64_unbwt_omp(
14498 t: &[u8],
14499 u: &mut [u8],
14500 a: &mut [SaSint],
14501 freq: Option<&[SaSint]>,
14502 i: SaSint,
14503 threads: SaSint,
14504) -> SaSint {
14505 libsais64_unbwt_aux_omp(
14506 t,
14507 u,
14508 a,
14509 freq,
14510 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
14511 &[i],
14512 threads,
14513 )
14514}
14515
14516pub fn libsais64_unbwt_aux_omp(
14528 t: &[u8],
14529 u: &mut [u8],
14530 a: &mut [SaSint],
14531 freq: Option<&[SaSint]>,
14532 r: SaSint,
14533 i: &[SaSint],
14534 threads: SaSint,
14535) -> SaSint {
14536 let t_len = t.len();
14537 let n = SaSint::try_from(t_len).expect("input length must fit SaSint");
14538 if threads < 0
14539 || u.len() < t_len
14540 || a.len() < t_len
14541 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
14542 || (r != n && (r < 2 || (r & (r - 1)) != 0))
14543 {
14544 return -1;
14545 }
14546 let sample_count = if n == 0 {
14547 1
14548 } else {
14549 ((n - 1) / r + 1) as usize
14550 };
14551 if i.len() < sample_count {
14552 return -1;
14553 }
14554
14555 if n <= 1 {
14556 if i[0] != n {
14557 return -1;
14558 }
14559 if n == 1 {
14560 u[0] = t[0];
14561 }
14562 return 0;
14563 }
14564
14565 for sample in i.iter().take(sample_count) {
14566 let sample = *sample;
14567 if sample <= 0 || sample > n {
14568 return -1;
14569 }
14570 }
14571
14572 let threads = if threads > 0 { threads } else { 1 };
14573 let i_uint: Vec<SaUint> = i
14574 .iter()
14575 .take(sample_count)
14576 .map(|&sample| SaUint::try_from(sample).expect("sample was validated positive"))
14577 .collect();
14578 let mut p = vec![0u64; t_len + 1];
14579 let result = unbwt_main(t, u, &mut p, n, freq, r, &i_uint, threads);
14580 for idx in 0..t_len {
14581 a[idx] = p[idx] as SaSint;
14582 }
14583 result
14584}
14585
14586#[doc(hidden)]
14588pub fn bwt_copy_8u(u: &mut [u8], a: &[SaSint], n: SaSint) {
14589 if n <= 0 {
14590 return;
14591 }
14592
14593 let n_usize = usize::try_from(n).expect("n must be non-negative");
14594 for i in 0..n_usize {
14595 u[i] = a[i] as u8;
14596 }
14597}
14598
14599#[doc(hidden)]
14601pub fn bwt_copy_8u_omp(u: &mut [u8], a: &[SaSint], n: SaSint, threads: SaSint) {
14602 if threads == 1 || n < 65_536 {
14603 bwt_copy_8u(u, a, n);
14604 return;
14605 }
14606
14607 let n_usize = usize::try_from(n).expect("n must be non-negative");
14608 assert!(u.len() >= n_usize);
14609 assert!(a.len() >= n_usize);
14610 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
14611 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
14612 let a_ptr = a.as_ptr() as usize;
14613 run_rayon_with_threads(threads_usize, || {
14614 u[..n_usize]
14615 .par_chunks_mut(chunk_size)
14616 .enumerate()
14617 .for_each(|(chunk_index, chunk)| {
14618 let start = chunk_index * chunk_size;
14619 let dst_ptr = chunk.as_mut_ptr();
14620 let src_ptr = unsafe { (a_ptr as *const SaSint).add(start) };
14621 for offset in 0..chunk.len() {
14622 unsafe {
14623 *dst_ptr.add(offset) = *src_ptr.add(offset) as u8;
14624 }
14625 }
14626 });
14627 });
14628}
14629
14630#[doc(hidden)]
14632pub fn accumulate_counts_s32_2(bucket00: &mut [SaSint], bucket01: &[SaSint]) {
14633 assert_eq!(bucket00.len(), bucket01.len());
14634 for (dst, src) in bucket00.iter_mut().zip(bucket01.iter()) {
14635 *dst += *src;
14636 }
14637}
14638
14639#[doc(hidden)]
14641pub fn accumulate_counts_s32_3(bucket00: &mut [SaSint], bucket01: &[SaSint], bucket02: &[SaSint]) {
14642 assert_eq!(bucket00.len(), bucket01.len());
14643 assert_eq!(bucket00.len(), bucket02.len());
14644 for ((dst, src1), src2) in bucket00
14645 .iter_mut()
14646 .zip(bucket01.iter())
14647 .zip(bucket02.iter())
14648 {
14649 *dst += *src1 + *src2;
14650 }
14651}
14652
14653#[doc(hidden)]
14655pub fn accumulate_counts_s32_4(
14656 bucket00: &mut [SaSint],
14657 bucket01: &[SaSint],
14658 bucket02: &[SaSint],
14659 bucket03: &[SaSint],
14660) {
14661 assert_eq!(bucket00.len(), bucket01.len());
14662 assert_eq!(bucket00.len(), bucket02.len());
14663 assert_eq!(bucket00.len(), bucket03.len());
14664 for (((dst, src1), src2), src3) in bucket00
14665 .iter_mut()
14666 .zip(bucket01.iter())
14667 .zip(bucket02.iter())
14668 .zip(bucket03.iter())
14669 {
14670 *dst += *src1 + *src2 + *src3;
14671 }
14672}
14673
14674#[doc(hidden)]
14676pub fn accumulate_counts_s32_5(
14677 bucket00: &mut [SaSint],
14678 bucket01: &[SaSint],
14679 bucket02: &[SaSint],
14680 bucket03: &[SaSint],
14681 bucket04: &[SaSint],
14682) {
14683 assert_eq!(bucket00.len(), bucket01.len());
14684 assert_eq!(bucket00.len(), bucket02.len());
14685 assert_eq!(bucket00.len(), bucket03.len());
14686 assert_eq!(bucket00.len(), bucket04.len());
14687 for ((((dst, src1), src2), src3), src4) in bucket00
14688 .iter_mut()
14689 .zip(bucket01.iter())
14690 .zip(bucket02.iter())
14691 .zip(bucket03.iter())
14692 .zip(bucket04.iter())
14693 {
14694 *dst += *src1 + *src2 + *src3 + *src4;
14695 }
14696}
14697
14698#[doc(hidden)]
14700pub fn accumulate_counts_s32_6(
14701 bucket00: &mut [SaSint],
14702 bucket01: &[SaSint],
14703 bucket02: &[SaSint],
14704 bucket03: &[SaSint],
14705 bucket04: &[SaSint],
14706 bucket05: &[SaSint],
14707) {
14708 assert_eq!(bucket00.len(), bucket01.len());
14709 assert_eq!(bucket00.len(), bucket02.len());
14710 assert_eq!(bucket00.len(), bucket03.len());
14711 assert_eq!(bucket00.len(), bucket04.len());
14712 assert_eq!(bucket00.len(), bucket05.len());
14713 for (((((dst, src1), src2), src3), src4), src5) in bucket00
14714 .iter_mut()
14715 .zip(bucket01.iter())
14716 .zip(bucket02.iter())
14717 .zip(bucket03.iter())
14718 .zip(bucket04.iter())
14719 .zip(bucket05.iter())
14720 {
14721 *dst += *src1 + *src2 + *src3 + *src4 + *src5;
14722 }
14723}
14724
14725#[doc(hidden)]
14727pub fn accumulate_counts_s32_7(
14728 bucket00: &mut [SaSint],
14729 bucket01: &[SaSint],
14730 bucket02: &[SaSint],
14731 bucket03: &[SaSint],
14732 bucket04: &[SaSint],
14733 bucket05: &[SaSint],
14734 bucket06: &[SaSint],
14735) {
14736 assert_eq!(bucket00.len(), bucket01.len());
14737 assert_eq!(bucket00.len(), bucket02.len());
14738 assert_eq!(bucket00.len(), bucket03.len());
14739 assert_eq!(bucket00.len(), bucket04.len());
14740 assert_eq!(bucket00.len(), bucket05.len());
14741 assert_eq!(bucket00.len(), bucket06.len());
14742 for ((((((dst, src1), src2), src3), src4), src5), src6) in bucket00
14743 .iter_mut()
14744 .zip(bucket01.iter())
14745 .zip(bucket02.iter())
14746 .zip(bucket03.iter())
14747 .zip(bucket04.iter())
14748 .zip(bucket05.iter())
14749 .zip(bucket06.iter())
14750 {
14751 *dst += *src1 + *src2 + *src3 + *src4 + *src5 + *src6;
14752 }
14753}
14754
14755#[doc(hidden)]
14757pub fn accumulate_counts_s32_8(
14758 bucket00: &mut [SaSint],
14759 bucket01: &[SaSint],
14760 bucket02: &[SaSint],
14761 bucket03: &[SaSint],
14762 bucket04: &[SaSint],
14763 bucket05: &[SaSint],
14764 bucket06: &[SaSint],
14765 bucket07: &[SaSint],
14766) {
14767 assert_eq!(bucket00.len(), bucket01.len());
14768 assert_eq!(bucket00.len(), bucket02.len());
14769 assert_eq!(bucket00.len(), bucket03.len());
14770 assert_eq!(bucket00.len(), bucket04.len());
14771 assert_eq!(bucket00.len(), bucket05.len());
14772 assert_eq!(bucket00.len(), bucket06.len());
14773 assert_eq!(bucket00.len(), bucket07.len());
14774 for (((((((dst, src1), src2), src3), src4), src5), src6), src7) in bucket00
14775 .iter_mut()
14776 .zip(bucket01.iter())
14777 .zip(bucket02.iter())
14778 .zip(bucket03.iter())
14779 .zip(bucket04.iter())
14780 .zip(bucket05.iter())
14781 .zip(bucket06.iter())
14782 .zip(bucket07.iter())
14783 {
14784 *dst += *src1 + *src2 + *src3 + *src4 + *src5 + *src6 + *src7;
14785 }
14786}
14787
14788#[doc(hidden)]
14790pub fn accumulate_counts_s32_9(
14791 bucket00: &mut [SaSint],
14792 bucket01: &[SaSint],
14793 bucket02: &[SaSint],
14794 bucket03: &[SaSint],
14795 bucket04: &[SaSint],
14796 bucket05: &[SaSint],
14797 bucket06: &[SaSint],
14798 bucket07: &[SaSint],
14799 bucket08: &[SaSint],
14800) {
14801 assert_eq!(bucket00.len(), bucket01.len());
14802 assert_eq!(bucket00.len(), bucket02.len());
14803 assert_eq!(bucket00.len(), bucket03.len());
14804 assert_eq!(bucket00.len(), bucket04.len());
14805 assert_eq!(bucket00.len(), bucket05.len());
14806 assert_eq!(bucket00.len(), bucket06.len());
14807 assert_eq!(bucket00.len(), bucket07.len());
14808 assert_eq!(bucket00.len(), bucket08.len());
14809 for ((((((((dst, src1), src2), src3), src4), src5), src6), src7), src8) in bucket00
14810 .iter_mut()
14811 .zip(bucket01.iter())
14812 .zip(bucket02.iter())
14813 .zip(bucket03.iter())
14814 .zip(bucket04.iter())
14815 .zip(bucket05.iter())
14816 .zip(bucket06.iter())
14817 .zip(bucket07.iter())
14818 .zip(bucket08.iter())
14819 {
14820 *dst += *src1 + *src2 + *src3 + *src4 + *src5 + *src6 + *src7 + *src8;
14821 }
14822}
14823
14824#[doc(hidden)]
14826pub fn accumulate_counts_s32(
14827 buckets: &mut [SaSint],
14828 bucket_size: FastSint,
14829 bucket_stride: FastSint,
14830 mut num_buckets: FastSint,
14831) {
14832 if num_buckets <= 1 {
14833 return;
14834 }
14835
14836 let bucket_size = usize::try_from(bucket_size).expect("bucket_size must be non-negative");
14837 let bucket_stride = usize::try_from(bucket_stride).expect("bucket_stride must be non-negative");
14838 let num_buckets_usize = usize::try_from(num_buckets).expect("num_buckets must be non-negative");
14839 assert!(buckets.len() >= bucket_size + (num_buckets_usize - 1) * bucket_stride);
14840 let bucket00_start = (num_buckets_usize - 1) * bucket_stride;
14841
14842 while num_buckets >= 9 {
14843 let start = bucket00_start
14844 - usize::try_from(num_buckets - 9).expect("non-negative") * bucket_stride;
14845 accumulate_counts_at(buckets, start, bucket_size, bucket_stride, 9);
14846 num_buckets -= 8;
14847 }
14848
14849 match num_buckets {
14850 1 => {}
14851 2..=8 => accumulate_counts_at(
14852 buckets,
14853 bucket00_start,
14854 bucket_size,
14855 bucket_stride,
14856 usize::try_from(num_buckets).expect("non-negative"),
14857 ),
14858 _ => {}
14859 }
14860}
14861
14862fn block_slice<T>(slice: &[T], block_start: FastSint, block_size: FastSint) -> &[T] {
14863 let start = usize::try_from(block_start).expect("block_start must be non-negative");
14864 let len = usize::try_from(block_size).expect("block_size must be non-negative");
14865 &slice[start..start + len]
14866}
14867
14868#[allow(dead_code)]
14869struct SharedMutArray<'a> {
14870 ptr: *mut SaSint,
14871 len: usize,
14872 _marker: PhantomData<&'a mut [SaSint]>,
14873}
14874
14875#[allow(dead_code)]
14876impl<'a> SharedMutArray<'a> {
14877 fn new(slice: &'a mut [SaSint]) -> Self {
14878 Self {
14879 ptr: slice.as_mut_ptr(),
14880 len: slice.len(),
14881 _marker: PhantomData,
14882 }
14883 }
14884
14885 fn len(&self) -> usize {
14886 self.len
14887 }
14888
14889 fn slice_mut(&mut self, start: usize, len: usize) -> &mut [SaSint] {
14890 assert!(start <= self.len);
14891 assert!(len <= self.len - start);
14892 unsafe {
14893 std::slice::from_raw_parts_mut(self.ptr.add(start), len)
14897 }
14898 }
14899}
14900
14901fn accumulate_counts_at(
14902 buckets: &mut [SaSint],
14903 bucket00_start: usize,
14904 bucket_size: usize,
14905 bucket_stride: usize,
14906 count: usize,
14907) {
14908 assert!((2..=9).contains(&count));
14909 assert!(bucket00_start >= (count - 1) * bucket_stride);
14910
14911 let dst_end = bucket00_start + bucket_size;
14912 let mut sums = vec![0; bucket_size];
14913
14914 for i in 0..count {
14915 let start = bucket00_start - i * bucket_stride;
14916 let end = start + bucket_size;
14917 for (sum, value) in sums.iter_mut().zip(buckets[start..end].iter()) {
14918 *sum += *value;
14919 }
14920 }
14921
14922 buckets[bucket00_start..dst_end].copy_from_slice(&sums);
14923}
14924
14925#[doc(hidden)]
14927pub fn thread_state_size() -> usize {
14928 mem::size_of::<ThreadState>()
14929}
14930
14931#[cfg(all(test, feature = "upstream-c"))]
14932mod tests {
14933 use super::*;
14934
14935 unsafe extern "C" {
14936 fn probe_public_libsais64(t: *const u8, sa: *mut SaSint, n: SaSint, fs: SaSint) -> SaSint;
14937 fn probe_public_libsais64_freq(
14938 t: *const u8,
14939 sa: *mut SaSint,
14940 n: SaSint,
14941 fs: SaSint,
14942 freq: *mut SaSint,
14943 ) -> SaSint;
14944 fn probe_public_libsais64_gsa(
14945 t: *const u8,
14946 sa: *mut SaSint,
14947 n: SaSint,
14948 fs: SaSint,
14949 ) -> SaSint;
14950 fn probe_public_libsais64_gsa_freq(
14951 t: *const u8,
14952 sa: *mut SaSint,
14953 n: SaSint,
14954 fs: SaSint,
14955 freq: *mut SaSint,
14956 ) -> SaSint;
14957 fn probe_public_libsais64_long(
14958 t: *mut SaSint,
14959 sa: *mut SaSint,
14960 n: SaSint,
14961 k: SaSint,
14962 fs: SaSint,
14963 ) -> SaSint;
14964 fn probe_public_libsais64_bwt(
14965 t: *const u8,
14966 u: *mut u8,
14967 a: *mut SaSint,
14968 n: SaSint,
14969 fs: SaSint,
14970 ) -> SaSint;
14971 fn probe_public_libsais64_bwt_freq(
14972 t: *const u8,
14973 u: *mut u8,
14974 a: *mut SaSint,
14975 n: SaSint,
14976 fs: SaSint,
14977 freq: *mut SaSint,
14978 ) -> SaSint;
14979 fn probe_public_libsais64_bwt_aux(
14980 t: *const u8,
14981 u: *mut u8,
14982 a: *mut SaSint,
14983 n: SaSint,
14984 fs: SaSint,
14985 r: SaSint,
14986 i: *mut SaSint,
14987 ) -> SaSint;
14988 fn probe_public_libsais64_bwt_aux_freq(
14989 t: *const u8,
14990 u: *mut u8,
14991 a: *mut SaSint,
14992 n: SaSint,
14993 fs: SaSint,
14994 freq: *mut SaSint,
14995 r: SaSint,
14996 i: *mut SaSint,
14997 ) -> SaSint;
14998 fn probe_public_libsais64_unbwt(
14999 t: *const u8,
15000 u: *mut u8,
15001 a: *mut SaSint,
15002 n: SaSint,
15003 i: SaSint,
15004 ) -> SaSint;
15005 fn probe_public_libsais64_unbwt_freq(
15006 t: *const u8,
15007 u: *mut u8,
15008 a: *mut SaSint,
15009 n: SaSint,
15010 freq: *const SaSint,
15011 i: SaSint,
15012 ) -> SaSint;
15013 fn probe_public_libsais64_unbwt_aux(
15014 t: *const u8,
15015 u: *mut u8,
15016 a: *mut SaSint,
15017 n: SaSint,
15018 r: SaSint,
15019 i: *const SaSint,
15020 ) -> SaSint;
15021 fn probe_public_libsais64_unbwt_aux_freq(
15022 t: *const u8,
15023 u: *mut u8,
15024 a: *mut SaSint,
15025 n: SaSint,
15026 freq: *const SaSint,
15027 r: SaSint,
15028 i: *const SaSint,
15029 ) -> SaSint;
15030 fn probe_public_libsais64_plcp(
15031 t: *const u8,
15032 sa: *const SaSint,
15033 plcp: *mut SaSint,
15034 n: SaSint,
15035 ) -> SaSint;
15036 fn probe_public_libsais64_plcp_gsa(
15037 t: *const u8,
15038 sa: *const SaSint,
15039 plcp: *mut SaSint,
15040 n: SaSint,
15041 ) -> SaSint;
15042 fn probe_public_libsais64_lcp(
15043 plcp: *const SaSint,
15044 sa: *const SaSint,
15045 lcp: *mut SaSint,
15046 n: SaSint,
15047 ) -> SaSint;
15048 fn probe_libsais64_renumber_lms_suffixes_8u(
15049 sa: *mut SaSint,
15050 m: SaSint,
15051 name: SaSint,
15052 omp_block_start: FastSint,
15053 omp_block_size: FastSint,
15054 ) -> SaSint;
15055 fn probe_libsais64_gather_marked_lms_suffixes(
15056 sa: *mut SaSint,
15057 m: SaSint,
15058 l: FastSint,
15059 omp_block_start: FastSint,
15060 omp_block_size: FastSint,
15061 ) -> FastSint;
15062 fn probe_libsais64_renumber_and_gather_lms_suffixes_omp(
15063 sa: *mut SaSint,
15064 n: SaSint,
15065 m: SaSint,
15066 fs: SaSint,
15067 threads: SaSint,
15068 ) -> SaSint;
15069 fn probe_libsais64_renumber_distinct_lms_suffixes_32s_4k(
15070 sa: *mut SaSint,
15071 m: SaSint,
15072 name: SaSint,
15073 omp_block_start: FastSint,
15074 omp_block_size: FastSint,
15075 ) -> SaSint;
15076 fn probe_libsais64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
15077 sa: *mut SaSint,
15078 n: SaSint,
15079 m: SaSint,
15080 threads: SaSint,
15081 ) -> SaSint;
15082 fn probe_libsais64_renumber_unique_and_nonunique_lms_suffixes_32s(
15083 t: *mut SaSint,
15084 sa: *mut SaSint,
15085 m: SaSint,
15086 f: SaSint,
15087 omp_block_start: FastSint,
15088 omp_block_size: FastSint,
15089 ) -> SaSint;
15090 fn probe_libsais64_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
15091 t: *mut SaSint,
15092 sa: *mut SaSint,
15093 m: SaSint,
15094 threads: SaSint,
15095 ) -> SaSint;
15096 }
15097
15098 #[test]
15099 fn libsais64_align_up_matches_power_of_two_alignment() {
15100 assert_eq!(align_up(0, 4096), 0);
15101 assert_eq!(align_up(1, 4096), 4096);
15102 assert_eq!(align_up(4095, 4096), 4096);
15103 assert_eq!(align_up(4096, 4096), 4096);
15104 assert_eq!(align_up(4097, 4096), 8192);
15105 assert_eq!(align_up(65, 64), 128);
15106 }
15107
15108 #[test]
15109 fn libsais64_shared_mut_array_projects_mutable_spans_from_one_backing_buffer() {
15110 let mut backing = vec![1, 2, 3, 4, 5, 6];
15111 let len;
15112 {
15113 let mut shared = SharedMutArray::new(&mut backing);
15114 shared.slice_mut(1, 3).copy_from_slice(&[20, 30, 40]);
15115 shared.slice_mut(4, 2).copy_from_slice(&[50, 60]);
15116 len = shared.len();
15117 }
15118 assert_eq!(backing, vec![1, 20, 30, 40, 50, 60]);
15119 assert_eq!(len, 6);
15120 }
15121
/// A context created for one thread has `8 * ALPHABET_SIZE` buckets and no
/// per-thread state.
#[test]
fn libsais64_create_ctx_main_matches_single_thread_layout() {
    let single = create_ctx_main(1).expect("context");
    assert!(single.thread_state.is_none());
    assert_eq!(single.threads, 1);
    assert_eq!(single.buckets.len(), 8 * ALPHABET_SIZE);
}
15129
/// A context created for three threads carries three thread states, each with
/// `4 * ALPHABET_SIZE` buckets and a `LIBSAIS_PER_THREAD_CACHE_SIZE` cache.
#[test]
fn libsais64_create_ctx_main_allocates_thread_state_for_multi_threaded_mode() {
    let states = create_ctx_main(3)
        .expect("context")
        .thread_state
        .expect("thread state");
    assert_eq!(states.len(), 3);
    for state in &states {
        assert_eq!(state.buckets.len(), 4 * ALPHABET_SIZE);
    }
    for state in &states {
        assert_eq!(state.cache.len(), LIBSAIS_PER_THREAD_CACHE_SIZE);
    }
}
15142
/// `create_ctx` yields the same shape as a one-thread main context:
/// one thread, `8 * ALPHABET_SIZE` buckets, no thread state.
#[test]
fn libsais64_create_ctx_wraps_single_thread_main_context() {
    let default_ctx = create_ctx().expect("context");
    assert!(default_ctx.thread_state.is_none());
    assert_eq!(default_ctx.buckets.len(), 8 * ALPHABET_SIZE);
    assert_eq!(default_ctx.threads, 1);
}
15150
/// `free_ctx` can be handed a freshly created context by value.
#[test]
fn libsais64_free_ctx_accepts_context_value() {
    free_ctx(create_ctx().expect("context"));
}
15156
/// Checks the buffer sizes of a three-thread inverse-BWT context.
#[test]
fn libsais64_unbwt_create_ctx_main_allocates_expected_buffers() {
    let bucket2_len: usize = ALPHABET_SIZE * ALPHABET_SIZE;
    let per_thread_len: usize = ALPHABET_SIZE + bucket2_len;

    let ctx = unbwt_create_ctx_main(3).expect("context");
    assert_eq!(ctx.bucket2.len(), bucket2_len);
    assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));

    let parallel_buckets = ctx.buckets.as_ref().expect("parallel buckets");
    assert_eq!(parallel_buckets.len(), 3 * per_thread_len);
    assert_eq!(ctx.threads, 3);
}
15168
/// The histogram of `"banana"` must report 3 x `a`, 1 x `b` and 2 x `n`.
#[test]
fn libsais64_unbwt_compute_histogram_counts_bytes() {
    let text = b"banana";
    let mut histogram = vec![0u64; ALPHABET_SIZE];

    unbwt_compute_histogram(text, text.len() as FastSint, &mut histogram);

    for (symbol, occurrences) in [(b'a', 3), (b'b', 1), (b'n', 2)] {
        assert_eq!(histogram[symbol as usize], occurrences);
    }
}
15178
/// Two mirrored cells of the 256x256 table must trade values after transposition.
#[test]
fn libsais64_unbwt_transpose_bucket2_swaps_matrix_entries() {
    let row1_col2: usize = (1 << 8) + 2;
    let row2_col1: usize = (2 << 8) + 1;

    let mut matrix = vec![0u64; ALPHABET_SIZE * ALPHABET_SIZE];
    matrix[row2_col1] = 7;
    matrix[row1_col2] = 9;

    unbwt_transpose_bucket2(&mut matrix);

    assert_eq!(matrix[row1_col2], 7);
    assert_eq!(matrix[row2_col1], 9);
}
15188
/// Smoke test for `unbwt_init_single`: after initialisation both the fastbits
/// table and the `p` array contain at least one non-zero entry.
#[test]
fn libsais64_unbwt_init_single_builds_monotone_fastbits_and_writes_psi() {
    let text = b"annb\x00aa";
    let n = text.len();
    let table_len: usize = ALPHABET_SIZE * ALPHABET_SIZE;

    let index = vec![4u64];
    let mut p = vec![0u64; n + 1];
    let mut bucket2 = vec![0u64; table_len];
    let mut fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];

    unbwt_init_single(
        text,
        &mut p,
        n as SaSint,
        None,
        &index,
        &mut bucket2,
        &mut fastbits,
    );

    // With ALPHABET_SIZE == 256 every u16 is below the bound, so this check
    // cannot fail; it is kept to mirror the intended invariant.
    for &entry in &fastbits {
        assert!(usize::from(entry) < table_len);
    }
    assert!(fastbits.iter().any(|&entry| entry != 0));
    assert!(p.iter().any(|&entry| entry != 0));
}
15213
/// On a tiny input, `unbwt_init_parallel` with two threads must produce the
/// same `p`, `bucket2` and `fastbits` as `unbwt_init_single`.
#[test]
fn libsais64_unbwt_init_parallel_currently_matches_single_initializer() {
    let text = b"annb\x00aa";
    let n = text.len();
    let index = vec![4u64];
    let bucket2_len: usize = ALPHABET_SIZE * ALPHABET_SIZE;
    let fastbits_len: usize = 1 + (1 << UNBWT_FASTBITS);

    // Reference run.
    let mut expected_p = vec![0u64; n + 1];
    let mut expected_bucket2 = vec![0u64; bucket2_len];
    let mut expected_fastbits = vec![0u16; fastbits_len];
    unbwt_init_single(
        text,
        &mut expected_p,
        n as SaSint,
        None,
        &index,
        &mut expected_bucket2,
        &mut expected_fastbits,
    );

    // Run under test, with scratch space sized for two threads.
    let mut actual_p = vec![0u64; n + 1];
    let mut actual_bucket2 = vec![0u64; bucket2_len];
    let mut actual_fastbits = vec![0u16; fastbits_len];
    let mut scratch = vec![0u64; 2 * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)];
    unbwt_init_parallel(
        text,
        &mut actual_p,
        n as SaSint,
        None,
        &index,
        &mut actual_bucket2,
        &mut actual_fastbits,
        Some(&mut scratch),
        2,
    );

    assert_eq!(actual_p, expected_p);
    assert_eq!(actual_bucket2, expected_bucket2);
    assert_eq!(actual_fastbits, expected_fastbits);
}
15251
/// Decodes two 16-bit words with a hand-built table and expects the bytes
/// `0x12, 0x35` twice, with the cursor left at 0.
#[test]
fn libsais64_unbwt_decode_1_writes_big_endian_symbol_words() {
    let p = vec![1u64, 0u64];

    let mut bucket2 = vec![0u64; ALPHABET_SIZE * ALPHABET_SIZE];
    bucket2[0x1234] = 0;
    bucket2[0x1235] = 2;

    let mut fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
    fastbits[0] = 0x1234;

    let mut output = vec![0u8; 4];
    let mut cursor = 0usize;
    unbwt_decode_1(&mut output, &p, &bucket2, &fastbits, 0, &mut cursor, 2);

    assert_eq!(output, vec![0x12, 0x35, 0x12, 0x35]);
    assert_eq!(cursor, 0);
}
15268
/// Runs `unbwt_decode` over an 8-byte output buffer and expects only the
/// first four bytes to be written.
#[test]
fn libsais64_unbwt_decode_dispatches_two_block_tail_shape() {
    let p = vec![1u64, 0u64];
    let index = vec![0u64, 0u64];

    let mut bucket2 = vec![0u64; ALPHABET_SIZE * ALPHABET_SIZE];
    bucket2[0x1234] = 0;
    bucket2[0x1235] = 2;

    let mut fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
    fastbits[0] = 0x1234;

    let mut output = vec![0u8; 8];
    unbwt_decode(&mut output, &p, 4, 2, &index, &bucket2, &fastbits, 2, 2);

    let expected = vec![0x12, 0x35, 0x12, 0x35, 0x00, 0x00, 0x00, 0x00];
    assert_eq!(output, expected);
}
15284
15285 fn brute_force_suffix_array_u8(t: &[u8]) -> Vec<SaSint> {
15286 let mut sa: Vec<SaSint> = (0..t.len())
15287 .map(|index| SaSint::try_from(index).expect("index must fit SaSint"))
15288 .collect();
15289 sa.sort_by(|&lhs, &rhs| {
15290 t[usize::try_from(lhs).expect("non-negative")..]
15291 .cmp(&t[usize::try_from(rhs).expect("non-negative")..])
15292 });
15293 sa
15294 }
15295
15296 fn brute_force_plcp_u8(t: &[u8], sa: &[SaSint]) -> Vec<SaSint> {
15297 let mut rank = vec![0usize; t.len()];
15298 for (i, &suffix) in sa.iter().enumerate() {
15299 rank[usize::try_from(suffix).expect("suffix index must be non-negative")] = i;
15300 }
15301
15302 let mut plcp = vec![0; t.len()];
15303 for i in 0..t.len() {
15304 let r = rank[i];
15305 let prev = if r == 0 {
15306 t.len()
15307 } else {
15308 usize::try_from(sa[r - 1]).expect("suffix index must be non-negative")
15309 };
15310 if prev == t.len() {
15311 plcp[i] = 0;
15312 continue;
15313 }
15314
15315 let mut l = 0usize;
15316 while i + l < t.len() && prev + l < t.len() && t[i + l] == t[prev + l] {
15317 l += 1;
15318 }
15319 plcp[i] = l as SaSint;
15320 }
15321 plcp
15322 }
15323
15324 fn brute_force_lcp_from_sa_u8(t: &[u8], sa: &[SaSint]) -> Vec<SaSint> {
15325 let mut lcp = vec![0; sa.len()];
15326 for i in 0..sa.len() {
15327 let lhs = usize::try_from(sa[i]).expect("suffix index must be non-negative");
15328 let rhs = if i == 0 {
15329 sa.len()
15330 } else {
15331 usize::try_from(sa[i - 1]).expect("suffix index must be non-negative")
15332 };
15333 if rhs == sa.len() {
15334 lcp[i] = 0;
15335 continue;
15336 }
15337
15338 let mut l = 0usize;
15339 while lhs + l < t.len() && rhs + l < t.len() && t[lhs + l] == t[rhs + l] {
15340 l += 1;
15341 }
15342 lcp[i] = l as SaSint;
15343 }
15344 lcp
15345 }
15346
15347 fn make_libsais64_recursive_main_32s_text(repeats: usize) -> Vec<SaSint> {
15348 let motif = [9, 4, 9, 2, 9, 4, 9, 1];
15349 let mut t = Vec::with_capacity(repeats * motif.len() + 1);
15350 for _ in 0..repeats {
15351 t.extend_from_slice(&motif);
15352 }
15353 t.push(0);
15354 t
15355 }
15356
15357 fn make_libsais64_large_main_32s_stress_text(len: usize, alphabet: SaSint) -> Vec<SaSint> {
15358 let mut state: u32 = 0x1357_9bdf;
15359 let mut t = Vec::with_capacity(len + 1);
15360
15361 for i in 0..len {
15362 state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
15363 let mut value = ((state >> 16) % (alphabet as u32 - 1)) as SaSint + 1;
15364
15365 if i % 17 < 8 {
15366 value = ((i / 17) as SaSint % 11) + 1;
15367 }
15368 if i % 29 < 10 {
15369 value = (((i / 29) as SaSint * 3) % 19) + 1;
15370 }
15371 if i % 64 >= 48 {
15372 value = t[i - 48];
15373 }
15374
15375 t.push(value);
15376 }
15377
15378 t.push(0);
15379 t
15380 }
15381
15382 fn assert_libsais64_main_32s_entry_matches_public_c_long(
15383 t: Vec<SaSint>,
15384 k: SaSint,
15385 fs: SaSint,
15386 compare_full_sa: bool,
15387 ) {
15388 let n = t.len() as SaSint;
15389 let n_usize = t.len();
15390 let threads = 1;
15391 let extra = usize::try_from(fs).expect("fs must be non-negative");
15392
15393 let mut c_t = t.clone();
15394 let mut c_sa = vec![0; t.len() + extra];
15395 let c_result =
15396 unsafe { probe_public_libsais64_long(c_t.as_mut_ptr(), c_sa.as_mut_ptr(), n, k, fs) };
15397
15398 let mut rust_t = t;
15399 let mut rust_sa = vec![0; rust_t.len() + extra];
15400 let mut thread_state = alloc_thread_state(threads).expect("thread state");
15401 let rust_result = libsais64_main_32s_entry(
15402 &mut rust_t,
15403 &mut rust_sa,
15404 n,
15405 k,
15406 fs,
15407 threads,
15408 &mut thread_state,
15409 );
15410
15411 assert_eq!(rust_result, c_result);
15412 assert_eq!(rust_t, c_t);
15413 if compare_full_sa {
15414 assert_eq!(rust_sa, c_sa);
15415 } else {
15416 assert_eq!(&rust_sa[..n_usize], &c_sa[..n_usize]);
15417 }
15418 }
15419
15420 fn assert_libsais64_main_32s_entry_matches_public_c_long_for_branch(k: SaSint) {
15421 assert_libsais64_main_32s_entry_matches_public_c_long(
15422 vec![17, 3, 17, 9, 5, 9, 2, 11, 2, 7, 1, 7, 0],
15423 k,
15424 0,
15425 true,
15426 );
15427 }
15428
/// `libsais64` on `"banana"` must return 0, reproduce the brute-force suffix
/// array and fill the symbol frequency table.
#[test]
fn libsais64_matches_bruteforce_suffix_array_for_small_text() {
    let text = b"banana";
    let mut freq = vec![0; ALPHABET_SIZE];
    let mut sa = vec![0; text.len()];

    let status = libsais64(text, &mut sa, 0, Some(&mut freq));

    assert_eq!(status, 0);
    assert_eq!(sa, brute_force_suffix_array_u8(text));
    for (symbol, occurrences) in [(b'a', 3), (b'b', 1), (b'n', 2)] {
        assert_eq!(freq[symbol as usize], occurrences);
    }
}
15443
/// `libsais64_int` on a five-symbol integer text must return 0 and match a
/// suffix array obtained by directly sorting the suffixes.
#[test]
fn libsais64_int_matches_bruteforce_suffix_array_for_small_integer_text() {
    let mut text = vec![2, 1, 3, 1, 0];

    // Brute-force reference: order the start positions by suffix content.
    let mut order: Vec<usize> = (0..text.len()).collect();
    order.sort_by(|&a, &b| text[a..].cmp(&text[b..]));
    let expected: Vec<SaSint> = order
        .into_iter()
        .map(|start| SaSint::try_from(start).expect("index must fit SaSint"))
        .collect();

    let mut sa = vec![0; text.len()];
    let status = libsais64_int(&mut text, &mut sa, 4, 0);

    assert_eq!(status, 0);
    assert_eq!(sa, expected);
}
15464
/// `libsais64_plcp` on `"banana"` must return 0 and equal the brute-force PLCP.
#[test]
fn libsais64_plcp_matches_bruteforce_for_small_text() {
    let text = b"banana";
    let suffix_array = brute_force_suffix_array_u8(text);
    let reference = brute_force_plcp_u8(text, &suffix_array);

    let mut computed = vec![0; text.len()];
    let status = libsais64_plcp(text, &suffix_array, &mut computed);

    assert_eq!(status, 0);
    assert_eq!(computed, reference);
}
15477
/// For the text `ab\0b\0`, the PLCP entries at the two `\0` positions
/// (indices 2 and 4) must be 0 and the call must return 0.
#[test]
fn libsais64_plcp_gsa_stops_at_separator() {
    let text = b"ab\0b\0";
    let suffix_array = brute_force_suffix_array_u8(text);
    let mut plcp = vec![0; text.len()];

    let status = libsais64_plcp_gsa(text, &suffix_array, &mut plcp);

    assert_eq!(status, 0);
    for separator_position in [2usize, 4] {
        assert_eq!(plcp[separator_position], 0);
    }
}
15490
/// `libsais64_lcp`, fed the brute-force PLCP and suffix array of `"banana"`,
/// must return 0 and equal the brute-force LCP array.
#[test]
fn libsais64_lcp_matches_bruteforce_for_small_text() {
    let text = b"banana";
    let suffix_array = brute_force_suffix_array_u8(text);
    let plcp = brute_force_plcp_u8(text, &suffix_array);
    let reference = brute_force_lcp_from_sa_u8(text, &suffix_array);

    let mut computed = vec![0; text.len()];
    let status = libsais64_lcp(&plcp, &suffix_array, &mut computed);

    assert_eq!(status, 0);
    assert_eq!(computed, reference);
}
15504
/// On a 70,003-byte input, `unbwt_init_parallel` with four threads must
/// produce the same `p`, `bucket2` and `fastbits` as `unbwt_init_single`.
#[test]
fn libsais64_unbwt_init_parallel_uses_block_partition_for_large_inputs() {
    let n = 70_003usize;
    let text: Vec<u8> = (0..n)
        .map(|i| i.wrapping_mul(37).wrapping_add(i >> 3) as u8)
        .collect();
    let index = [12_345u64];
    let bucket2_len: usize = ALPHABET_SIZE * ALPHABET_SIZE;
    let fastbits_len: usize = 1 + (1 << UNBWT_FASTBITS);

    // Reference run.
    let mut expected_p = vec![0u64; n + 1];
    let mut expected_bucket2 = vec![0u64; bucket2_len];
    let mut expected_fastbits = vec![0u16; fastbits_len];
    unbwt_init_single(
        &text,
        &mut expected_p,
        n as SaSint,
        None,
        &index,
        &mut expected_bucket2,
        &mut expected_fastbits,
    );

    // Run under test, with scratch buckets sized for four threads.
    let mut actual_p = vec![0u64; n + 1];
    let mut actual_bucket2 = vec![0u64; bucket2_len];
    let mut actual_fastbits = vec![0u16; fastbits_len];
    let mut scratch = vec![0u64; 4 * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)];
    unbwt_init_parallel(
        &text,
        &mut actual_p,
        n as SaSint,
        None,
        &index,
        &mut actual_bucket2,
        &mut actual_fastbits,
        Some(&mut scratch),
        4,
    );

    assert_eq!(actual_p, expected_p);
    assert_eq!(actual_bucket2, expected_bucket2);
    assert_eq!(actual_fastbits, expected_fastbits);
}
15546
/// Sorts four suffixes stored in `sa[4..8]` into `sa[..4]` using bucket ends
/// 2 (symbol 0) and 4 (symbol 1); expects `[1, 3, 0, 2]`.
#[test]
fn libsais64_radix_sort_lms_suffixes_8u_places_suffixes_by_bucket() {
    let text = vec![1_u8, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 0, 1, 2, 3];

    let mut bucket_ends = vec![0; 2 * ALPHABET_SIZE];
    for (symbol, end) in [(0usize, 2), (1, 4)] {
        bucket_ends[buckets_index2(symbol, 0)] = end;
    }

    radix_sort_lms_suffixes_8u(&text, &mut sa, &mut bucket_ends, 4, 4);

    assert_eq!(&sa[..4], &[1, 3, 0, 2]);
}
15557
/// Calls the `_omp` entry point with two threads on a tiny input; the bucket
/// ends live at offset `4 * ALPHABET_SIZE` and the result must be `[2, 4, 1, 3]`.
#[test]
fn libsais64_radix_sort_lms_suffixes_8u_omp_wraps_sequential_version() {
    let text = vec![9_u8, 1, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];

    let base = 4 * ALPHABET_SIZE;
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for (symbol, end) in [(0usize, 2), (1, 4)] {
        buckets[base + buckets_index2(symbol, 0)] = end;
    }

    let mut thread_state = alloc_thread_state(2).unwrap();
    radix_sort_lms_suffixes_8u_omp(&text, &mut sa, 9, 5, 0, &mut buckets, 2, &mut thread_state);

    assert_eq!(&sa[..4], &[2, 4, 1, 3]);
}
15569
/// Integer-text variant with a dense bucket table `[2, 4]`; the four suffixes
/// in `sa[4..8]` must end up as `[1, 3, 0, 2]` in `sa[..4]`.
#[test]
fn libsais64_radix_sort_lms_suffixes_32s_6k_places_suffixes_by_bucket() {
    let text = vec![1, 0, 1, 0];
    let mut bucket_ends = vec![2, 4];
    let mut sa = vec![9, 9, 9, 9, 0, 1, 2, 3];

    radix_sort_lms_suffixes_32s_6k(&text, &mut sa, &mut bucket_ends, 4, 4);

    let sorted = &sa[..4];
    assert_eq!(sorted, &[1, 3, 0, 2]);
}
15578
/// Integer-text variant with an interleaved bucket table `[2, 0, 4, 0]`; the
/// four suffixes in `sa[4..8]` must end up as `[1, 3, 0, 2]` in `sa[..4]`.
#[test]
fn libsais64_radix_sort_lms_suffixes_32s_2k_places_suffixes_by_bucket() {
    let text = vec![1, 0, 1, 0];
    let mut bucket_ends = vec![2, 0, 4, 0];
    let mut sa = vec![9, 9, 9, 9, 0, 1, 2, 3];

    radix_sort_lms_suffixes_32s_2k(&text, &mut sa, &mut bucket_ends, 4, 4);

    let sorted = &sa[..4];
    assert_eq!(sorted, &[1, 3, 0, 2]);
}
15587
/// Two-thread `_omp` call on a tiny integer text with dense buckets `[2, 4]`;
/// expects `[2, 4, 1, 3]` in `sa[..4]`.
#[test]
fn libsais64_radix_sort_lms_suffixes_32s_6k_omp_wraps_sequential_version() {
    let text = vec![9, 1, 0, 1, 0];
    let mut bucket_ends = vec![2, 4];
    let mut sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];
    let mut thread_state = alloc_thread_state(2).unwrap();

    radix_sort_lms_suffixes_32s_6k_omp(
        &text,
        &mut sa,
        9,
        5,
        &mut bucket_ends,
        2,
        &mut thread_state,
    );

    let sorted = &sa[..4];
    assert_eq!(sorted, &[2, 4, 1, 3]);
}
15605
/// Two-thread `_omp` call on a tiny integer text with interleaved buckets
/// `[2, 0, 4, 0]`; expects `[2, 4, 1, 3]` in `sa[..4]`.
#[test]
fn libsais64_radix_sort_lms_suffixes_32s_2k_omp_wraps_sequential_version() {
    let text = vec![9, 1, 0, 1, 0];
    let mut bucket_ends = vec![2, 0, 4, 0];
    let mut sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];
    let mut thread_state = alloc_thread_state(2).unwrap();

    radix_sort_lms_suffixes_32s_2k_omp(
        &text,
        &mut sa,
        9,
        5,
        &mut bucket_ends,
        2,
        &mut thread_state,
    );

    let sorted = &sa[..4];
    assert_eq!(sorted, &[2, 4, 1, 3]);
}
15623
/// Exercises the 6k and 2k `_block_omp` variants on the same tiny input,
/// sharing one cache buffer that is reset between the two runs.
#[test]
fn libsais64_radix_sort_lms_suffixes_32s_block_omp_runs_cache_pipeline() {
    let text = vec![9, 1, 0, 1, 0];
    let initial_sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];
    let mut cache = vec![ThreadCache::default(); 9];

    // Dense (6k) bucket layout.
    let mut sa_6k = initial_sa.clone();
    let mut bucket_6k = vec![2, 4];
    radix_sort_lms_suffixes_32s_6k_block_omp(
        &text,
        &mut sa_6k,
        &mut bucket_6k,
        &mut cache,
        5,
        4,
        2,
    );
    assert_eq!(&sa_6k[..4], &[2, 4, 1, 3]);

    // Interleaved (2k) bucket layout, starting from a cleared cache.
    cache.fill(ThreadCache::default());
    let mut sa_2k = initial_sa;
    let mut bucket_2k = vec![2, 0, 4, 0];
    radix_sort_lms_suffixes_32s_2k_block_omp(
        &text,
        &mut sa_2k,
        &mut bucket_2k,
        &mut cache,
        5,
        4,
        2,
    );
    assert_eq!(&sa_2k[..4], &[2, 4, 1, 3]);
}
15655
/// With 65,600 suffixes, the four-thread call must leave `sa` identical to the
/// one-thread call. The last thread state is pre-seeded with `m`.
#[test]
fn libsais64_radix_sort_lms_suffixes_8u_omp_uses_thread_state_for_large_inputs() {
    let m = 65_600usize;
    let n = 2 * m + 16;
    let first_slot = n - m + 1;
    let text: Vec<u8> = (0..n).map(|i| (i % 4) as u8).collect();
    let lms: Vec<SaSint> = (0..m - 1).map(|i| i as SaSint).collect();

    // Count the first symbol of every suffix, then turn the counts into
    // inclusive prefix sums in the table section at `4 * ALPHABET_SIZE`.
    let base = 4 * ALPHABET_SIZE;
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for &suffix in &lms {
        let symbol = text[suffix as usize] as usize;
        buckets[base + buckets_index2(symbol, 0)] += 1;
    }
    let mut running = 0;
    for symbol in 0..ALPHABET_SIZE {
        let slot = base + buckets_index2(symbol, 0);
        running += buckets[slot];
        buckets[slot] = running;
    }

    let mut sa_reference = vec![0; n];
    sa_reference[first_slot..first_slot + lms.len()].copy_from_slice(&lms);
    let mut sa_parallel = sa_reference.clone();
    let mut buckets_reference = buckets.clone();
    let mut buckets_parallel = buckets;

    let mut thread_state = alloc_thread_state(4).unwrap();
    thread_state[3].m = m as FastSint;

    radix_sort_lms_suffixes_8u_omp(
        &text,
        &mut sa_reference,
        n as SaSint,
        m as SaSint,
        0,
        &mut buckets_reference,
        1,
        &mut [],
    );
    radix_sort_lms_suffixes_8u_omp(
        &text,
        &mut sa_parallel,
        n as SaSint,
        m as SaSint,
        0,
        &mut buckets_parallel,
        4,
        &mut thread_state,
    );

    assert_eq!(sa_parallel, sa_reference);
}
15706
/// With 65,600 suffixes, the four-thread 6k and 2k `_omp` calls must leave
/// both `sa` and the bucket table identical to their one-thread counterparts.
#[test]
fn libsais64_radix_sort_lms_suffixes_32s_omp_uses_block_pipeline_for_large_inputs() {
    let m = 65_600usize;
    let n = 2 * m + 16;
    let first_slot = n - m + 1;
    let text: Vec<SaSint> = (0..n).map(|i| (i % 4) as SaSint).collect();
    let lms: Vec<SaSint> = (0..m - 1).map(|i| i as SaSint).collect();

    // Per-symbol counts turned into inclusive prefix sums.
    let mut bucket_ends = vec![0; 4];
    for &suffix in &lms {
        let symbol = text[suffix as usize] as usize;
        bucket_ends[symbol] += 1;
    }
    let mut running = 0;
    for symbol in 0..bucket_ends.len() {
        running += bucket_ends[symbol];
        bucket_ends[symbol] = running;
    }

    let mut thread_state = alloc_thread_state(4).unwrap();

    // --- dense (6k) bucket layout ---
    let mut sa_reference = vec![0; n];
    sa_reference[first_slot..first_slot + lms.len()].copy_from_slice(&lms);
    let mut sa_parallel = sa_reference.clone();
    let mut bucket_reference = bucket_ends.clone();
    let mut bucket_parallel = bucket_ends.clone();

    radix_sort_lms_suffixes_32s_6k_omp(
        &text,
        &mut sa_reference,
        n as SaSint,
        m as SaSint,
        &mut bucket_reference,
        1,
        &mut [],
    );
    radix_sort_lms_suffixes_32s_6k_omp(
        &text,
        &mut sa_parallel,
        n as SaSint,
        m as SaSint,
        &mut bucket_parallel,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_parallel, sa_reference);
    assert_eq!(bucket_parallel, bucket_reference);

    // --- interleaved (2k) bucket layout ---
    let mut bucket_2k = vec![0; 8];
    for (symbol, &end) in bucket_ends.iter().enumerate() {
        bucket_2k[buckets_index2(symbol, 0)] = end;
    }
    let mut sa_reference = vec![0; n];
    sa_reference[first_slot..first_slot + lms.len()].copy_from_slice(&lms);
    let mut sa_parallel = sa_reference.clone();
    let mut bucket_reference = bucket_2k.clone();
    let mut bucket_parallel = bucket_2k;

    radix_sort_lms_suffixes_32s_2k_omp(
        &text,
        &mut sa_reference,
        n as SaSint,
        m as SaSint,
        &mut bucket_reference,
        1,
        &mut [],
    );
    radix_sort_lms_suffixes_32s_2k_omp(
        &text,
        &mut sa_parallel,
        n as SaSint,
        m as SaSint,
        &mut bucket_parallel,
        4,
        &mut thread_state,
    );
    assert_eq!(sa_parallel, sa_reference);
    assert_eq!(bucket_parallel, bucket_reference);
}
15784
/// Smoke test: the 1k variant runs on a five-symbol text and reports a
/// non-negative count.
#[test]
fn libsais64_radix_sort_lms_suffixes_32s_1k_collects_lms_suffixes() {
    let text = vec![2, 1, 3, 1, 0];
    let n = text.len();
    let mut buckets = vec![0, 2, 4, 5];
    let mut sa = vec![0; n];

    let collected = radix_sort_lms_suffixes_32s_1k(&text, &mut sa, n as SaSint, &mut buckets);

    assert!(collected >= 0);
}
15793
/// Every slot named by the dense bucket table `[1, 3, 5]` must hold
/// `SAINT_MIN` after the call.
#[test]
fn libsais64_radix_sort_set_markers_32s_6k_marks_target_suffixes() {
    let induction_bucket = vec![1, 3, 5];
    let mut sa = vec![0; 6];

    radix_sort_set_markers_32s_6k(&mut sa, &induction_bucket, 0, 3);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SAINT_MIN);
    }
}
15803
/// Every slot named by the even entries of the interleaved bucket table must
/// hold `SUFFIX_GROUP_MARKER` after the call.
#[test]
fn libsais64_radix_sort_set_markers_32s_4k_marks_target_suffixes() {
    let induction_bucket = vec![1, 0, 3, 0, 5, 0];
    let mut sa = vec![0; 6];

    radix_sort_set_markers_32s_4k(&mut sa, &induction_bucket, 0, 3);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SUFFIX_GROUP_MARKER);
    }
}
15813
/// Two-thread `_omp` call with `k = 4`: slots 1, 3 and 5 must hold `SAINT_MIN`.
#[test]
fn libsais64_radix_sort_set_markers_32s_6k_omp_wraps_sequential_version() {
    let induction_bucket = vec![1, 3, 5];
    let mut sa = vec![0; 6];

    radix_sort_set_markers_32s_6k_omp(&mut sa, 4, &induction_bucket, 2);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SAINT_MIN);
    }
}
15823
/// Two-thread `_omp` call with `k = 4`: slots 1, 3 and 5 must hold
/// `SUFFIX_GROUP_MARKER`.
#[test]
fn libsais64_radix_sort_set_markers_32s_4k_omp_wraps_sequential_version() {
    let induction_bucket = vec![1, 0, 3, 0, 5, 0];
    let mut sa = vec![0; 6];

    radix_sort_set_markers_32s_4k_omp(&mut sa, 4, &induction_bucket, 2);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SUFFIX_GROUP_MARKER);
    }
}
15833
/// With `k = 65,600`, the four-thread marker passes (6k and 4k layouts) must
/// leave `sa` identical to the one-thread passes.
#[test]
fn libsais64_radix_sort_set_markers_32s_omp_partitions_large_inputs() {
    let k = 65_600usize;

    // Dense layout: bucket `i` points at slot `i`.
    let dense_buckets: Vec<SaSint> = (0..k).map(|i| i as SaSint).collect();
    let mut sa_reference = vec![0; k];
    let mut sa_parallel = vec![0; k];
    radix_sort_set_markers_32s_6k_omp(&mut sa_reference, k as SaSint, &dense_buckets, 1);
    radix_sort_set_markers_32s_6k_omp(&mut sa_parallel, k as SaSint, &dense_buckets, 4);
    assert_eq!(sa_parallel, sa_reference);

    // Interleaved layout: the same targets stored at `buckets_index2(i, 0)`.
    let mut interleaved_buckets = vec![0; 2 * k];
    for symbol in 0..k {
        interleaved_buckets[buckets_index2(symbol, 0)] = symbol as SaSint;
    }
    let mut sa_reference = vec![0; k];
    let mut sa_parallel = vec![0; k];
    radix_sort_set_markers_32s_4k_omp(&mut sa_reference, k as SaSint, &interleaved_buckets, 1);
    radix_sort_set_markers_32s_4k_omp(&mut sa_parallel, k as SaSint, &interleaved_buckets, 4);
    assert_eq!(sa_parallel, sa_reference);
}
15854
/// Smoke test: scanning two entries returns a non-negative value and leaves a
/// non-zero entry in `sa`.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_8u_emits_induced_suffixes() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];

    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    let symbol_one_slot = 4 * ALPHABET_SIZE + buckets_index2(1, 0);
    buckets[symbol_one_slot] = 2;

    let d = partial_sorting_scan_left_to_right_8u(&text, &mut sa, &mut buckets, 0, 0, 2);

    assert!(d >= 0);
    assert!(sa.iter().any(|&entry| entry != 0));
}
15865
/// Two-thread `_omp` call on a five-byte text; the returned value must be at
/// least 1.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_8u_omp_wraps_sequential_version() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut sa = vec![0; 8];

    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    let symbol_zero_slot = 4 * ALPHABET_SIZE + buckets_index2(0, 0);
    buckets[symbol_zero_slot] = 1;

    let mut thread_state = alloc_thread_state(2).unwrap();
    let d = partial_sorting_scan_left_to_right_8u_omp(
        &text,
        &mut sa,
        5,
        4,
        &mut buckets,
        0,
        0,
        2,
        &mut thread_state,
    );

    assert!(d >= 1);
}
15886
/// Smoke test for the 6k integer variant: non-negative return value and a
/// non-zero entry somewhere in `sa`.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_6k_emits_induced_suffixes() {
    let text = vec![2, 1, 3, 1, 0];
    let mut sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];

    let mut buckets = vec![0; 4 * 4];
    let symbol_one_slot = buckets_index4(1, 0);
    buckets[symbol_one_slot] = 2;

    let d = partial_sorting_scan_left_to_right_32s_6k(&text, &mut sa, &mut buckets, 0, 0, 2);

    assert!(d >= 0);
    assert!(sa.iter().any(|&entry| entry != 0));
}
15897
/// Smoke test for the 4k integer variant: non-negative return value and a
/// non-zero entry somewhere in `sa`.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_4k_emits_induced_suffixes() {
    let k = 4usize;
    let text = vec![2, 1, 3, 1, 0];
    let mut sa = vec![2 | SUFFIX_GROUP_MARKER, 4, 0, 0, 0, 0];

    let mut buckets = vec![0; 4 * k];
    buckets[2 * k + 1] = 2;

    let d = partial_sorting_scan_left_to_right_32s_4k(
        &text,
        &mut sa,
        k as SaSint,
        &mut buckets,
        0,
        0,
        2,
    );

    assert!(d >= 0);
    assert!(sa.iter().any(|&entry| entry != 0));
}
15917
/// Smoke test for the 1k integer variant: `sa` must contain a non-zero entry
/// after the scan.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_1k_emits_induced_suffixes() {
    let text = vec![2, 1, 3, 1, 0];
    let mut sa = vec![2, 4, 0, 0, 0, 0];
    let mut buckets = vec![0, 2, 0, 0];

    partial_sorting_scan_left_to_right_32s_1k(&text, &mut sa, &mut buckets, 0, 2);

    assert!(sa.iter().any(|&entry| entry != 0));
}
15927
/// Two-thread `_omp` call of the 6k variant with all-zero inputs; the returned
/// value must be at least 1.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_6k_omp_wraps_sequential_version() {
    let text = vec![2, 1, 3, 1, 0];
    let mut buckets = vec![0; 4 * 4];
    let mut sa = vec![0; 8];
    let mut thread_state = alloc_thread_state(2).unwrap();

    let d = partial_sorting_scan_left_to_right_32s_6k_omp(
        &text,
        &mut sa,
        5,
        &mut buckets,
        0,
        0,
        2,
        &mut thread_state,
    );

    assert!(d >= 1);
}
15946
/// Two-thread `_omp` call of the 4k variant with all-zero inputs; the returned
/// value must be at least 1.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_4k_omp_wraps_sequential_version() {
    let k = 4usize;
    let text = vec![2, 1, 3, 1, 0];
    let mut buckets = vec![0; 4 * k];
    let mut sa = vec![0; 8];
    let mut thread_state = alloc_thread_state(2).unwrap();

    let d = partial_sorting_scan_left_to_right_32s_4k_omp(
        &text,
        &mut sa,
        5,
        k as SaSint,
        &mut buckets,
        0,
        2,
        &mut thread_state,
    );

    assert!(d >= 1);
}
15966
/// Two-thread `_omp` call of the 1k variant with all-zero inputs; `sa` must
/// contain a non-zero entry afterwards.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_1k_omp_wraps_sequential_version() {
    let text = vec![2, 1, 3, 1, 0];
    let mut buckets = vec![0; 4];
    let mut sa = vec![0; 8];
    let mut thread_state = alloc_thread_state(2).unwrap();

    partial_sorting_scan_left_to_right_32s_1k_omp(
        &text,
        &mut sa,
        5,
        &mut buckets,
        2,
        &mut thread_state,
    );

    assert!(sa.iter().any(|&entry| entry != 0));
}
15983
/// Gathering one marked entry must copy it into the cache unchanged and store
/// `buckets_index4(1, 1)` as its symbol.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_6k_block_gather_records_bucket_symbols() {
    let text = vec![3, 1, 2, 0];
    let marked_entry = 2 | SAINT_MIN;
    let mut sa = vec![marked_entry, 0, 0, 0];
    let mut cache = vec![ThreadCache::default(); 1];

    partial_sorting_scan_left_to_right_32s_6k_block_gather(&text, &mut sa, &mut cache, 0, 1);

    let gathered = &cache[0];
    assert_eq!(gathered.index, marked_entry);
    assert_eq!(gathered.symbol, buckets_index4(1, 1) as SaSint);
}
15995
/// Gathering the positive entry `2` must record symbol 1 and index 1 in the
/// cache and reset the source slot to 0.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_1k_block_gather_zeroes_positive_entries() {
    let text = vec![3, 1, 2, 0];
    let mut sa = vec![2, 0, 0, 0];
    let mut cache = vec![ThreadCache::default(); 1];

    partial_sorting_scan_left_to_right_32s_1k_block_gather(&text, &mut sa, &mut cache, 0, 1);

    let gathered = &cache[0];
    assert_eq!(gathered.symbol, 1);
    assert_eq!(gathered.index, 1);
    assert_eq!(sa[0], 0);
}
16008
/// Processes a 16,384-entry block starting at offset 20,000 with the
/// four-thread `_block_omp` variant and expects the same `sa` and buckets as
/// the plain sequential scan over that block.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_32s_1k_block_omp_uses_relative_cache() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;

    let text = vec![1; n];
    let block_entries: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();

    let mut sa_reference = vec![0; n];
    sa_reference[block_start..block_end].copy_from_slice(&block_entries);
    let mut sa_parallel = sa_reference.clone();

    let mut bucket_reference = vec![0, 0];
    let mut bucket_parallel = bucket_reference.clone();
    let mut cache = vec![ThreadCache::default(); 4 * LIBSAIS_PER_THREAD_CACHE_SIZE];

    partial_sorting_scan_left_to_right_32s_1k(
        &text,
        &mut sa_reference,
        &mut bucket_reference,
        block_start as FastSint,
        block_size as FastSint,
    );
    partial_sorting_scan_left_to_right_32s_1k_block_omp(
        &text,
        &mut sa_parallel,
        &mut bucket_parallel,
        &mut cache,
        block_start as FastSint,
        block_size as FastSint,
        4,
    );

    assert_eq!(sa_parallel, sa_reference);
    assert_eq!(bucket_parallel, bucket_reference);
}
16044
/// Prepares a two-entry block and checks that at least one cache entry was
/// recorded and that one of the recorded entries has a non-negative symbol.
///
/// The returned `(position, count)` pair is consumed directly; only `count`
/// takes part in the assertions, so no `ThreadState` needs to be allocated
/// just to hold the two scalars.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_8u_block_prepare_records_cache_and_counts() {
    let t = vec![2_u8, 1, 3, 1, 0];
    let sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];
    let k = 4;
    let mut buckets = vec![0; 4 * k];
    let mut cache = vec![ThreadCache::default(); 8];

    let (_position, count) = partial_sorting_scan_left_to_right_8u_block_prepare(
        &t,
        &sa,
        k as SaSint,
        &mut buckets,
        &mut cache,
        0,
        2,
    );

    assert!(count >= 1);
    assert!(cache
        .iter()
        .take(count as usize)
        .any(|entry| entry.symbol >= 0));
}
16070
/// Places two cached entries (one marked with `SAINT_MIN`) and expects at
/// least one of the first two `sa` slots to become non-zero.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_8u_block_place_writes_induced_values() {
    let marked = ThreadCache {
        index: 3 | SAINT_MIN,
        symbol: 0,
    };
    let plain = ThreadCache {
        index: 5,
        symbol: 1,
    };
    let cache = vec![marked, plain];

    // Bucket 0 starts at slot 0 and bucket 1 at slot 1; the rest stay zero.
    let mut buckets = vec![0; 8];
    buckets[1] = 1;
    let mut sa = vec![0; 8];

    partial_sorting_scan_left_to_right_8u_block_place(&mut sa, &mut buckets, 2, &cache, 2, 0);

    assert!(sa[0] != 0 || sa[1] != 0);
}
16090
/// Smoke test: the two-thread `_block_omp` call over a two-entry block returns
/// a non-negative value.
#[test]
fn libsais64_partial_sorting_scan_left_to_right_8u_block_omp_wraps_sequential_version() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    let mut sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];
    let mut thread_state = alloc_thread_state(2).unwrap();

    let d = partial_sorting_scan_left_to_right_8u_block_omp(
        &text,
        &mut sa,
        4,
        &mut buckets,
        0,
        0,
        2,
        2,
        &mut thread_state,
    );

    assert!(d >= 0);
}
16110
/// With `k = 3`, the four values stored from offset `4 * k` onward must be
/// copied to slots 0, 1, 4 and 5 of the bucket table.
#[test]
fn libsais64_partial_sorting_shift_buckets_32s_6k_moves_temp_bucket_view_into_main_slots() {
    let k = 3usize;
    let mut buckets = vec![0; 6 * k];
    for (offset, value) in [10, 11, 12, 13].into_iter().enumerate() {
        buckets[4 * k + offset] = value;
    }

    partial_sorting_shift_buckets_32s_6k(k as SaSint, &mut buckets);

    for (slot, expected) in [(0usize, 10), (1, 11), (4, 12), (5, 13)] {
        assert_eq!(buckets[slot], expected);
    }
}
16125
/// Scanning the single marked entry `4` must write `3 | SAINT_MIN` to slot 1,
/// lower the symbol-1 bucket from 2 to 1, record 1 in the matching slot of the
/// `2 * ALPHABET_SIZE` section and return 1.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_8u_emits_induced_suffixes() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut sa = vec![0, 0, 4 | SAINT_MIN];

    let target = buckets_index2(1, 1);
    let mut buckets = vec![0; 4 * ALPHABET_SIZE];
    buckets[target] = 2;

    let d = partial_sorting_scan_right_to_left_8u(&text, &mut sa, &mut buckets, 0, 2, 1);

    assert_eq!(d, 1);
    assert_eq!(sa[1], 3 | SAINT_MIN);
    assert_eq!(buckets[target], 1);
    assert_eq!(buckets[2 * ALPHABET_SIZE + target], 1);
}
16140
/// Runs `partial_gsa_scan_right_to_left_8u` on a text ending in two zero bytes
/// and checks that the suffix array and the seeded bucket slot are unchanged
/// while the call returns 1.
#[test]
fn libsais64_partial_gsa_scan_right_to_left_8u_skips_separator_bucket() {
    let text = vec![1_u8, 0, 0];
    let mut suffix_array = vec![0, 2 | SAINT_MIN];
    let mut bucket_table = vec![0; 4 * ALPHABET_SIZE];
    let slot = buckets_index2(0, 1);
    bucket_table[slot] = 2;

    let result =
        partial_gsa_scan_right_to_left_8u(&text, &mut suffix_array, &mut bucket_table, 0, 1, 1);

    assert_eq!(result, 1);
    assert_eq!(suffix_array, vec![0, 2 | SAINT_MIN]);
    assert_eq!(bucket_table[slot], 2);
}
16154
/// Runs `partial_sorting_scan_right_to_left_32s_6k` over a five-symbol text with
/// one marked entry in `sa[2]`, then checks the returned value, the entry
/// written to `sa[1]`, and the bucket slot plus the slot two places after it.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_6k_emits_induced_suffixes() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffix_array = vec![0, 0, 4 | SAINT_MIN];
    let mut bucket_table = vec![0; 4 * 3];
    let slot = buckets_index4(1, 1);
    bucket_table[slot] = 2;

    let result = partial_sorting_scan_right_to_left_32s_6k(
        &text,
        &mut suffix_array,
        &mut bucket_table,
        0,
        2,
        1,
    );

    assert_eq!(result, 1);
    assert_eq!(suffix_array[1], 3 | SAINT_MIN);
    assert_eq!(bucket_table[slot], 1);
    assert_eq!(bucket_table[slot + 2], 1);
}
16169
/// Drives `partial_sorting_scan_right_to_left_32s_1k_omp` with two threads on a
/// three-slot suffix array and checks the entry written to `sa[1]` and that
/// bucket 1 goes from 2 to 1.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_1k_omp_wraps_sequential_version() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffix_array = vec![0, 0, 4];
    let mut bucket_table = vec![0, 2, 0];
    let mut states = alloc_thread_state(2).unwrap();

    partial_sorting_scan_right_to_left_32s_1k_omp(
        &text,
        &mut suffix_array,
        3,
        &mut bucket_table,
        2,
        &mut states,
    );

    assert_eq!(suffix_array[1], 3 | SAINT_MIN);
    assert_eq!(bucket_table[1], 1);
}
16190
/// Gathers the single-entry block starting at `sa[1]` and checks that the first
/// cache slot holds the original index together with the expected
/// `buckets_index4(1, 1)` symbol.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_6k_block_gather_records_symbols() {
    let text = vec![0, 1, 2, 1, 0];
    let suffix_array = vec![0, 4 | SAINT_MIN, 0];
    let mut cached = vec![ThreadCache::default(); suffix_array.len()];

    partial_sorting_scan_right_to_left_32s_6k_block_gather(&text, &suffix_array, &mut cached, 1, 1);

    let first = cached[0];
    assert_eq!(first.index, 4 | SAINT_MIN);
    assert_eq!(first.symbol, buckets_index4(1, 1) as SaSint);
}
16202
/// Gathers the single-entry block starting at `sa[1]` and checks that the slot
/// is zeroed while the first cache entry keeps the group-marked index and the
/// `buckets_index2(1, 1)` symbol.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_4k_block_gather_zeroes_positive_entries() {
    let text = vec![0, 1, 2, 1, 0];
    let marked = 4 | SUFFIX_GROUP_MARKER;
    let mut suffix_array = vec![0, marked, 0];
    let mut cached = vec![ThreadCache::default(); suffix_array.len()];

    partial_sorting_scan_right_to_left_32s_4k_block_gather(
        &text,
        &mut suffix_array,
        &mut cached,
        1,
        1,
    );

    assert_eq!(suffix_array[1], 0);
    assert_eq!(cached[0].index, marked);
    assert_eq!(cached[0].symbol, buckets_index2(1, 1) as SaSint);
}
16215
/// Gathers the single-entry block starting at `sa[1]` and checks that the slot
/// is zeroed and the first cache entry holds `3 | SAINT_MIN` with symbol 1.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_1k_block_gather_stores_preinduced_entries() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffix_array = vec![0, 4, 0];
    let mut cached = vec![ThreadCache::default(); suffix_array.len()];

    partial_sorting_scan_right_to_left_32s_1k_block_gather(
        &text,
        &mut suffix_array,
        &mut cached,
        1,
        1,
    );

    assert_eq!(suffix_array[1], 0);
    let first = cached[0];
    assert_eq!(first.index, 3 | SAINT_MIN);
    assert_eq!(first.symbol, 1);
}
16229
/// Sorts a one-entry cache block and checks the returned value, the rewritten
/// cache index, and the bucket slot plus the slot two places after it.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_6k_block_sort_updates_bucket_and_marker_state()
{
    let text = vec![0, 1, 2, 1, 0];
    let slot = buckets_index4(1, 1);
    let mut cached = vec![ThreadCache::default(); 3];
    cached[0] = ThreadCache {
        symbol: slot as SaSint,
        index: 4 | SAINT_MIN,
    };
    let mut bucket_table = vec![0; 4 * 3];
    bucket_table[slot] = 2;

    let result = partial_sorting_scan_right_to_left_32s_6k_block_sort(
        &text,
        &mut bucket_table,
        0,
        &mut cached,
        1,
        1,
    );

    assert_eq!(result, 1);
    assert_eq!(cached[0].index, 3 | SAINT_MIN);
    assert_eq!(bucket_table[slot], 1);
    assert_eq!(bucket_table[slot + 2], 1);
}
16254
/// Runs the 1k block scan with two threads over the single entry at `sa[1]` and
/// checks the value left in `sa[1]` and that bucket 1 goes from 2 to 1.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_1k_block_omp_places_cached_suffixes() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffix_array = vec![0, 4, 0];
    let mut bucket_table = vec![0, 2, 0];
    let mut cached = vec![ThreadCache::default(); suffix_array.len()];

    partial_sorting_scan_right_to_left_32s_1k_block_omp(
        &text,
        &mut suffix_array,
        &mut bucket_table,
        &mut cached,
        1,
        1,
        2,
    );

    assert_eq!(suffix_array[1], 3 | SAINT_MIN);
    assert_eq!(bucket_table[1], 1);
}
16276
/// Compares the plain 1k right-to-left scan with the four-thread block variant
/// on a 16,384-entry block that starts at offset 20,000; both must leave
/// identical suffix arrays and bucket tables.
#[test]
fn libsais64_partial_sorting_scan_right_to_left_32s_1k_block_omp_uses_relative_cache() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block = block_start..block_start + block_size;
    let total_len = block.end + 8;
    let text = vec![1; total_len];
    // Block entries are the consecutive values 2, 3, ..., block_size + 1.
    let entries: Vec<SaSint> = (0..block_size)
        .map(|offset| (offset + 2) as SaSint)
        .collect();

    let mut reference_sa = vec![0; total_len];
    reference_sa[block].copy_from_slice(&entries);
    let mut parallel_sa = reference_sa.clone();
    let mut reference_buckets = vec![0, block_size as SaSint];
    let mut parallel_buckets = reference_buckets.clone();
    let mut cached = vec![ThreadCache::default(); 4 * LIBSAIS_PER_THREAD_CACHE_SIZE];

    let start = block_start as FastSint;
    let size = block_size as FastSint;
    partial_sorting_scan_right_to_left_32s_1k(
        &text,
        &mut reference_sa,
        &mut reference_buckets,
        start,
        size,
    );
    partial_sorting_scan_right_to_left_32s_1k_block_omp(
        &text,
        &mut parallel_sa,
        &mut parallel_buckets,
        &mut cached,
        start,
        size,
        4,
    );

    assert_eq!(parallel_sa, reference_sa);
    assert_eq!(parallel_buckets, reference_buckets);
}
16312
/// Gathers over four entries of which two are negative; expects a return value
/// of 2 and the two expected values at the front of the array.
#[test]
fn libsais64_partial_sorting_gather_lms_suffixes_32s_4k_compacts_negative_marked_entries() {
    let mut entries = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
    let len = entries.len() as FastSint;

    let gathered = partial_sorting_gather_lms_suffixes_32s_4k(&mut entries, 0, len);

    let base = SAINT_MIN | SUFFIX_GROUP_MARKER;
    assert_eq!(gathered, 2);
    assert_eq!(entries[0], base - 3);
    assert_eq!(entries[1], base - 7);
}
16324
/// Gathers over `[1, -3, 5, -7]`; expects a return value of 2 and the values
/// `SAINT_MAX - 2` and `SAINT_MAX - 6` at the front of the array.
#[test]
fn libsais64_partial_sorting_gather_lms_suffixes_32s_1k_compacts_negative_marked_entries() {
    let mut entries = vec![1, -3, 5, -7];
    let len = entries.len() as FastSint;

    let gathered = partial_sorting_gather_lms_suffixes_32s_1k(&mut entries, 0, len);

    assert_eq!(gathered, 2);
    assert_eq!(entries[..2], [SAINT_MAX - 2, SAINT_MAX - 6]);
}
16336
/// Runs the 4k gather through its `_omp` entry point with two threads and
/// checks the two values left at the front of the array.
#[test]
fn libsais64_partial_sorting_gather_lms_suffixes_32s_4k_omp_wraps_sequential_version() {
    let mut entries = vec![1 | SUFFIX_GROUP_MARKER, -3, 5 | SUFFIX_GROUP_MARKER, -7];
    let mut states = alloc_thread_state(2).unwrap();

    partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut entries, 4, 2, &mut states);

    let base = SAINT_MIN | SUFFIX_GROUP_MARKER;
    assert_eq!(entries[0], base - 3);
    assert_eq!(entries[1], base - 7);
}
16347
/// Runs the 1k gather through its `_omp` entry point with two threads and
/// checks the two values left at the front of the array.
#[test]
fn libsais64_partial_sorting_gather_lms_suffixes_32s_1k_omp_wraps_sequential_version() {
    let mut entries = vec![1, -3, 5, -7];
    let mut states = alloc_thread_state(2).unwrap();

    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut entries, 4, 2, &mut states);

    assert_eq!(entries[0], SAINT_MAX - 2);
    assert_eq!(entries[1], SAINT_MAX - 6);
}
16358
/// For both the 4k and 1k gather entry points, compares a one-thread run with a
/// four-thread run over 65,600 entries and requires the gathered prefixes to
/// match. Every 5th (4k) or 7th (1k) entry carries the `SAINT_MIN` bit.
#[test]
fn libsais64_partial_sorting_gather_lms_suffixes_32s_omp_uses_block_partition() {
    let n = 65_600usize;
    let len = n as SaSint;
    let mut states = alloc_thread_state(4).unwrap();

    // 4k variant: every entry carries the group marker.
    let source_4k: Vec<SaSint> = (0..n)
        .map(|i| {
            let sign = if i % 5 == 0 { SAINT_MIN } else { 0 };
            (i as SaSint) | SUFFIX_GROUP_MARKER | sign
        })
        .collect();
    let negatives_4k = source_4k.iter().filter(|value| **value < 0).count();

    let mut sequential = source_4k.clone();
    let mut parallel = source_4k;
    partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut sequential, len, 1, &mut []);
    partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut parallel, len, 4, &mut states);
    assert_eq!(&parallel[..negatives_4k], &sequential[..negatives_4k]);

    // 1k variant: plain indices, reusing the same thread state.
    let source_1k: Vec<SaSint> = (0..n)
        .map(|i| {
            let sign = if i % 7 == 0 { SAINT_MIN } else { 0 };
            (i as SaSint) | sign
        })
        .collect();
    let negatives_1k = source_1k.iter().filter(|value| **value < 0).count();

    let mut sequential = source_1k.clone();
    let mut parallel = source_1k;
    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut sequential, len, 1, &mut []);
    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut parallel, len, 4, &mut states);
    assert_eq!(&parallel[..negatives_1k], &sequential[..negatives_1k]);
}
16409
/// Runs `partial_sorting_shift_markers_8u_omp` single-threaded and checks that
/// at least one entry ends up without the `SAINT_MIN` bit.
#[test]
fn libsais64_partial_sorting_shift_markers_8u_omp_toggles_segment_markers() {
    let mut entries = vec![1 | SAINT_MIN, 2 | SAINT_MIN, 3, 4 | SAINT_MIN, 5];
    let count = entries.len() as SaSint;
    let mut bucket_table = vec![0; 6 * ALPHABET_SIZE];
    bucket_table[4 * ALPHABET_SIZE + buckets_index2(1, 0)] = 5;
    bucket_table[buckets_index2(0, 0)] = 0;

    partial_sorting_shift_markers_8u_omp(&mut entries, count, &bucket_table, 1);

    assert!(entries.iter().any(|&entry| (entry & SAINT_MIN) == 0));
}
16420
/// Runs `partial_sorting_shift_markers_32s_6k_omp` single-threaded with `k = 3`
/// and checks that at least one entry ends up without the `SAINT_MIN` bit.
#[test]
fn libsais64_partial_sorting_shift_markers_32s_6k_omp_toggles_segment_markers() {
    let k = 3usize;
    let mut entries = vec![1 | SAINT_MIN, 2 | SAINT_MIN, 3, 4 | SAINT_MIN, 5];
    let mut bucket_table = vec![0; 6 * k];
    bucket_table[buckets_index4(1, 0)] = 5;
    bucket_table[4 * k + buckets_index2(0, 0)] = 0;

    partial_sorting_shift_markers_32s_6k_omp(&mut entries, k as SaSint, &bucket_table, 1);

    assert!(entries.iter().any(|&entry| (entry & SAINT_MIN) == 0));
}
16431
/// Runs `partial_sorting_shift_markers_32s_4k` and checks that at least one
/// entry ends up without the `SUFFIX_GROUP_MARKER` bit.
#[test]
fn libsais64_partial_sorting_shift_markers_32s_4k_toggles_group_markers() {
    let mut entries = vec![
        1 | SUFFIX_GROUP_MARKER,
        2 | SUFFIX_GROUP_MARKER,
        3,
        4 | SUFFIX_GROUP_MARKER,
    ];
    let count = entries.len() as SaSint;

    partial_sorting_shift_markers_32s_4k(&mut entries, count);

    assert!(entries
        .iter()
        .any(|&entry| (entry & SUFFIX_GROUP_MARKER) == 0));
}
16444
/// Clears the ranges `[1, 3)`, `[4, 5)` and the empty `[5, 5)` described by the
/// start/end tables and checks that only those slots are zeroed.
#[test]
fn libsais64_clear_lms_suffixes_omp_zeroes_requested_bucket_ranges() {
    let mut entries = vec![5, 4, 3, 2, 1, 9];
    let count = entries.len() as SaSint;
    let range_starts = vec![1, 4, 5];
    let range_ends = vec![3, 5, 5];

    clear_lms_suffixes_omp(&mut entries, count, 3, &range_starts, &range_ends, 2);

    assert_eq!(entries, vec![5, 0, 0, 2, 0, 9]);
}
16456
/// Scans the single entry `sa[0] = 1` with `final_bwt_scan_left_to_right_8u` and
/// checks that `sa[0]` becomes 0 and bucket 0 advances to 1.
#[test]
fn libsais64_final_bwt_scan_left_to_right_8u_rewrites_sa_and_induces_suffixes() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffix_array = vec![1, 0, 0];
    let mut bucket_heads = vec![0, 1, 3];

    final_bwt_scan_left_to_right_8u(&text, &mut suffix_array, &mut bucket_heads, 0, 1);

    assert_eq!(suffix_array[0], 0);
    assert_eq!(bucket_heads[0], 1);
}
16468
/// Scans the single entry `sa[0] = 1` with the aux variant and checks that the
/// first slot of the output sampling array is set to 1.
#[test]
fn libsais64_final_bwt_aux_scan_left_to_right_8u_updates_sampling_array() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffix_array = vec![1, 0, 0];
    let mut bucket_heads = vec![0, 1, 3];
    let mut samples = vec![0; 2];

    final_bwt_aux_scan_left_to_right_8u(
        &text,
        &mut suffix_array,
        0,
        &mut samples,
        &mut bucket_heads,
        0,
        1,
    );

    assert_eq!(samples[0], 1);
}
16488
/// Scans the single entry `sa[0] = 1` with `final_sorting_scan_left_to_right_8u`
/// and checks that `sa[0]` becomes 0 and bucket 0 advances to 1.
#[test]
fn libsais64_final_sorting_scan_left_to_right_8u_clears_marker_and_places_suffix() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffix_array = vec![1, 0, 0];
    let mut bucket_heads = vec![0, 1, 3];

    final_sorting_scan_left_to_right_8u(&text, &mut suffix_array, &mut bucket_heads, 0, 1);

    assert_eq!(suffix_array[0], 0);
    assert_eq!(bucket_heads[0], 1);
}
16500
/// Scans the single entry `sa[0] = 1` with `final_sorting_scan_left_to_right_32s`
/// and checks that `sa[0]` becomes 0 and bucket 0 advances to 1.
#[test]
fn libsais64_final_sorting_scan_left_to_right_32s_clears_marker_and_places_suffix() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffix_array = vec![1, 0, 0];
    let mut bucket_heads = vec![0, 1, 3];

    final_sorting_scan_left_to_right_32s(&text, &mut suffix_array, &mut bucket_heads, 0, 1);

    assert_eq!(suffix_array[0], 0);
    assert_eq!(bucket_heads[0], 1);
}
16512
/// Prepares a two-entry block and checks the returned count, the rewritten
/// suffix-array slots, the per-symbol counts (buckets start out filled with 99)
/// and the two cache entries that were recorded.
#[test]
fn libsais64_final_bwt_scan_left_to_right_8u_block_prepare_records_cache_and_counts() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffix_array = vec![1, 2, 0];
    let mut bucket_table = vec![99; ALPHABET_SIZE];
    let mut cached = vec![ThreadCache::default(); 4];

    let recorded = final_bwt_scan_left_to_right_8u_block_prepare(
        &text,
        &mut suffix_array,
        ALPHABET_SIZE as SaSint,
        &mut bucket_table,
        &mut cached,
        0,
        2,
    );

    assert_eq!(recorded, 2);
    assert_eq!(suffix_array[0] & SAINT_MAX, 0);
    assert_eq!(suffix_array[1], 1 | SAINT_MIN);
    assert_eq!(bucket_table[0], 1);
    assert_eq!(bucket_table[1], 1);

    let (first, second) = (cached[0], cached[1]);
    assert_eq!(first.symbol, 0);
    assert_eq!(first.index & SAINT_MAX, 0);
    assert_eq!(second.symbol, 1);
    assert_eq!(second.index & SAINT_MAX, 1);
}
16540
/// Runs `final_sorting_scan_left_to_right_8u_omp` once with one thread (empty
/// thread state) and once with two threads on identical inputs, and requires
/// the resulting suffix arrays and induction buckets to be equal.
#[test]
fn libsais64_final_sorting_scan_left_to_right_8u_omp_wraps_sequential_behavior() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let text_len = text.len() as FastSint;
    let alphabet = ALPHABET_SIZE as SaSint;

    let mut actual_sa = vec![0; text.len()];
    let mut actual_bucket = vec![0, 1, 3];
    let mut reference_sa = actual_sa.clone();
    let mut reference_bucket = actual_bucket.clone();

    final_sorting_scan_left_to_right_8u_omp(
        &text,
        &mut reference_sa,
        text_len,
        alphabet,
        &mut reference_bucket,
        1,
        &mut [],
    );

    let mut states = alloc_thread_state(2).unwrap();

    final_sorting_scan_left_to_right_8u_omp(
        &text,
        &mut actual_sa,
        text_len,
        alphabet,
        &mut actual_bucket,
        2,
        &mut states,
    );

    assert_eq!(actual_sa, reference_sa);
    assert_eq!(actual_bucket, reference_bucket);
}
16574
/// Scans two suffix-array slots right to left and checks the returned index
/// (0), the value left in `sa[1]`, and that bucket 1 goes from 2 to 1.
#[test]
fn libsais64_final_bwt_scan_right_to_left_8u_returns_zero_index_and_induces_suffixes() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffix_array = vec![0, 2, 0];
    let mut bucket_tails = vec![1, 2, 3];

    let primary =
        final_bwt_scan_right_to_left_8u(&text, &mut suffix_array, &mut bucket_tails, 0, 2);

    assert_eq!(primary, 0);
    assert_eq!(suffix_array[1], 1);
    assert_eq!(bucket_tails[1], 1);
}
16587
/// Runs `final_sorting_scan_right_to_left_8u_omp` once with one thread (empty
/// thread state) and once with two threads on identical inputs, and requires
/// the resulting suffix arrays and induction buckets to be equal.
#[test]
fn libsais64_final_sorting_scan_right_to_left_8u_omp_matches_sequential_path() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let alphabet = ALPHABET_SIZE as SaSint;

    let mut actual_sa = vec![0, 2, 0, 0];
    let mut actual_bucket = vec![1, 2, 3];
    let mut reference_sa = actual_sa.clone();
    let mut reference_bucket = actual_bucket.clone();

    final_sorting_scan_right_to_left_8u_omp(
        &text,
        &mut reference_sa,
        0,
        2,
        alphabet,
        &mut reference_bucket,
        1,
        &mut [],
    );

    let mut states = alloc_thread_state(2).unwrap();
    final_sorting_scan_right_to_left_8u_omp(
        &text,
        &mut actual_sa,
        0,
        2,
        alphabet,
        &mut actual_bucket,
        2,
        &mut states,
    );

    assert_eq!(actual_sa, reference_sa);
    assert_eq!(actual_bucket, reference_bucket);
}
16622
/// With `LIBSAIS_FLAGS_NONE`, `induce_final_order_8u_omp` must return 0 and
/// leave the same suffix array and the same bucket lanes (at offsets
/// `6 * ALPHABET_SIZE` and `7 * ALPHABET_SIZE`) as calling the left-to-right and
/// right-to-left final sorting scans directly on copies of the inputs.
#[test]
fn libsais64_induce_final_order_8u_omp_non_bwt_matches_direct_final_scans() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let text_len = text.len() as FastSint;
    let alphabet = ALPHABET_SIZE as SaSint;
    let left_lane = 6 * ALPHABET_SIZE..6 * ALPHABET_SIZE + 3;
    let right_lane = 7 * ALPHABET_SIZE..7 * ALPHABET_SIZE + 3;

    let mut actual_sa = vec![0, 2, 0, 0, 0];
    let mut bucket_table = vec![0; 8 * ALPHABET_SIZE];
    bucket_table[left_lane.clone()].copy_from_slice(&[0, 1, 3]);
    bucket_table[right_lane.clone()].copy_from_slice(&[2, 4, 5]);

    // Reference result: the two scans applied back to back, single-threaded.
    let mut reference_sa = actual_sa.clone();
    let mut reference_left = vec![0, 1, 3];
    let mut reference_right = vec![2, 4, 5];
    final_sorting_scan_left_to_right_8u_omp(
        &text,
        &mut reference_sa,
        text_len,
        alphabet,
        &mut reference_left,
        1,
        &mut [],
    );
    final_sorting_scan_right_to_left_8u_omp(
        &text,
        &mut reference_sa,
        0,
        text_len,
        alphabet,
        &mut reference_right,
        1,
        &mut [],
    );

    let mut states = alloc_thread_state(2).unwrap();
    let status = induce_final_order_8u_omp(
        &text,
        &mut actual_sa,
        text.len() as SaSint,
        alphabet,
        LIBSAIS_FLAGS_NONE,
        0,
        None,
        &mut bucket_table,
        2,
        &mut states,
    );

    assert_eq!(status, 0);
    assert_eq!(actual_sa, reference_sa);
    assert_eq!(&bucket_table[left_lane], reference_left.as_slice());
    assert_eq!(&bucket_table[right_lane], reference_right.as_slice());
}
16679
/// Checks the negative-entry and zero-entry counters on a seven-element array,
/// first over the whole array and then over the call with arguments `(2, 3)`.
#[test]
fn libsais64_count_helpers_match_c_predicates() {
    let entries = [1, -1, 0, -3, 4, 0, -9];
    let full_len = entries.len() as FastSint;

    assert_eq!(count_negative_marked_suffixes(&entries, 0, full_len), 3);
    assert_eq!(count_zero_marked_suffixes(&entries, 0, full_len), 2);
    assert_eq!(count_negative_marked_suffixes(&entries, 2, 3), 1);
    assert_eq!(count_zero_marked_suffixes(&entries, 2, 3), 1);
}
16691
/// After `flip_suffix_markers_omp`, every entry must equal its original value
/// XOR-ed with `SAINT_MIN`.
#[test]
fn libsais64_flip_suffix_markers_omp_toggles_saint_min_bits() {
    let original: Vec<SaSint> = vec![1, -2, 3, -4];
    let mut entries = original.clone();

    flip_suffix_markers_omp(&mut entries, 4, 1);

    let flipped: Vec<SaSint> = original.iter().map(|&value| value ^ SAINT_MIN).collect();
    assert_eq!(entries, flipped);
}
16701
/// Places three cache entries and checks that each `index` lands in the
/// suffix-array slot named by its `symbol`.
#[test]
fn libsais64_place_cached_suffixes_writes_indices_to_symbol_slots() {
    let mut suffix_array = vec![0; 8];
    // (symbol, index) pairs.
    let cached: Vec<ThreadCache> = [(2, 10), (5, 20), (1, 30)]
        .iter()
        .map(|&(symbol, index)| ThreadCache { symbol, index })
        .collect();

    place_cached_suffixes(&mut suffix_array, &cached, 0, cached.len() as FastSint);

    assert_eq!(suffix_array[2], 10);
    assert_eq!(suffix_array[5], 20);
    assert_eq!(suffix_array[1], 30);
}
16726
/// Mixes three entries with non-negative symbols and two with negative symbols;
/// expects the non-negative ones to be written to the suffix array and to end
/// up, in order, at the front of the cache.
#[test]
fn libsais64_compact_and_place_cached_suffixes_discards_negative_symbols() {
    let mut suffix_array = vec![0; 8];
    // (symbol, index) pairs; the entries with symbols -1 and -4 must be dropped.
    let mut cached: Vec<ThreadCache> = [(2, 10), (-1, 99), (5, 20), (-4, 77), (1, 30)]
        .iter()
        .map(|&(symbol, index)| ThreadCache { symbol, index })
        .collect();
    let cached_len = cached.len() as FastSint;

    compact_and_place_cached_suffixes(&mut suffix_array, &mut cached, 0, cached_len);

    assert_eq!(suffix_array[2], 10);
    assert_eq!(suffix_array[5], 20);
    assert_eq!(suffix_array[1], 30);
    assert_eq!(
        cached[0],
        ThreadCache {
            index: 10,
            symbol: 2
        }
    );
    assert_eq!(
        cached[1],
        ThreadCache {
            index: 20,
            symbol: 5
        }
    );
    assert_eq!(
        cached[2],
        ThreadCache {
            index: 30,
            symbol: 1
        }
    );
}
16781
/// Checks that `gather_lms_suffixes_32s` returns a non-negative value, keeps
/// every suffix-array entry within `0..=n`, and leaves a value in `1..=n - 1`
/// in the last slot.
#[test]
fn libsais64_gather_lms_suffixes_32s_collects_expected_suffix_starts() {
    let text = vec![2, 1, 3, 1, 0];
    let n = text.len() as SaSint;
    let mut suffix_array = vec![0; text.len()];

    let gathered = gather_lms_suffixes_32s(&text, &mut suffix_array, n);

    assert!(0 <= gathered);
    assert!(suffix_array.iter().all(|&entry| (0..=n).contains(&entry)));
    let last = suffix_array[text.len() - 1];
    assert!((1..=n - 1).contains(&last));
}
16793
/// Checks that `gather_compacted_lms_suffixes_32s` returns a non-negative value
/// and keeps every suffix-array entry within `0..=n` for a text containing one
/// negative symbol.
#[test]
fn libsais64_gather_compacted_lms_suffixes_32s_skips_negative_marked_symbols() {
    let text = vec![2, -1, 3, 1, 0];
    let n = text.len() as SaSint;
    let mut suffix_array = vec![0; text.len()];

    let gathered = gather_compacted_lms_suffixes_32s(&text, &mut suffix_array, n);

    assert!(0 <= gathered);
    assert!(suffix_array.iter().all(|&entry| (0..=n).contains(&entry)));
}
16804
/// The counts written by `count_lms_suffixes_32s_2k` must add up to the text
/// length.
#[test]
fn libsais64_count_lms_suffixes_32s_2k_counts_two_bucket_categories() {
    let text = vec![2, 1, 3, 1, 0];
    let n = text.len() as SaSint;
    let mut bucket_table = vec![0; 2 * 4];

    count_lms_suffixes_32s_2k(&text, n, 4, &mut bucket_table);

    let total = bucket_table.iter().sum::<SaSint>();
    assert_eq!(total, n);
}
16812
/// The counts written by `count_lms_suffixes_32s_4k` must add up to the text
/// length.
#[test]
fn libsais64_count_lms_suffixes_32s_4k_counts_four_bucket_categories() {
    let text = vec![2, 1, 3, 1, 0];
    let n = text.len() as SaSint;
    let mut bucket_table = vec![0; 4 * 4];

    count_lms_suffixes_32s_4k(&text, n, 4, &mut bucket_table);

    let total = bucket_table.iter().sum::<SaSint>();
    assert_eq!(total, n);
}
16820
/// With one symbol carrying the `SAINT_MIN` bit, the counts written by
/// `count_compacted_lms_suffixes_32s_2k` must still add up to the text length.
#[test]
fn libsais64_count_compacted_lms_suffixes_32s_2k_masks_saint_bits() {
    let text = vec![2, SAINT_MIN | 1, 3, 1, 0];
    let n = text.len() as SaSint;
    let mut bucket_table = vec![0; 2 * 4];

    count_compacted_lms_suffixes_32s_2k(&text, n, 4, &mut bucket_table);

    let total = bucket_table.iter().sum::<SaSint>();
    assert_eq!(total, n);
}
16828
/// Runs `count_and_gather_lms_suffixes_8u` over the whole text; expects a
/// return value of 1, the value 1 in the last suffix-array slot, and bucket
/// counts that add up to the text length.
#[test]
fn libsais64_count_and_gather_lms_suffixes_8u_updates_sa_and_buckets() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let n = text.len();
    let mut suffix_array = vec![0; n];
    let mut bucket_table = vec![0; 4 * ALPHABET_SIZE];

    let gathered = count_and_gather_lms_suffixes_8u(
        &text,
        &mut suffix_array,
        n as SaSint,
        &mut bucket_table,
        0,
        n as FastSint,
    );

    assert_eq!(gathered, 1);
    assert_eq!(suffix_array[n - 1], 1);
    assert_eq!(bucket_table.iter().sum::<SaSint>(), n as SaSint);
}
16846
/// Runs the `_omp` entry point with two threads; expects a return value of 1
/// and bucket counts that add up to the text length.
#[test]
fn libsais64_count_and_gather_lms_suffixes_8u_omp_preserves_sequential_wrapper_behavior() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let n = text.len();
    let mut suffix_array = vec![0; n];
    let mut bucket_table = vec![0; 4 * ALPHABET_SIZE];
    let mut states = alloc_thread_state(2).unwrap();

    let gathered = count_and_gather_lms_suffixes_8u_omp(
        &text,
        &mut suffix_array,
        n as SaSint,
        &mut bucket_table,
        2,
        &mut states,
    );

    assert_eq!(gathered, 1);
    assert_eq!(bucket_table.iter().sum::<SaSint>(), n as SaSint);
}
16864
/// Table-driven check of `get_bucket_stride`: each row is the three call
/// arguments followed by the expected stride.
#[test]
fn libsais64_get_bucket_stride_prefers_aligned_sizes_when_space_allows() {
    let cases = [
        (8192, 1000, 2, 1024),
        (256, 17, 2, 32),
        (8, 17, 2, 17),
    ];
    for (first, second, third, expected) in cases {
        assert_eq!(get_bucket_stride(first, second, third), expected);
    }
}
16871
/// `count_suffixes_32s` must produce the per-symbol histogram of the text:
/// one 0, two 1s, three 2s and one 3.
#[test]
fn libsais64_count_suffixes_32s_counts_symbol_histogram() {
    let text = vec![2, 1, 2, 3, 1, 0, 2];
    let n = text.len() as SaSint;
    let mut histogram = vec![0; 4];

    count_suffixes_32s(&text, n, 4, &mut histogram);

    assert_eq!(histogram, vec![1, 2, 3, 1]);
}
16879
/// Seeds counts for symbols 0, 1 and 2, then checks the return value (3), the
/// frequency table, and the first two entries of the lanes at offsets
/// `6 * ALPHABET_SIZE` and `7 * ALPHABET_SIZE`.
#[test]
fn libsais64_initialize_buckets_start_and_end_8u_sets_ranges_and_freq() {
    let mut bucket_table = vec![0; 8 * ALPHABET_SIZE];
    bucket_table[buckets_index4(0, 0)] = 1;
    bucket_table[buckets_index4(1, 1)] = 2;
    bucket_table[buckets_index4(2, 3)] = 3;
    let mut frequencies = vec![0; ALPHABET_SIZE];

    let result = initialize_buckets_start_and_end_8u(&mut bucket_table, Some(&mut frequencies));

    assert_eq!(result, 3);
    assert_eq!(frequencies[0], 1);
    assert_eq!(frequencies[1], 2);
    assert_eq!(frequencies[2], 3);

    let lane6 = 6 * ALPHABET_SIZE;
    let lane7 = 7 * ALPHABET_SIZE;
    assert_eq!(bucket_table[lane6], 0);
    assert_eq!(bucket_table[lane7], 1);
    assert_eq!(bucket_table[lane6 + 1], 1);
    assert_eq!(bucket_table[lane7 + 1], 3);
}
16897
/// With per-symbol totals 3, 3 and 4, the lanes `[4k, 5k)` and `[5k, 6k)` must
/// hold `[0, 3, 6]` and `[3, 6, 10]` respectively.
#[test]
fn libsais64_initialize_buckets_start_and_end_32s_6k_sets_prefix_ranges() {
    let k = 3;
    let mut bucket_table = vec![0; 6 * k];
    bucket_table[buckets_index4(0, 0)] = 1;
    bucket_table[buckets_index4(0, 1)] = 2;
    bucket_table[buckets_index4(1, 2)] = 3;
    bucket_table[buckets_index4(2, 3)] = 4;

    initialize_buckets_start_and_end_32s_6k(k as SaSint, &mut bucket_table);

    let (lower, upper) = (&bucket_table[4 * k..5 * k], &bucket_table[5 * k..6 * k]);
    assert_eq!(lower, &[0, 3, 6]);
    assert_eq!(upper, &[3, 6, 10]);
}
16910
/// With per-symbol totals 3, 3 and 4, the lanes `[2k, 3k)` and `[3k, 4k)` must
/// hold `[0, 3, 6]` and `[3, 6, 10]` respectively.
#[test]
fn libsais64_initialize_buckets_start_and_end_32s_4k_sets_prefix_ranges() {
    let k = 3;
    let mut bucket_table = vec![0; 4 * k];
    bucket_table[buckets_index2(0, 0)] = 1;
    bucket_table[buckets_index2(0, 1)] = 2;
    bucket_table[buckets_index2(1, 0)] = 3;
    bucket_table[buckets_index2(2, 1)] = 4;

    initialize_buckets_start_and_end_32s_4k(k as SaSint, &mut bucket_table);

    let (lower, upper) = (&bucket_table[2 * k..3 * k], &bucket_table[3 * k..4 * k]);
    assert_eq!(lower, &[0, 3, 6]);
    assert_eq!(upper, &[3, 6, 10]);
}
16923
/// Starting from `[1, 2, 3, 4, 5, 6]` with `k = 3`, the even slots must become
/// the running totals 3, 10 and 21.
#[test]
fn libsais64_initialize_buckets_end_32s_2k_rewrites_first_lanes_to_end_positions() {
    let mut bucket_table = vec![1, 2, 3, 4, 5, 6];

    initialize_buckets_end_32s_2k(3, &mut bucket_table);

    assert_eq!(bucket_table[0], 3);
    assert_eq!(bucket_table[2], 10);
    assert_eq!(bucket_table[4], 21);
}
16933
/// Starting from `[3, 2, 10, 4, 21, 6]` with `k = 3`, the first `k` slots must
/// become `[3, 10, 21]` and the next `k` slots `[0, 3, 10]`.
#[test]
fn libsais64_initialize_buckets_start_and_end_32s_2k_copies_start_positions() {
    let k = 3;
    let mut bucket_table = vec![3, 2, 10, 4, 21, 6];

    initialize_buckets_start_and_end_32s_2k(k as SaSint, &mut bucket_table);

    let (head, tail) = bucket_table.split_at(k);
    assert_eq!(head, &[3, 10, 21]);
    assert_eq!(&tail[..k], &[0, 3, 10]);
}
16942
/// Counts `[1, 2, 3]` must be turned into the exclusive prefix sums
/// `[0, 1, 3]`.
#[test]
fn libsais64_initialize_buckets_start_32s_1k_builds_prefix_starts() {
    let mut counts = vec![1, 2, 3];

    initialize_buckets_start_32s_1k(3, &mut counts);

    let expected = vec![0, 1, 3];
    assert_eq!(counts, expected);
}
16949
/// Counts `[1, 2, 3]` must be turned into the inclusive prefix sums
/// `[1, 3, 6]`.
#[test]
fn libsais64_initialize_buckets_end_32s_1k_builds_prefix_ends() {
    let mut counts = vec![1, 2, 3];

    initialize_buckets_end_32s_1k(3, &mut counts);

    let expected = vec![1, 3, 6];
    assert_eq!(counts, expected);
}
16956
/// Seeds two bucket counts and only checks that
/// `initialize_buckets_for_lms_suffixes_radix_sort_8u` returns a non-negative
/// value.
#[test]
fn libsais64_initialize_buckets_for_lms_suffixes_radix_sort_8u_returns_total_lms_slots() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut bucket_table = vec![0; 6 * ALPHABET_SIZE];
    bucket_table[buckets_index4(0, 1)] = 1;
    bucket_table[buckets_index4(1, 3)] = 2;

    let total = initialize_buckets_for_lms_suffixes_radix_sort_8u(&text, &mut bucket_table, 4);

    assert!(0 <= total);
}
16966
/// Starting from an all-zero table, the call must leave at least one non-zero
/// bucket entry.
#[test]
fn libsais64_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k_rewrites_two_lane_prefixes() {
    let text = vec![2, 1, 3, 1, 0];
    let mut bucket_table = vec![0; 2 * 4];

    initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(&text, 4, &mut bucket_table, 4);

    assert!(bucket_table.iter().any(|&entry| entry != 0));
}
16975
/// Seeds two bucket counts and only checks that
/// `initialize_buckets_for_lms_suffixes_radix_sort_32s_6k` returns a
/// non-negative value.
#[test]
fn libsais64_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k_returns_total_lms_slots() {
    let text = vec![2, 1, 3, 1, 0];
    let mut bucket_table = vec![0; 6 * 4];
    bucket_table[buckets_index4(0, 1)] = 1;
    bucket_table[buckets_index4(1, 3)] = 2;

    let total =
        initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(&text, 4, &mut bucket_table, 4);

    assert!(0 <= total);
}
16985
/// After initialization the slot at `2 * k` must be 0 and the slot at `3 * k`
/// must not be smaller than it.
#[test]
fn libsais64_initialize_buckets_for_radix_and_partial_sorting_32s_4k_sets_start_end_views() {
    let text = vec![2, 1, 3, 1, 0];
    let k = 4usize;
    let mut bucket_table = vec![0; 4 * k];
    bucket_table[buckets_index2(0, 0)] = 1;
    bucket_table[buckets_index2(0, 1)] = 2;
    bucket_table[buckets_index2(1, 0)] = 3;

    initialize_buckets_for_radix_and_partial_sorting_32s_4k(
        &text,
        k as SaSint,
        &mut bucket_table,
        4,
    );

    assert_eq!(bucket_table[2 * k], 0);
    assert!(bucket_table[2 * k] <= bucket_table[3 * k]);
}
16998
/// After initialization slot 0 and slot `4 * ALPHABET_SIZE` must both be at
/// least 4, and slot 1 must be non-negative.
#[test]
fn libsais64_initialize_buckets_for_partial_sorting_8u_sets_start_and_distinct_views() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut bucket_table = vec![0; 6 * ALPHABET_SIZE];
    bucket_table[buckets_index4(0, 0)] = 1;
    bucket_table[buckets_index4(0, 2)] = 2;

    initialize_buckets_for_partial_sorting_8u(&text, &mut bucket_table, 4, 3);

    assert!(4 <= bucket_table[0]);
    assert!(0 <= bucket_table[1]);
    assert!(4 <= bucket_table[4 * ALPHABET_SIZE]);
}
17010
/// After initialization slot 0 and slot `4 * k` must both be at least 4.
#[test]
fn libsais64_initialize_buckets_for_partial_sorting_32s_6k_rewrites_bucket_views() {
    let text = vec![2, 1, 3, 1, 0];
    let k = 4usize;
    let mut bucket_table = vec![0; 6 * k];
    bucket_table[buckets_index4(0, 0)] = 1;
    bucket_table[buckets_index4(0, 1)] = 2;
    bucket_table[buckets_index4(1, 2)] = 3;

    initialize_buckets_for_partial_sorting_32s_6k(&text, k as SaSint, &mut bucket_table, 4, 3);

    assert!(4 <= bucket_table[0]);
    assert!(4 <= bucket_table[4 * k]);
}
17023
/// For this bucket layout the call must zero the first four suffix-array slots
/// and leave the last one (14) untouched.
#[test]
fn libsais64_place_lms_suffixes_interval_32s_4k_moves_suffixes_into_bucket_intervals() {
    let k = 3usize;
    let mut suffix_array = vec![10, 11, 12, 13, 14];
    let mut bucket_table = vec![0; 4 * k];
    bucket_table[buckets_index2(0, 1)] = 0;
    bucket_table[buckets_index2(1, 1)] = 2;
    bucket_table[buckets_index2(2, 1)] = 3;
    bucket_table[3 * k..3 * k + 2].copy_from_slice(&[2, 5]);

    place_lms_suffixes_interval_32s_4k(&mut suffix_array, 5, k as SaSint, 5, &bucket_table);

    assert_eq!(suffix_array, vec![0, 0, 0, 0, 14]);
}
17039
/// For this bucket layout the call must zero the first four suffix-array slots
/// and leave the last one (14) untouched.
#[test]
fn libsais64_place_lms_suffixes_interval_32s_2k_moves_suffixes_into_bucket_intervals() {
    let mut suffix_array = vec![10, 11, 12, 13, 14];
    let mut bucket_table = vec![0; 2 * 3];
    // (symbol, lane-0 value, lane-1 value)
    for &(symbol, lane0, lane1) in &[(0, 2, 0), (1, 5, 2), (2, 5, 3)] {
        bucket_table[buckets_index2(symbol, 0)] = lane0;
        bucket_table[buckets_index2(symbol, 1)] = lane1;
    }

    place_lms_suffixes_interval_32s_2k(&mut suffix_array, 5, 3, 5, &bucket_table);

    assert_eq!(suffix_array, vec![0, 0, 0, 0, 14]);
}
17055
/// Given bucket values `[0, 2, 5]`, the call must rearrange `[1, 2, 3, 4, 99]`
/// into `[1, 2, 0, 3, 4]`.
#[test]
fn libsais64_place_lms_suffixes_interval_32s_1k_places_suffixes_by_symbol_bucket() {
    let text = vec![0, 1, 1, 2, 2];
    let mut suffix_array = vec![1, 2, 3, 4, 99];
    let bucket_table = vec![0, 2, 5];

    place_lms_suffixes_interval_32s_1k(&text, &mut suffix_array, 3, 4, &bucket_table);

    let expected = vec![1, 2, 0, 3, 4];
    assert_eq!(suffix_array, expected);
}
17066
/// Applies `accumulate_counts_s32_2` through `_9` in sequence to the same
/// accumulator and checks the running element-wise totals after each call.
#[test]
fn libsais64_accumulate_counts_helpers_match_prefix_bucket_addition() {
    let mut acc = vec![4, 5, 6];
    let src1 = vec![1, 2, 3];
    let src2 = vec![7, 8, 9];
    let src3 = vec![10, 11, 12];
    let src4 = vec![13, 14, 15];
    let src5 = vec![16, 17, 18];
    let src6 = vec![19, 20, 21];
    let src7 = vec![22, 23, 24];
    let src8 = vec![25, 26, 27];

    accumulate_counts_s32_2(&mut acc, &src1);
    assert_eq!(acc, vec![5, 7, 9]);

    accumulate_counts_s32_3(&mut acc, &src1, &src2);
    assert_eq!(acc, vec![13, 17, 21]);

    accumulate_counts_s32_4(&mut acc, &src1, &src2, &src3);
    assert_eq!(acc, vec![31, 38, 45]);

    accumulate_counts_s32_5(&mut acc, &src1, &src2, &src3, &src4);
    assert_eq!(acc, vec![62, 73, 84]);

    accumulate_counts_s32_6(&mut acc, &src1, &src2, &src3, &src4, &src5);
    assert_eq!(acc, vec![109, 125, 141]);

    accumulate_counts_s32_7(&mut acc, &src1, &src2, &src3, &src4, &src5, &src6);
    assert_eq!(acc, vec![175, 197, 219]);

    accumulate_counts_s32_8(
        &mut acc, &src1, &src2, &src3, &src4, &src5, &src6, &src7,
    );
    assert_eq!(acc, vec![263, 292, 321]);

    accumulate_counts_s32_9(
        &mut acc, &src1, &src2, &src3, &src4, &src5, &src6, &src7, &src8,
    );
    assert_eq!(acc, vec![376, 413, 450]);
}
17137
/// With four two-element groups, only the last group must change: it becomes
/// the element-wise sum of all four groups (`[16, 20]`).
#[test]
fn libsais64_accumulate_counts_s32_matches_dispatch_for_small_bucket_counts() {
    let mut bucket_table = vec![1, 2, 3, 4, 5, 6, 7, 8];

    accumulate_counts_s32(&mut bucket_table, 2, 2, 4);

    let expected = vec![1, 2, 3, 4, 5, 6, 16, 20];
    assert_eq!(bucket_table, expected);
}
17144
/// Calls `accumulate_counts_s32` with arguments `2, 2, 9` on eighteen counters
/// and expects only the final pair to change, to the column sums 45 and 450.
#[test]
fn libsais64_accumulate_counts_s32_matches_dispatch_for_nine_buckets() {
    let mut counts = vec![
        1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90,
    ];
    let expected = vec![
        1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 45, 450,
    ];

    accumulate_counts_s32(&mut counts, 2, 2, 9);

    assert_eq!(counts, expected);
}
17156
/// Calls `accumulate_counts_s32` with arguments `1, 1, 11` on the values 1..=11.
/// The expected output has a partial sum (45) at index 8 and the full sum (66)
/// at index 10, with every other entry untouched.
#[test]
fn libsais64_accumulate_counts_s32_matches_chunked_nine_then_tail_behavior() {
    let mut counts: Vec<SaSint> = (1..=11).collect();

    accumulate_counts_s32(&mut counts, 1, 1, 11);

    let expected = vec![1, 2, 3, 4, 5, 6, 7, 8, 45, 10, 66];
    assert_eq!(counts, expected);
}
17163
/// Runs the block variant of the left-to-right 32s final scan with a full-size
/// thread cache, then checks the first two suffix-array slots (with the top bit
/// masked off) and the first two induction-bucket counters.
#[test]
fn libsais64_final_sorting_scan_left_to_right_32s_block_omp_places_cached_suffixes() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffix_array = vec![1, 2, 0, 0];
    let mut buckets = vec![0, 1, 3];
    let mut scratch = vec![ThreadCache::default(); LIBSAIS_PER_THREAD_CACHE_SIZE];

    final_sorting_scan_left_to_right_32s_block_omp(
        &text,
        &mut suffix_array,
        &mut buckets,
        &mut scratch,
        0,
        2,
        2,
    );

    // SAINT_MAX strips the sign bit so only the suffix index is compared.
    let first = suffix_array[0] & SAINT_MAX;
    let second = suffix_array[1] & SAINT_MAX;
    assert_eq!(first, 0);
    assert_eq!(second, 1);
    assert_eq!(buckets[0], 1);
    assert_eq!(buckets[1], 2);
}
17186
/// Compares the sequential 8u left-to-right final scan with its block variant
/// (4 threads, freshly allocated thread state) on a 16 384-entry block that
/// starts at offset 20 000 of an all-`1` text.
#[test]
fn libsais64_final_sorting_scan_left_to_right_8u_block_omp_uses_thread_buckets() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;
    let text = vec![1_u8; n];

    // Seed the block with the consecutive suffix indices 2, 3, 4, ...
    let mut sequential_sa = vec![0; n];
    for (slot, suffix) in sequential_sa[block_start..block_end]
        .iter_mut()
        .zip(2..2 + block_size)
    {
        *slot = suffix as SaSint;
    }
    let mut parallel_sa = sequential_sa.clone();

    let mut sequential_bucket = vec![0; ALPHABET_SIZE];
    let mut parallel_bucket = vec![0; ALPHABET_SIZE];
    let mut thread_state = alloc_thread_state(4).unwrap();

    final_sorting_scan_left_to_right_8u(
        &text,
        &mut sequential_sa,
        &mut sequential_bucket,
        block_start as FastSint,
        block_size as FastSint,
    );
    final_sorting_scan_left_to_right_8u_block_omp(
        &text,
        &mut parallel_sa,
        ALPHABET_SIZE as SaSint,
        &mut parallel_bucket,
        block_start as FastSint,
        block_size as FastSint,
        4,
        &mut thread_state,
    );

    assert_eq!(parallel_sa, sequential_sa);
    assert_eq!(parallel_bucket, sequential_bucket);
}
17223
/// Runs the plain right-to-left 32s final scan and its block variant on
/// identical inputs and requires identical suffix arrays and induction buckets.
#[test]
fn libsais64_final_sorting_scan_right_to_left_32s_block_omp_runs_block_pipeline() {
    let text = vec![0, 1, 2, 1, 0];
    let mut block_sa = vec![0, 2, 0, 0];
    let mut block_bucket = vec![1, 2, 3];
    let mut reference_sa = block_sa.clone();
    let mut reference_bucket = block_bucket.clone();
    let mut scratch = vec![ThreadCache::default(); LIBSAIS_PER_THREAD_CACHE_SIZE];

    // The reference result comes from the non-block scan.
    final_sorting_scan_right_to_left_32s(&text, &mut reference_sa, &mut reference_bucket, 0, 2);

    final_sorting_scan_right_to_left_32s_block_omp(
        &text,
        &mut block_sa,
        &mut block_bucket,
        &mut scratch,
        0,
        2,
        2,
    );

    assert_eq!(block_sa, reference_sa);
    assert_eq!(block_bucket, reference_bucket);
}
17247
/// Checks that `bwt_copy_8u` keeps only the low byte of each source word:
/// 256 becomes 0 and -1 becomes 255.
#[test]
fn libsais64_bwt_copy_8u_copies_low_bytes_from_suffix_array_storage() {
    let words = vec![65, 255, 256, -1];
    let mut bytes = vec![0_u8; 4];

    bwt_copy_8u(&mut bytes, &words, 4);

    let expected = vec![65, 255, 0, 255];
    assert_eq!(bytes, expected);
}
17257
/// Copies five small values through `bwt_copy_8u_omp` (last argument 4) and
/// expects them to arrive unchanged as bytes.
#[test]
fn libsais64_bwt_copy_8u_omp_matches_sequential_copy() {
    let words = vec![1, 2, 3, 4, 5];
    let mut bytes = vec![0_u8; 5];

    bwt_copy_8u_omp(&mut bytes, &words, 5, 4);

    let expected = vec![1, 2, 3, 4, 5];
    assert_eq!(bytes, expected);
}
17267
/// Exercises the 32-bit/64-bit conversion helpers: the out-of-place widening
/// copy, the in-place widening and narrowing pair, and the `_omp` in-place
/// widening variant.
#[test]
fn libsais64_conversion_helpers_use_little_endian_word_layout() {
    // Out-of-place: only indices 1 and 2 are expected to be written.
    let narrow = vec![11_u32, 22, 33, 44];
    let mut wide = vec![0_u64; 4];
    convert_32u_to_64u(&narrow, &mut wide, 1, 2);
    assert_eq!(wide, vec![0, 22, 33, 0]);

    // In-place widening interleaves each value with a zero word; narrowing
    // packs the values back into the first four slots.
    let mut in_place = vec![5_u32, 6, 7, 8, 0, 0, 0, 0];
    convert_inplace_32u_to_64u(&mut in_place, 0, 4);
    assert_eq!(in_place, vec![5, 0, 6, 0, 7, 0, 8, 0]);
    convert_inplace_64u_to_32u(&mut in_place, 0, 4);
    assert_eq!(&in_place[..4], &[5, 6, 7, 8]);

    let mut in_place_omp = vec![9_u32, 10, 11, 12, 0, 0, 0, 0];
    convert_inplace_32u_to_64u_omp(&mut in_place_omp, 4, 2);
    assert_eq!(in_place_omp, vec![9, 0, 10, 0, 11, 0, 12, 0]);
}
17285
/// Pins three results of `libsais64_new_32bit_fs`: a small input, an input
/// four below `i32::MAX`, and an input one above `i32::MAX` (which yields `None`).
#[test]
fn libsais64_32bit_workspace_sizing_matches_upstream_capacity_rules() {
    let small = libsais64_new_32bit_fs(10, 4);
    assert_eq!(small, Some(18));

    let near_limit = libsais64_new_32bit_fs(i32::MAX as usize - 4, 100);
    assert_eq!(near_limit, Some(4));

    let over_limit = libsais64_new_32bit_fs(i32::MAX as usize + 1, 0);
    assert_eq!(over_limit, None);
}
17292
/// Runs `libsais64_run_32bit_omp` on "banana" and checks that its return code,
/// suffix array and frequency table equal the widened results of calling
/// `crate::libsais_omp` directly with the recomputed free space.
#[test]
fn libsais64_32bit_suffix_adapter_widens_suffix_array_and_frequency() {
    let text = b"banana";
    let n = text.len();
    let fs = 4;
    let new_fs = libsais64_new_32bit_fs(n, fs).expect("small workspace");

    let mut wide_sa = vec![-1; n + fs as usize];
    let mut wide_freq = vec![-1; ALPHABET_SIZE];
    let wide_rc = libsais64_run_32bit_omp(text, &mut wide_sa, fs, Some(&mut wide_freq), 2, false)
        .expect("small input uses 32-bit adapter");

    let mut narrow_sa = vec![-1; n + new_fs as usize];
    let mut narrow_freq = vec![-1; ALPHABET_SIZE];
    let narrow_rc = crate::libsais_omp(text, &mut narrow_sa, new_fs, Some(&mut narrow_freq), 2);

    assert_eq!(wide_rc, SaSint::from(narrow_rc));

    // Suffix indices are widened through `u32`, i.e. zero-extended.
    let widened_sa = narrow_sa[..n]
        .iter()
        .map(|&value| SaSint::from(value as u32))
        .collect::<Vec<_>>();
    assert_eq!(&wide_sa[..n], &widened_sa);

    for &(symbol, count) in [(b'a', 3), (b'b', 1), (b'n', 2)].iter() {
        assert_eq!(wide_freq[symbol as usize], count);
    }

    let widened_freq: Vec<SaSint> = narrow_freq.iter().copied().map(SaSint::from).collect();
    assert_eq!(wide_freq[..ALPHABET_SIZE], widened_freq);
}
17326
/// Runs `libsais64_run_32bit_omp` with its final flag set to `true` on a
/// two-string, NUL-terminated input and compares the output with the widened
/// results of `crate::libsais_gsa`.
#[test]
fn libsais64_32bit_gsa_adapter_widens_suffix_array_and_frequency() {
    let text = b"ban\0ana\0";
    let n = text.len();
    let fs = 2;
    let mut wide_sa = vec![-1; n + fs as usize];
    let mut wide_freq = vec![-1; ALPHABET_SIZE];

    let wide_rc = libsais64_run_32bit_omp(text, &mut wide_sa, fs, Some(&mut wide_freq), 2, true)
        .expect("small GSA input uses 32-bit adapter");

    let mut direct_sa = vec![0; n];
    let mut direct_freq = vec![0; ALPHABET_SIZE];
    let direct_rc = crate::libsais_gsa(text, &mut direct_sa, 0, Some(&mut direct_freq));
    assert_eq!(direct_rc, 0);
    assert_eq!(wide_rc, 0);

    // Suffix indices are widened through `u32`, i.e. zero-extended.
    let widened_sa = direct_sa
        .iter()
        .map(|&value| SaSint::from(value as u32))
        .collect::<Vec<_>>();
    assert_eq!(wide_sa[..n], widened_sa);

    let widened_freq: Vec<SaSint> = direct_freq.iter().copied().map(SaSint::from).collect();
    assert_eq!(wide_freq[..ALPHABET_SIZE], widened_freq);
}
17359
/// Checks both 32-bit BWT adapters on "mississippi": the plain BWT adapter
/// against `crate::libsais_bwt_omp`, and the aux-index adapter against
/// `crate::libsais_bwt_aux_omp`. The output bytes must match and the numeric
/// results must equal the widened 32-bit values.
#[test]
fn libsais64_32bit_bwt_adapters_widen_frequency_and_aux_samples() {
    let text = b"mississippi";
    let n = text.len();
    let fs = 6;
    let r = 4;

    // --- plain BWT ---
    let mut wide_bwt = vec![0; n];
    let mut wide_freq = vec![-1; ALPHABET_SIZE];
    let wide_primary =
        libsais64_bwt_run_32bit_omp(text, &mut wide_bwt, fs, Some(&mut wide_freq), 2)
            .expect("small input uses 32-bit BWT adapter");

    let mut narrow_bwt = vec![0; n];
    let mut narrow_work = vec![0; n + fs as usize * 2 + n];
    let mut narrow_freq = vec![-1; ALPHABET_SIZE];
    // NOTE(review): 23 presumably equals libsais64_new_32bit_fs(11, 6) — confirm.
    let narrow_primary = crate::libsais_bwt_omp(
        text,
        &mut narrow_bwt,
        &mut narrow_work,
        23,
        Some(&mut narrow_freq),
        2,
    );

    assert_eq!(wide_primary, SaSint::from(narrow_primary));
    assert_eq!(wide_bwt, narrow_bwt);
    let widened_freq: Vec<SaSint> = narrow_freq.iter().copied().map(SaSint::from).collect();
    assert_eq!(wide_freq[..ALPHABET_SIZE], widened_freq);

    // --- BWT with auxiliary samples ---
    let mut wide_aux_bwt = vec![0; n];
    let mut wide_aux = vec![-1; (n - 1) / r as usize + 1];
    let mut wide_aux_freq = vec![-1; ALPHABET_SIZE];
    let wide_rc = libsais64_bwt_aux_run_32bit_omp(
        text,
        &mut wide_aux_bwt,
        fs,
        Some(&mut wide_aux_freq),
        r,
        &mut wide_aux,
        2,
    )
    .expect("small input uses 32-bit aux BWT adapter");

    let mut narrow_aux_bwt = vec![0; n];
    let mut narrow_aux_work = vec![0; n + fs as usize * 2 + n];
    let mut narrow_aux = vec![-1; wide_aux.len()];
    let mut narrow_aux_freq = vec![-1; ALPHABET_SIZE];
    let narrow_rc = crate::libsais_bwt_aux_omp(
        text,
        &mut narrow_aux_bwt,
        &mut narrow_aux_work,
        23,
        Some(&mut narrow_aux_freq),
        r as i32,
        &mut narrow_aux,
        2,
    );

    assert_eq!(wide_rc, SaSint::from(narrow_rc));
    assert_eq!(wide_aux_bwt, narrow_aux_bwt);
    let widened_aux: Vec<SaSint> = narrow_aux.iter().copied().map(SaSint::from).collect();
    assert_eq!(wide_aux, widened_aux);
    let widened_aux_freq: Vec<SaSint> =
        narrow_aux_freq.iter().copied().map(SaSint::from).collect();
    assert_eq!(wide_aux_freq[..ALPHABET_SIZE], widened_aux_freq);
}
17433
/// Copies 65 600 words through both `bwt_copy_8u_omp` (4 threads) and
/// `bwt_copy_8u`, and requires byte-identical output.
#[test]
fn libsais64_bwt_copy_8u_omp_uses_block_partition_for_large_inputs() {
    let n = 65_600usize;
    let words: Vec<SaSint> = (0..n).map(|i| (i * 17) as SaSint).collect();

    let mut parallel_out = vec![0; n];
    bwt_copy_8u_omp(&mut parallel_out, &words, n as SaSint, 4);

    let mut serial_out = vec![0; n];
    bwt_copy_8u(&mut serial_out, &words, n as SaSint);

    assert_eq!(parallel_out, serial_out);
}
17446
/// Runs `flip_suffix_markers_omp` with 1 and with 4 threads over the same
/// 65 600 entries, all of which have the sign bit toggled, and expects equal results.
#[test]
fn libsais64_flip_suffix_markers_omp_uses_block_partition_for_large_inputs() {
    let n = 65_600usize;
    let input: Vec<SaSint> = (0..n).map(|i| (i as SaSint) ^ SAINT_MIN).collect();
    let mut one_thread = input.clone();
    let mut four_threads = input;

    flip_suffix_markers_omp(&mut one_thread, n as SaSint, 1);
    flip_suffix_markers_omp(&mut four_threads, n as SaSint, 4);

    assert_eq!(four_threads, one_thread);
}
17458
/// Renumbers two LMS entries, the first of which carries the `SAINT_MIN` mark,
/// and checks the returned name and the values written to slots 2 and 3.
#[test]
fn libsais64_renumber_lms_suffixes_8u_writes_names_into_second_half() {
    let mut suffix_array = vec![1 | SAINT_MIN, 3, 0, 0];

    let last_name = renumber_lms_suffixes_8u(&mut suffix_array, 2, 0, 0, 2);

    assert_eq!(last_name, 1);
    let written = (suffix_array[2], suffix_array[3]);
    assert_eq!(written.0, SAINT_MIN);
    assert_eq!(written.1, SAINT_MIN | 1);
}
17469
/// Feeds the same four-entry array to the Rust `renumber_lms_suffixes_8u` and
/// to the C probe, and requires equal return values and equal arrays.
#[test]
fn libsais64_renumber_lms_suffixes_8u_matches_upstream_c_helper() {
    let mut ported = vec![1 | SAINT_MIN, 3, 0, 0];
    let mut reference = ported.clone();

    let ported_name = renumber_lms_suffixes_8u(&mut ported, 2, 0, 0, 2);
    // SAFETY: the pointer comes from `reference`, a live Vec that is not
    // otherwise accessed during the call. Assumes the probe stays within its
    // four elements — TODO confirm against the probe's contract.
    let reference_name =
        unsafe { probe_libsais64_renumber_lms_suffixes_8u(reference.as_mut_ptr(), 2, 0, 0, 2) };

    assert_eq!(ported_name, reference_name);
    assert_eq!(ported, reference);
}
17482
/// Gathers from an array whose only marked entry is `1 | SAINT_MIN`, and
/// expects the unmarked value 1 in the last slot and a return value of 3.
#[test]
fn libsais64_gather_marked_lms_suffixes_moves_negative_marked_entries_to_tail() {
    let mut suffix_array = vec![0, 0, 1 | SAINT_MIN, 3];

    let write_position = gather_marked_lms_suffixes(&mut suffix_array, 2, 4, 0, 2);

    assert_eq!(write_position, 3);
    assert_eq!(suffix_array[3], 1);
}
17492
/// Feeds the same four-entry array to the Rust `gather_marked_lms_suffixes` and
/// to the C probe, and requires equal return values and equal arrays.
#[test]
fn libsais64_gather_marked_lms_suffixes_matches_upstream_c_helper() {
    let mut ported = vec![0, 0, 1 | SAINT_MIN, 3];
    let mut reference = ported.clone();

    let ported_l = gather_marked_lms_suffixes(&mut ported, 2, 4, 0, 2);
    // SAFETY: the pointer comes from `reference`, a live Vec that is not
    // otherwise accessed during the call. Assumes the probe stays within its
    // four elements — TODO confirm against the probe's contract.
    let reference_l =
        unsafe { probe_libsais64_gather_marked_lms_suffixes(reference.as_mut_ptr(), 2, 4, 0, 2) };

    assert_eq!(ported_l, reference_l);
    assert_eq!(ported, reference);
}
17505
/// Calls the `_omp` renumbering wrapper on a four-entry array with two threads
/// and checks the returned name and the value written to slot 2.
#[test]
fn libsais64_renumber_lms_suffixes_8u_omp_wraps_sequential_version() {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffix_array = vec![1 | SAINT_MIN, 3, 0, 0];

    let last_name = renumber_lms_suffixes_8u_omp(&mut suffix_array, 2, 2, &mut thread_state);

    assert_eq!(last_name, 1);
    assert_eq!(suffix_array[2], SAINT_MIN);
}
17516
/// Compares `renumber_lms_suffixes_8u` with `renumber_lms_suffixes_8u_omp`
/// (4 threads) on 65 600 odd suffix values, every fifth of which is marked
/// with `SAINT_MIN`.
#[test]
fn libsais64_renumber_lms_suffixes_8u_omp_uses_block_partition_for_large_inputs() {
    let m = 65_600usize;

    // First half: suffixes 1, 3, 5, ...; second half: zeroed output area.
    let mut input: Vec<SaSint> = (0..m)
        .map(|i| {
            let suffix = (2 * i + 1) as SaSint;
            if i % 5 == 0 {
                suffix | SAINT_MIN
            } else {
                suffix
            }
        })
        .collect();
    input.resize(2 * m, 0);

    let mut serial = input.clone();
    let mut parallel = input;
    let mut thread_state = alloc_thread_state(4).unwrap();

    let serial_name = renumber_lms_suffixes_8u(&mut serial, m as SaSint, 0, 0, m as FastSint);
    let parallel_name =
        renumber_lms_suffixes_8u_omp(&mut parallel, m as SaSint, 4, &mut thread_state);

    assert_eq!(parallel_name, serial_name);
    assert_eq!(parallel, serial);
}
17540
/// Compares `gather_marked_lms_suffixes` with its `_omp` variant (4 threads)
/// on 131 200 entries. Only the tail that should hold the gathered entries —
/// one slot per marked input — is compared.
#[test]
fn libsais64_gather_marked_lms_suffixes_omp_uses_block_partition_for_large_inputs() {
    let n = 131_200usize;
    let half_n = n >> 1;

    // First half: suffixes 1, 4, 7, ... with every seventh marked; the rest
    // of the array is filled with the value -77.
    let mut input: Vec<SaSint> = (0..half_n)
        .map(|i| {
            let suffix = (3 * i + 1) as SaSint;
            if i % 7 == 0 {
                suffix | SAINT_MIN
            } else {
                suffix
            }
        })
        .collect();
    input.resize(n, -77);
    let marked_count = input[..half_n].iter().filter(|&&value| value < 0).count();

    let mut serial = input.clone();
    let mut parallel = input;
    let mut thread_state = alloc_thread_state(4).unwrap();
    let _ = gather_marked_lms_suffixes(&mut serial, 0, n as FastSint, 0, half_n as FastSint);
    gather_marked_lms_suffixes_omp(&mut parallel, n as SaSint, 0, 0, 4, &mut thread_state);

    let tail_start = n - marked_count;
    assert_eq!(&parallel[tail_start..], &serial[tail_start..]);
}
17564
/// Runs `renumber_and_gather_lms_suffixes_omp` with 1 thread and with 4
/// threads on the same 131 200-entry array, and requires equal names and arrays.
#[test]
fn libsais64_renumber_and_gather_lms_suffixes_omp_uses_large_input_paths() {
    let m = 65_600usize;
    let n = 2 * m;

    // First half: suffixes 1, 3, 5, ... with every fifth marked; second half zero.
    let mut input: Vec<SaSint> = (0..m)
        .map(|i| {
            let suffix = (2 * i + 1) as SaSint;
            if i % 5 == 0 {
                suffix | SAINT_MIN
            } else {
                suffix
            }
        })
        .collect();
    input.resize(n, 0);

    let mut serial = input.clone();
    let mut parallel = input;
    let mut serial_state = alloc_thread_state(1).unwrap();
    let mut parallel_state = alloc_thread_state(4).unwrap();

    let serial_name = renumber_and_gather_lms_suffixes_omp(
        &mut serial,
        n as SaSint,
        m as SaSint,
        0,
        1,
        &mut serial_state,
    );
    let parallel_name = renumber_and_gather_lms_suffixes_omp(
        &mut parallel,
        n as SaSint,
        m as SaSint,
        0,
        4,
        &mut parallel_state,
    );

    assert_eq!(parallel_name, serial_name);
    assert_eq!(parallel, serial);
}
17603
/// Small-input check of `renumber_and_gather_lms_suffixes_omp`: the returned
/// name is 1 and the last slot holds the unmarked suffix 1.
#[test]
fn libsais64_renumber_and_gather_lms_suffixes_omp_gathers_when_names_are_not_distinct() {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffix_array = vec![1 | SAINT_MIN, 3, 0, 0];

    let last_name =
        renumber_and_gather_lms_suffixes_omp(&mut suffix_array, 4, 2, 0, 2, &mut thread_state);

    assert_eq!(last_name, 1);
    assert_eq!(suffix_array[3], 1);
}
17614
/// Feeds the same four-entry array to the Rust
/// `renumber_and_gather_lms_suffixes_omp` and to the C probe, and requires
/// equal return values and equal arrays.
#[test]
fn libsais64_renumber_and_gather_lms_suffixes_omp_matches_upstream_c_helper() {
    let mut ported = vec![1 | SAINT_MIN, 3, 0, 0];
    let mut reference = ported.clone();
    let mut thread_state = alloc_thread_state(2).unwrap();

    let ported_name =
        renumber_and_gather_lms_suffixes_omp(&mut ported, 4, 2, 0, 2, &mut thread_state);
    // SAFETY: the pointer comes from `reference`, a live Vec that is not
    // otherwise accessed during the call. Assumes the probe stays within its
    // four elements — TODO confirm against the probe's contract.
    let reference_name = unsafe {
        probe_libsais64_renumber_and_gather_lms_suffixes_omp(reference.as_mut_ptr(), 4, 2, 0, 2)
    };

    assert_eq!(ported_name, reference_name);
    assert_eq!(ported, reference);
}
17630
/// Renumbers two marked entries and checks that the first two slots lose their
/// marks, that slots 2 and 3 receive `1` and `2 | SAINT_MIN`, and that the
/// returned name is 3.
#[test]
fn libsais64_renumber_distinct_lms_suffixes_32s_4k_masks_sources_and_writes_second_half() {
    let mut suffix_array = vec![1 | SAINT_MIN, 3 | SAINT_MIN, 0, 0];

    let next_name = renumber_distinct_lms_suffixes_32s_4k(&mut suffix_array, 2, 1, 0, 2);

    assert_eq!(next_name, 3);
    // Sources with the mark stripped.
    assert_eq!(suffix_array[0], 1);
    assert_eq!(suffix_array[1], 3);
    // Values written to the second half.
    assert_eq!(suffix_array[2], 1);
    assert_eq!(suffix_array[3], 2 | SAINT_MIN);
}
17643
/// Feeds the same four-entry array to the Rust
/// `renumber_distinct_lms_suffixes_32s_4k` and to the C probe, and requires
/// equal return values and equal arrays.
#[test]
fn libsais64_renumber_distinct_lms_suffixes_32s_4k_matches_upstream_c_helper() {
    let mut ported = vec![1 | SAINT_MIN, 3 | SAINT_MIN, 0, 0];
    let mut reference = ported.clone();

    let ported_name = renumber_distinct_lms_suffixes_32s_4k(&mut ported, 2, 1, 0, 2);
    // SAFETY: the pointer comes from `reference`, a live Vec that is not
    // otherwise accessed during the call. Assumes the probe stays within its
    // four elements — TODO confirm against the probe's contract.
    let reference_name = unsafe {
        probe_libsais64_renumber_distinct_lms_suffixes_32s_4k(reference.as_mut_ptr(), 2, 1, 0, 2)
    };

    assert_eq!(ported_name, reference_name);
    assert_eq!(ported, reference);
}
17657
/// Calls `mark_distinct_lms_suffixes_32s` with arguments `2, 0, 3` and pins the
/// resulting values of slots 2, 3 and 4.
#[test]
fn libsais64_mark_distinct_lms_suffixes_32s_propagates_previous_nonzero_marker() {
    let mut suffix_array = vec![0, 0, SAINT_MIN | 5, 0, SAINT_MIN | 7];

    mark_distinct_lms_suffixes_32s(&mut suffix_array, 2, 0, 3);

    let tail = &suffix_array[2..];
    assert_eq!(tail[0], 5);
    assert_eq!(tail[1], 0);
    assert_eq!(tail[2], SAINT_MIN | 7);
}
17668
/// Calls `clamp_lms_suffixes_length_32s` with arguments `2, 0, 3`: the entries
/// carrying `SAINT_MIN` keep their low bits and the unmarked entry becomes zero.
#[test]
fn libsais64_clamp_lms_suffixes_length_32s_keeps_only_negative_lengths() {
    let mut suffix_array = vec![0, 0, SAINT_MIN | 5, 7, SAINT_MIN | 3];

    clamp_lms_suffixes_length_32s(&mut suffix_array, 2, 0, 3);

    let tail = &suffix_array[2..];
    assert_eq!(tail[0], 5);
    assert_eq!(tail[1], 0);
    assert_eq!(tail[2], 3);
}
17679
/// Small-input check of `renumber_and_mark_distinct_lms_suffixes_32s_4k_omp`:
/// pins the returned name and the two values written to slots 2 and 3.
#[test]
fn libsais64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp_marks_second_half_when_names_repeat(
) {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffix_array = vec![1 | SAINT_MIN, 3 | SAINT_MIN, 0, 0];

    let last_name = renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
        &mut suffix_array,
        4,
        2,
        2,
        &mut thread_state,
    );

    assert_eq!(last_name, 2);
    assert_eq!(suffix_array[2], 1);
    assert_eq!(suffix_array[3], SAINT_MIN | 2);
}
17693
/// Feeds the same four-entry array to the Rust
/// `renumber_and_mark_distinct_lms_suffixes_32s_4k_omp` and to the C probe,
/// and requires equal return values and equal arrays.
#[test]
fn libsais64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp_matches_upstream_c_helper() {
    let mut ported = vec![1 | SAINT_MIN, 3 | SAINT_MIN, 0, 0];
    let mut reference = ported.clone();
    let mut thread_state = alloc_thread_state(2).unwrap();

    let ported_name =
        renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(&mut ported, 4, 2, 2, &mut thread_state);
    // SAFETY: the pointer comes from `reference`, a live Vec that is not
    // otherwise accessed during the call. Assumes the probe stays within its
    // four elements — TODO confirm against the probe's contract.
    let reference_name = unsafe {
        probe_libsais64_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
            reference.as_mut_ptr(),
            4,
            2,
            2,
        )
    };

    assert_eq!(ported_name, reference_name);
    assert_eq!(ported, reference);
}
17719
/// Calls `renumber_and_mark_distinct_lms_suffixes_32s_1k_omp` on the text
/// `[2, 1, 0]` with a zeroed suffix array, and pins the returned name and slot 1.
#[test]
fn libsais64_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp_handles_single_lms_suffix() {
    let text = vec![2, 1, 0];
    let mut suffix_array = vec![0; text.len()];

    let last_name =
        renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(&text, &mut suffix_array, 3, 1, 1);

    assert_eq!(last_name, 1);
    assert_eq!(suffix_array[1], SAINT_MIN | 1);
}
17730
/// Delegates to the shared per-branch helper with the value 300, which the
/// test name associates with the 6k branch.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_6k_branch() {
    let branch_input = 300;
    assert_libsais64_main_32s_entry_matches_public_c_long_for_branch(branch_input);
}
17735
/// Delegates to the shared per-branch helper with the value 400, which the
/// test name associates with the 4k branch.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_4k_branch() {
    let branch_input = 400;
    assert_libsais64_main_32s_entry_matches_public_c_long_for_branch(branch_input);
}
17740
/// Delegates to the shared per-branch helper with the value 700, which the
/// test name associates with the 2k branch.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_2k_branch() {
    let branch_input = 700;
    assert_libsais64_main_32s_entry_matches_public_c_long_for_branch(branch_input);
}
17745
/// Delegates to the shared per-branch helper with the value 1501, which the
/// test name associates with the 1k branch.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_1k_branch() {
    let branch_input = 1501;
    assert_libsais64_main_32s_entry_matches_public_c_long_for_branch(branch_input);
}
17750
/// Builds the recursive test text with argument 24 and hands it to the shared
/// comparison helper with the trailing arguments `300, 0, true`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_recursive_repetitive_6k_case() {
    let text = make_libsais64_recursive_main_32s_text(24);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 300, 0, true);
}
17760
/// Builds the recursive test text with argument 24 and hands it to the shared
/// comparison helper with the trailing arguments `1501, 0, true`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_recursive_repetitive_1k_case() {
    let text = make_libsais64_recursive_main_32s_text(24);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 1501, 0, true);
}
17770
/// Generates the stress text with arguments `(1024, 300)` and hands it to the
/// shared comparison helper with the trailing arguments `300, 0, true`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_large_generated_6k_case() {
    let text = make_libsais64_large_main_32s_stress_text(1024, 300);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 300, 0, true);
}
17780
/// Generates the stress text with arguments `(1024, 300)` and hands it to the
/// shared comparison helper with the trailing arguments `300, 2048, false`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_large_generated_6k_case_with_fs() {
    let text = make_libsais64_large_main_32s_stress_text(1024, 300);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 300, 2048, false);
}
17790
/// Generates the stress text with arguments `(1024, 400)` and hands it to the
/// shared comparison helper with the trailing arguments `400, 0, true`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_large_generated_4k_case() {
    let text = make_libsais64_large_main_32s_stress_text(1024, 400);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 400, 0, true);
}
17800
/// Generates the stress text with arguments `(1024, 400)` and hands it to the
/// shared comparison helper with the trailing arguments `400, 2048, false`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_large_generated_4k_case_with_fs() {
    let text = make_libsais64_large_main_32s_stress_text(1024, 400);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 400, 2048, false);
}
17810
/// Generates the stress text with arguments `(1024, 700)` and hands it to the
/// shared comparison helper with the trailing arguments `700, 0, true`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_large_generated_2k_case() {
    let text = make_libsais64_large_main_32s_stress_text(1024, 700);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 700, 0, true);
}
17820
/// Generates the stress text with arguments `(1024, 700)` and hands it to the
/// shared comparison helper with the trailing arguments `700, 2048, false`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_large_generated_2k_case_with_fs() {
    let text = make_libsais64_large_main_32s_stress_text(1024, 700);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 700, 2048, false);
}
17830
/// Generates the stress text with arguments `(1024, 1501)` and hands it to the
/// shared comparison helper with the trailing arguments `1501, 2048, false`.
#[test]
fn libsais64_main_32s_entry_matches_public_c_long_on_large_generated_1k_case_with_fs() {
    let text = make_libsais64_large_main_32s_stress_text(1024, 1501);
    assert_libsais64_main_32s_entry_matches_public_c_long(text, 1501, 2048, false);
}
17840
/// The first three slots hold the indices 0, 1, 2 and the last three hold
/// 7, 11, 13; after reconstruction the first three slots must read 7, 11, 13.
#[test]
fn libsais64_reconstruct_lms_suffixes_maps_indices_from_tail_interval() {
    let mut suffix_array = vec![0, 1, 2, 7, 11, 13];

    reconstruct_lms_suffixes(&mut suffix_array, 6, 3, 0, 3);

    let head = &suffix_array[..3];
    assert_eq!(head, &[7, 11, 13]);
}
17849
/// Runs `reconstruct_lms_suffixes_omp` with two threads on a six-entry array
/// and expects the first three slots to read 7, 11, 13.
#[test]
fn libsais64_reconstruct_lms_suffixes_omp_wraps_sequential_version() {
    let mut suffix_array = vec![0, 1, 2, 7, 11, 13];

    reconstruct_lms_suffixes_omp(&mut suffix_array, 6, 3, 2);

    let head = &suffix_array[..3];
    assert_eq!(head, &[7, 11, 13]);
}
17858
/// Runs three comparisons on one shared 131 200-entry input: renumbering
/// (sequential against 4-thread `_omp`), renumber-and-gather (1 thread against
/// 4 threads), and marked-suffix gathering (sequential against 4-thread `_omp`).
#[test]
fn libsais64_lms_late_omp_wrappers_use_block_partitions_for_large_inputs() {
    let m = 65_600usize;
    let n = 2 * m;

    // First half: suffixes 1, 3, 5, ... with every fifth marked; second half zero.
    let mut input: Vec<SaSint> = (0..m)
        .map(|i| {
            let suffix = (2 * i + 1) as SaSint;
            if i % 5 == 0 {
                suffix | SAINT_MIN
            } else {
                suffix
            }
        })
        .collect();
    input.resize(n, 0);

    // 1) renumber_lms_suffixes_8u against its _omp wrapper.
    let mut serial = input.clone();
    let mut parallel = input.clone();
    let mut thread_state = alloc_thread_state(4).unwrap();
    let serial_name = renumber_lms_suffixes_8u(&mut serial, m as SaSint, 0, 0, m as FastSint);
    let parallel_name =
        renumber_lms_suffixes_8u_omp(&mut parallel, m as SaSint, 4, &mut thread_state);
    assert_eq!(parallel_name, serial_name);
    assert_eq!(parallel, serial);

    // 2) renumber_and_gather_lms_suffixes_omp with 1 thread against 4 threads.
    let mut serial = input.clone();
    let mut parallel = input.clone();
    let mut serial_state = alloc_thread_state(1).unwrap();
    let mut parallel_state = alloc_thread_state(4).unwrap();
    let serial_name = renumber_and_gather_lms_suffixes_omp(
        &mut serial,
        n as SaSint,
        m as SaSint,
        0,
        1,
        &mut serial_state,
    );
    let parallel_name = renumber_and_gather_lms_suffixes_omp(
        &mut parallel,
        n as SaSint,
        m as SaSint,
        0,
        4,
        &mut parallel_state,
    );
    assert_eq!(parallel_name, serial_name);
    assert_eq!(parallel, serial);

    // 3) gather_marked_lms_suffixes against its _omp wrapper; only the tail
    //    with one slot per marked entry is compared.
    let mut serial = input.clone();
    let mut parallel = input;
    let marked_count = serial[..m].iter().filter(|&&value| value < 0).count();
    let _ = gather_marked_lms_suffixes(&mut serial, 0, n as FastSint, 0, m as FastSint);
    gather_marked_lms_suffixes_omp(&mut parallel, n as SaSint, 0, 0, 4, &mut thread_state);
    let tail_start = n - marked_count;
    assert_eq!(&parallel[tail_start..], &serial[tail_start..]);
}
17912
/// Compares `reconstruct_lms_suffixes` with its `_omp` variant (4 threads) on
/// a 131 200-entry array whose first half holds the indices m-1 down to 0 and
/// whose second half holds the values `17 * i + 3`.
#[test]
fn libsais64_reconstruct_lms_suffixes_omp_uses_block_partition_for_large_inputs() {
    let m = 65_600usize;
    let n = 2 * m;

    let descending_indices = (0..m).map(|i| (m - 1 - i) as SaSint);
    let tail_values = (0..m).map(|i| (i * 17 + 3) as SaSint);
    let input: Vec<SaSint> = descending_indices.chain(tail_values).collect();

    let mut serial = input.clone();
    let mut parallel = input;
    reconstruct_lms_suffixes(&mut serial, n as SaSint, m as SaSint, 0, m as FastSint);
    reconstruct_lms_suffixes_omp(&mut parallel, n as SaSint, m as SaSint, 4);

    assert_eq!(parallel, serial);
}
17932
/// Small-input check of `renumber_unique_and_nonunique_lms_suffixes_32s`: pins
/// the returned count, the mark written into the text, and slots 2 and 3 of
/// the suffix array.
#[test]
fn libsais64_renumber_unique_and_nonunique_lms_suffixes_32s_marks_new_unique_names() {
    let mut text = vec![0, 0, 0, 0];
    let mut suffix_array = vec![0, 2, -1, 5];

    let unique_count =
        renumber_unique_and_nonunique_lms_suffixes_32s(&mut text, &mut suffix_array, 2, 0, 0, 2);

    assert_eq!(unique_count, 1);
    assert_eq!(text[0], SAINT_MIN);
    assert_eq!(suffix_array[2], SAINT_MIN);
    assert_eq!(suffix_array[3], 4);
}
17945
/// Feeds identical text and suffix-array buffers to the Rust
/// `renumber_unique_and_nonunique_lms_suffixes_32s` and to the C probe, and
/// requires equal return values and equal buffers.
#[test]
fn libsais64_renumber_unique_and_nonunique_lms_suffixes_32s_matches_upstream_c_helper() {
    let mut ported_t = vec![0, 0, 0, 0];
    let mut ported_sa = vec![0, 2, -1, 5];
    let mut reference_t = ported_t.clone();
    let mut reference_sa = ported_sa.clone();

    let ported_f = renumber_unique_and_nonunique_lms_suffixes_32s(
        &mut ported_t,
        &mut ported_sa,
        2,
        0,
        0,
        2,
    );
    // SAFETY: both pointers come from live Vecs that are not otherwise accessed
    // during the call. Assumes the probe stays within the four elements of
    // each buffer — TODO confirm against the probe's contract.
    let reference_f = unsafe {
        probe_libsais64_renumber_unique_and_nonunique_lms_suffixes_32s(
            reference_t.as_mut_ptr(),
            reference_sa.as_mut_ptr(),
            2,
            0,
            0,
            2,
        )
    };

    assert_eq!(ported_f, reference_f);
    assert_eq!(ported_t, reference_t);
    assert_eq!(ported_sa, reference_sa);
}
17970
/// Feeds identical text and suffix-array buffers to the Rust
/// `renumber_unique_and_nonunique_lms_suffixes_32s_omp` (1 thread) and to the
/// C probe, and requires equal return values and equal buffers.
#[test]
fn libsais64_renumber_unique_and_nonunique_lms_suffixes_32s_omp_matches_upstream_c_helper() {
    let mut ported_t = vec![0, 0, 0, 0];
    let mut ported_sa = vec![0, 2, -1, 5];
    let mut reference_t = ported_t.clone();
    let mut reference_sa = ported_sa.clone();
    let mut thread_state = alloc_thread_state(1).unwrap();

    let ported_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
        &mut ported_t,
        &mut ported_sa,
        2,
        1,
        &mut thread_state,
    );
    // SAFETY: both pointers come from live Vecs that are not otherwise accessed
    // during the call. Assumes the probe stays within the four elements of
    // each buffer — TODO confirm against the probe's contract.
    let reference_f = unsafe {
        probe_libsais64_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
            reference_t.as_mut_ptr(),
            reference_sa.as_mut_ptr(),
            2,
            1,
        )
    };

    assert_eq!(ported_f, reference_f);
    assert_eq!(ported_t, reference_t);
    assert_eq!(ported_sa, reference_sa);
}
17999
/// Compares `renumber_unique_and_nonunique_lms_suffixes_32s` with its `_omp`
/// variant (4 threads) on 65 600 entries. The return value, the text buffer
/// and the suffix array must all match.
#[test]
fn libsais64_renumber_unique_and_nonunique_lms_suffixes_32s_omp_uses_block_partition() {
    let m = 65_600usize;
    let n = 2 * m;
    let t = vec![0; n];
    let mut sa = vec![0; n];
    {
        // Lower half: even positions 0, 2, 4, ...; upper half: a negative
        // value for every fifth entry and `i + 7` for the rest.
        let (lower, upper) = sa.split_at_mut(m);
        for (i, (lo, hi)) in lower.iter_mut().zip(upper.iter_mut()).enumerate() {
            *lo = (2 * i) as SaSint;
            *hi = if i % 5 == 0 {
                -((i as SaSint) + 1)
            } else {
                i as SaSint + 7
            };
        }
    }

    let mut serial_t = t.clone();
    let mut serial_sa = sa.clone();
    let mut parallel_t = t;
    let mut parallel_sa = sa;
    let mut thread_state = alloc_thread_state(4).unwrap();

    let serial_f = renumber_unique_and_nonunique_lms_suffixes_32s(
        &mut serial_t,
        &mut serial_sa,
        m as SaSint,
        0,
        0,
        m as FastSint,
    );
    let parallel_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
        &mut parallel_t,
        &mut parallel_sa,
        m as SaSint,
        4,
        &mut thread_state,
    );

    assert_eq!(parallel_f, serial_f);
    assert_eq!(parallel_t, serial_t);
    assert_eq!(parallel_sa, serial_sa);
}
18040
/// Small-input check of `compact_unique_and_nonunique_lms_suffixes_32s`: pins
/// both cursor values after the call and the contents of slots 2 and 3 (slot 3
/// with the sign bit masked off).
#[test]
fn libsais64_compact_unique_and_nonunique_lms_suffixes_32s_splits_unique_and_nonunique_ranges()
{
    let mut suffix_array = vec![0, 0, 0, 0, SAINT_MIN, 4];
    let mut left = 2;
    let mut right = 6;

    compact_unique_and_nonunique_lms_suffixes_32s(
        &mut suffix_array,
        2,
        &mut left,
        &mut right,
        0,
        2,
    );

    assert_eq!(left, 2);
    assert_eq!(right, 6);
    assert_eq!(suffix_array[2], 0);
    assert_eq!(suffix_array[3] & SAINT_MAX, 0);
}
18055
/// Small-input check of `compact_lms_suffixes_32s_omp` with two threads: pins
/// the returned count, slot 2 (sign bit masked off) and slot 5.
#[test]
fn libsais64_compact_lms_suffixes_32s_omp_runs_renumber_then_compaction() {
    let mut text = vec![0, 0, 0, 0];
    let mut suffix_array = vec![0, 2, -1, 5, 77, 88];
    let mut thread_state = alloc_thread_state(2).unwrap();

    let unique_count = compact_lms_suffixes_32s_omp(
        &mut text,
        &mut suffix_array,
        4,
        2,
        2,
        2,
        &mut thread_state,
    );

    assert_eq!(unique_count, 1);
    assert_eq!(suffix_array[2] & SAINT_MAX, 0);
    assert_eq!(suffix_array[5], 3);
}
18068
/// Runs `compact_unique_and_nonunique_lms_suffixes_32s_omp` with 1 thread and
/// with 4 threads on the same input, and compares the `f` entries that start
/// at offset `n + fs - m`.
#[test]
fn libsais64_compact_unique_and_nonunique_lms_suffixes_32s_omp_uses_block_partition() {
    let n = 131_200usize;
    let m = 65_600usize;
    let fs = m + 32;
    let half_n = n >> 1;
    let f = m / 5;

    let mut sa = vec![0; n + fs];
    // Entries m..m+half_n: every fifth carries SAINT_MIN, the rest hold i + 1.
    for (i, slot) in sa[m..m + half_n].iter_mut().enumerate() {
        *slot = if i % 5 == 0 {
            SAINT_MIN | i as SaSint
        } else {
            i as SaSint + 1
        };
    }
    // The f entries just below m hold 10_000, 10_001, ...
    for (i, slot) in sa[m - f..m].iter_mut().enumerate() {
        *slot = (10_000 + i) as SaSint;
    }

    let mut serial = sa.clone();
    let mut parallel = sa;
    let mut serial_state = alloc_thread_state(1).unwrap();
    let mut parallel_state = alloc_thread_state(4).unwrap();
    compact_unique_and_nonunique_lms_suffixes_32s_omp(
        &mut serial,
        n as SaSint,
        m as SaSint,
        fs as SaSint,
        f as SaSint,
        1,
        &mut serial_state,
    );
    compact_unique_and_nonunique_lms_suffixes_32s_omp(
        &mut parallel,
        n as SaSint,
        m as SaSint,
        fs as SaSint,
        f as SaSint,
        4,
        &mut parallel_state,
    );

    let unique_dst = n + fs - m;
    let unique_end = unique_dst + f;
    assert_eq!(
        &parallel[unique_dst..unique_end],
        &serial[unique_dst..unique_end]
    );
}
18117
/// Runs `compact_lms_suffixes_32s_omp` with 1 thread and with 4 threads on the
/// same input. It compares the returned count, the text buffer, and the
/// returned-count entries of the suffix array that start at `n + fs - m`.
#[test]
fn libsais64_compact_lms_suffixes_32s_omp_uses_large_input_paths() {
    let n = 131_200usize;
    let m = 65_600usize;
    let fs = m + 32;
    let t = vec![0; n];
    let mut sa = vec![0; n + fs];
    {
        // First m entries: even positions 0, 2, 4, ...; next m entries: a
        // negative value for every fifth entry and `i + 7` for the rest.
        let (lower, upper) = sa.split_at_mut(m);
        for (i, (lo, hi)) in lower.iter_mut().zip(upper.iter_mut()).enumerate() {
            *lo = (2 * i) as SaSint;
            *hi = if i % 5 == 0 {
                -((i as SaSint) + 1)
            } else {
                i as SaSint + 7
            };
        }
    }

    let mut serial_t = t.clone();
    let mut serial_sa = sa.clone();
    let mut parallel_t = t;
    let mut parallel_sa = sa;
    let mut serial_state = alloc_thread_state(1).unwrap();
    let mut parallel_state = alloc_thread_state(4).unwrap();

    let serial_f = compact_lms_suffixes_32s_omp(
        &mut serial_t,
        &mut serial_sa,
        n as SaSint,
        m as SaSint,
        fs as SaSint,
        1,
        &mut serial_state,
    );
    let parallel_f = compact_lms_suffixes_32s_omp(
        &mut parallel_t,
        &mut parallel_sa,
        n as SaSint,
        m as SaSint,
        fs as SaSint,
        4,
        &mut parallel_state,
    );

    assert_eq!(parallel_f, serial_f);
    assert_eq!(parallel_t, serial_t);

    let unique_dst = n + fs - m;
    let unique_len = usize::try_from(parallel_f).expect("f must be non-negative");
    let unique_end = unique_dst + unique_len;
    assert_eq!(
        &parallel_sa[unique_dst..unique_end],
        &serial_sa[unique_dst..unique_end]
    );
}
18168
/// Calls `merge_unique_lms_suffixes_32s` with the trailing arguments `0, 0, 0`
/// and requires both buffers to come back unchanged.
#[test]
fn libsais64_merge_unique_lms_suffixes_32s_noops_for_empty_block() {
    let original_t = vec![1, SAINT_MIN, 2, SAINT_MIN];
    let original_sa = vec![0, 0, 1, 3];
    let mut t = original_t.clone();
    let mut sa = original_sa.clone();

    merge_unique_lms_suffixes_32s(&mut t, &mut sa, 4, 1, 0, 0, 0);

    assert_eq!(t, original_t);
    assert_eq!(sa, original_sa);
}
18181
18182 #[test]
18183 fn libsais64_merge_unique_lms_suffixes_32s_omp_uses_block_partition_for_large_inputs() {
18184 let n = 65_600usize;
18185 let m = 1_024usize;
18186 let mut t = vec![1; n];
18187 for i in (0..n).step_by(257) {
18188 t[i] = SAINT_MIN | ((i % 251) as SaSint);
18189 }
18190 let f = t.iter().filter(|&&value| value < 0).count();
18191 let mut sa = vec![-1; n];
18192 let src = n - m - 1;
18193 for i in 0..f {
18194 sa[src + i] = i as SaSint;
18195 }
18196
18197 let mut single_t = t.clone();
18198 let mut single_sa = sa.clone();
18199 let mut threaded_t = t;
18200 let mut threaded_sa = sa;
18201 let mut thread_state = alloc_thread_state(4).unwrap();
18202 merge_unique_lms_suffixes_32s_omp(
18203 &mut single_t,
18204 &mut single_sa,
18205 n as SaSint,
18206 m as SaSint,
18207 1,
18208 &mut [],
18209 );
18210 merge_unique_lms_suffixes_32s_omp(
18211 &mut threaded_t,
18212 &mut threaded_sa,
18213 n as SaSint,
18214 m as SaSint,
18215 4,
18216 &mut thread_state,
18217 );
18218
18219 assert_eq!(threaded_t, single_t);
18220 assert_eq!(threaded_sa, single_sa);
18221 }
18222
18223 #[test]
18224 fn libsais64_merge_nonunique_lms_suffixes_32s_noops_for_empty_block() {
18225 let mut sa = vec![0, 7, 0, 13, 11];
18226 let before = sa.clone();
18227
18228 merge_nonunique_lms_suffixes_32s(&mut sa, 4, 1, 0, 0, 0);
18229
18230 assert_eq!(sa, before);
18231 }
18232
18233 #[test]
18234 fn libsais64_merge_compacted_lms_suffixes_32s_omp_preserves_input_text_and_fills_zero_slots() {
18235 let mut t = vec![1, 2, 3, 4];
18236 let mut sa = vec![0, 1, 2, 3, 4, 5];
18237 let before_t = t.clone();
18238 let mut thread_state = alloc_thread_state(2).unwrap();
18239
18240 merge_compacted_lms_suffixes_32s_omp(&mut t, &mut sa, 4, 1, 1, 2, &mut thread_state);
18241
18242 assert_eq!(t, before_t);
18243 assert_eq!(sa[0], 3);
18244 assert_eq!(sa[1], 1);
18245 }
18246
18247 #[test]
18248 fn libsais64_merge_nonunique_lms_suffixes_32s_omp_uses_block_partition_for_large_inputs() {
18249 let n = 131_200usize;
18250 let m = 65_600usize;
18251 let f = 7usize;
18252 let mut sa = vec![1; n];
18253 let zero_count = (0..m).filter(|i| i % 17 == 0).count();
18254 for i in (0..m).step_by(17) {
18255 sa[i] = 0;
18256 }
18257 let src = n - m - 1 + f;
18258 for i in 0..zero_count {
18259 sa[src + i] = 10_000 + i as SaSint;
18260 }
18261
18262 let mut single = sa.clone();
18263 let mut threaded = sa;
18264 let mut thread_state = alloc_thread_state(4).unwrap();
18265 merge_nonunique_lms_suffixes_32s_omp(
18266 &mut single,
18267 n as SaSint,
18268 m as SaSint,
18269 f as SaSint,
18270 1,
18271 &mut [],
18272 );
18273 merge_nonunique_lms_suffixes_32s_omp(
18274 &mut threaded,
18275 n as SaSint,
18276 m as SaSint,
18277 f as SaSint,
18278 4,
18279 &mut thread_state,
18280 );
18281
18282 assert_eq!(threaded, single);
18283 }
18284
18285 #[test]
18286 fn libsais64_merge_compacted_lms_suffixes_32s_omp_uses_block_partition_for_large_inputs() {
18287 let n = 131_200usize;
18288 let m = 65_600usize;
18289 let mut t = vec![1; n];
18290 for i in (0..n).step_by(257) {
18291 t[i] = SAINT_MIN | ((i % 251) as SaSint);
18292 }
18293 let f = t.iter().filter(|&&value| value < 0).count();
18294
18295 let mut sa = vec![1; n];
18296 let zero_count = (0..m).filter(|i| i % 17 == 0).count();
18297 for i in (0..m).step_by(17) {
18298 sa[i] = 0;
18299 }
18300 let unique_src = n - m - 1;
18301 for i in 0..f {
18302 sa[unique_src + i] = i as SaSint;
18303 }
18304 for i in 0..zero_count {
18305 sa[unique_src + f + i] = 10_000 + i as SaSint;
18306 }
18307
18308 let mut single_t = t.clone();
18309 let mut single_sa = sa.clone();
18310 let mut threaded_t = t;
18311 let mut threaded_sa = sa;
18312 let mut single_state = alloc_thread_state(1).unwrap();
18313 let mut threaded_state = alloc_thread_state(4).unwrap();
18314 merge_compacted_lms_suffixes_32s_omp(
18315 &mut single_t,
18316 &mut single_sa,
18317 n as SaSint,
18318 m as SaSint,
18319 f as SaSint,
18320 1,
18321 &mut single_state,
18322 );
18323 merge_compacted_lms_suffixes_32s_omp(
18324 &mut threaded_t,
18325 &mut threaded_sa,
18326 n as SaSint,
18327 m as SaSint,
18328 f as SaSint,
18329 4,
18330 &mut threaded_state,
18331 );
18332
18333 assert_eq!(threaded_t, single_t);
18334 assert_eq!(threaded_sa, single_sa);
18335 }
18336
18337 #[test]
18338 fn libsais64_final_bwt_left_to_right_8u_block_omp_uses_thread_buckets() {
18339 let block_start = 20_000usize;
18340 let block_size = 16_384usize;
18341 let n = block_start + block_size + 8;
18342 let t = vec![1_u8; n];
18343 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18344
18345 let mut expected_sa = vec![0; n];
18346 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18347 let mut threaded_sa = expected_sa.clone();
18348 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18349 let mut threaded_bucket = expected_bucket.clone();
18350 let mut thread_state = alloc_thread_state(4).unwrap();
18351
18352 final_bwt_scan_left_to_right_8u(
18353 &t,
18354 &mut expected_sa,
18355 &mut expected_bucket,
18356 block_start as FastSint,
18357 block_size as FastSint,
18358 );
18359 final_bwt_scan_left_to_right_8u_block_omp(
18360 &t,
18361 &mut threaded_sa,
18362 ALPHABET_SIZE as SaSint,
18363 &mut threaded_bucket,
18364 block_start as FastSint,
18365 block_size as FastSint,
18366 4,
18367 &mut thread_state,
18368 );
18369
18370 assert_eq!(threaded_sa, expected_sa);
18371 assert_eq!(threaded_bucket, expected_bucket);
18372 }
18373
18374 #[test]
18375 fn libsais64_final_bwt_aux_left_to_right_8u_block_omp_uses_thread_buckets() {
18376 let block_start = 20_000usize;
18377 let block_size = 16_384usize;
18378 let n = block_start + block_size + 8;
18379 let t = vec![1_u8; n];
18380 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18381
18382 let mut expected_sa = vec![0; n];
18383 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18384 let mut threaded_sa = expected_sa.clone();
18385 let mut expected_i = vec![0; n];
18386 let mut threaded_i = vec![0; n];
18387 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18388 let mut threaded_bucket = expected_bucket.clone();
18389 let mut thread_state = alloc_thread_state(4).unwrap();
18390
18391 final_bwt_aux_scan_left_to_right_8u(
18392 &t,
18393 &mut expected_sa,
18394 0,
18395 &mut expected_i,
18396 &mut expected_bucket,
18397 block_start as FastSint,
18398 block_size as FastSint,
18399 );
18400 final_bwt_aux_scan_left_to_right_8u_block_omp(
18401 &t,
18402 &mut threaded_sa,
18403 ALPHABET_SIZE as SaSint,
18404 0,
18405 &mut threaded_i,
18406 &mut threaded_bucket,
18407 block_start as FastSint,
18408 block_size as FastSint,
18409 4,
18410 &mut thread_state,
18411 );
18412
18413 assert_eq!(threaded_sa, expected_sa);
18414 assert_eq!(threaded_i, expected_i);
18415 assert_eq!(threaded_bucket, expected_bucket);
18416 }
18417
18418 #[test]
18419 fn libsais64_final_sorting_right_to_left_8u_block_omp_uses_thread_buckets() {
18420 let block_start = 20_000usize;
18421 let block_size = 16_384usize;
18422 let n = block_start + block_size + 8;
18423 let t = vec![1_u8; n];
18424 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18425
18426 let mut expected_sa = vec![0; n];
18427 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18428 let mut threaded_sa = expected_sa.clone();
18429 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18430 expected_bucket[1] = n as SaSint;
18431 let mut threaded_bucket = expected_bucket.clone();
18432 let mut thread_state = alloc_thread_state(4).unwrap();
18433
18434 final_sorting_scan_right_to_left_8u(
18435 &t,
18436 &mut expected_sa,
18437 &mut expected_bucket,
18438 block_start as FastSint,
18439 block_size as FastSint,
18440 );
18441 final_sorting_scan_right_to_left_8u_block_omp(
18442 &t,
18443 &mut threaded_sa,
18444 ALPHABET_SIZE as SaSint,
18445 &mut threaded_bucket,
18446 block_start as FastSint,
18447 block_size as FastSint,
18448 4,
18449 &mut thread_state,
18450 );
18451
18452 assert_eq!(threaded_sa, expected_sa);
18453 assert_eq!(threaded_bucket, expected_bucket);
18454 }
18455
18456 #[test]
18457 fn libsais64_final_bwt_right_to_left_8u_block_omp_uses_thread_buckets() {
18458 let block_start = 20_000usize;
18459 let block_size = 16_384usize;
18460 let n = block_start + block_size + 8;
18461 let t = vec![1_u8; n];
18462 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18463
18464 let mut expected_sa = vec![0; n];
18465 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18466 let mut threaded_sa = expected_sa.clone();
18467 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18468 expected_bucket[1] = n as SaSint;
18469 let mut threaded_bucket = expected_bucket.clone();
18470 let mut thread_state = alloc_thread_state(4).unwrap();
18471
18472 final_bwt_scan_right_to_left_8u(
18473 &t,
18474 &mut expected_sa,
18475 &mut expected_bucket,
18476 block_start as FastSint,
18477 block_size as FastSint,
18478 );
18479 final_bwt_scan_right_to_left_8u_block_omp(
18480 &t,
18481 &mut threaded_sa,
18482 ALPHABET_SIZE as SaSint,
18483 &mut threaded_bucket,
18484 block_start as FastSint,
18485 block_size as FastSint,
18486 4,
18487 &mut thread_state,
18488 );
18489
18490 assert_eq!(threaded_sa, expected_sa);
18491 assert_eq!(threaded_bucket, expected_bucket);
18492 }
18493
18494 #[test]
18495 fn libsais64_final_bwt_aux_right_to_left_8u_block_omp_uses_thread_buckets() {
18496 let block_start = 20_000usize;
18497 let block_size = 16_384usize;
18498 let n = block_start + block_size + 8;
18499 let t = vec![1_u8; n];
18500 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18501
18502 let mut expected_sa = vec![0; n];
18503 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18504 let mut threaded_sa = expected_sa.clone();
18505 let mut expected_i = vec![0; n];
18506 let mut threaded_i = vec![0; n];
18507 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18508 expected_bucket[1] = n as SaSint;
18509 let mut threaded_bucket = expected_bucket.clone();
18510 let mut thread_state = alloc_thread_state(4).unwrap();
18511
18512 final_bwt_aux_scan_right_to_left_8u(
18513 &t,
18514 &mut expected_sa,
18515 0,
18516 &mut expected_i,
18517 &mut expected_bucket,
18518 block_start as FastSint,
18519 block_size as FastSint,
18520 );
18521 final_bwt_aux_scan_right_to_left_8u_block_omp(
18522 &t,
18523 &mut threaded_sa,
18524 ALPHABET_SIZE as SaSint,
18525 0,
18526 &mut threaded_i,
18527 &mut threaded_bucket,
18528 block_start as FastSint,
18529 block_size as FastSint,
18530 4,
18531 &mut thread_state,
18532 );
18533
18534 assert_eq!(threaded_sa, expected_sa);
18535 assert_eq!(threaded_i, expected_i);
18536 assert_eq!(threaded_bucket, expected_bucket);
18537 }
18538
18539 #[test]
18540 fn libsais64_final_gsa_right_to_left_8u_block_omp_uses_thread_buckets() {
18541 let block_start = 20_000usize;
18542 let block_size = 16_384usize;
18543 let n = block_start + block_size + 8;
18544 let t = vec![1_u8; n];
18545 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18546
18547 let mut expected_sa = vec![0; n];
18548 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18549 let mut threaded_sa = expected_sa.clone();
18550 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18551 expected_bucket[1] = n as SaSint;
18552 let mut threaded_bucket = expected_bucket.clone();
18553 let mut thread_state = alloc_thread_state(4).unwrap();
18554
18555 final_gsa_scan_right_to_left_8u(
18556 &t,
18557 &mut expected_sa,
18558 &mut expected_bucket,
18559 block_start as FastSint,
18560 block_size as FastSint,
18561 );
18562 final_gsa_scan_right_to_left_8u_block_omp(
18563 &t,
18564 &mut threaded_sa,
18565 ALPHABET_SIZE as SaSint,
18566 &mut threaded_bucket,
18567 block_start as FastSint,
18568 block_size as FastSint,
18569 4,
18570 &mut thread_state,
18571 );
18572
18573 assert_eq!(threaded_sa, expected_sa);
18574 assert_eq!(threaded_bucket, expected_bucket);
18575 }
18576
18577 #[test]
18578 fn libsais64_count_and_gather_lms_suffixes_8u_omp_uses_block_partition_for_large_inputs() {
18579 let n = 65_600usize;
18580 let text: Vec<u8> = (0..n)
18581 .map(|i| 1 + ((i * 37 + i / 17) % 251) as u8)
18582 .collect();
18583
18584 let mut sa_threaded = vec![-99; n];
18585 let mut sa_scalar = vec![-99; n];
18586 let mut buckets_threaded = vec![0; 4 * ALPHABET_SIZE];
18587 let mut buckets_scalar = vec![0; 4 * ALPHABET_SIZE];
18588 let mut thread_state = alloc_thread_state(4).unwrap();
18589
18590 let m_threaded = count_and_gather_lms_suffixes_8u_omp(
18591 &text,
18592 &mut sa_threaded,
18593 n as SaSint,
18594 &mut buckets_threaded,
18595 4,
18596 &mut thread_state,
18597 );
18598 let m_scalar = count_and_gather_lms_suffixes_8u(
18599 &text,
18600 &mut sa_scalar,
18601 n as SaSint,
18602 &mut buckets_scalar,
18603 0,
18604 n as FastSint,
18605 );
18606
18607 assert_eq!(m_threaded, m_scalar);
18608 assert_eq!(
18609 &sa_threaded[n - m_threaded as usize..],
18610 &sa_scalar[n - m_scalar as usize..]
18611 );
18612 assert_eq!(buckets_threaded, buckets_scalar);
18613 }
18614
18615 #[test]
18616 fn libsais64_gather_lms_suffixes_8u_omp_uses_thread_state_for_large_inputs() {
18617 let n = 65_600usize;
18618 let text: Vec<u8> = (0..n)
18619 .map(|i| 1 + ((i * 37 + i / 17) % 251) as u8)
18620 .collect();
18621 let mut thread_state = alloc_thread_state(4).unwrap();
18622 let mut count_sa = vec![-99; n];
18623 let mut buckets = vec![0; 4 * ALPHABET_SIZE];
18624 let m = count_and_gather_lms_suffixes_8u_omp(
18625 &text,
18626 &mut count_sa,
18627 n as SaSint,
18628 &mut buckets,
18629 4,
18630 &mut thread_state,
18631 );
18632
18633 let mut threaded = vec![-99; n];
18634 let mut scalar = vec![-99; n];
18635 gather_lms_suffixes_8u_omp(&text, &mut threaded, n as SaSint, 4, &mut thread_state);
18636 gather_lms_suffixes_8u(
18637 &text,
18638 &mut scalar,
18639 n as SaSint,
18640 n as FastSint - 1,
18641 0,
18642 n as FastSint,
18643 );
18644
18645 assert_eq!(&threaded[n - m as usize..], &scalar[n - m as usize..]);
18646 }
18647
18648 #[test]
18649 fn libsais64_count_and_gather_lms_suffixes_32s_4k_updates_counts_and_suffixes() {
18650 let t = vec![2, 1, 3, 1, 0];
18651 let mut sa = vec![0; t.len()];
18652 let mut buckets = vec![0; 4 * 4];
18653 let m = count_and_gather_lms_suffixes_32s_4k(
18654 &t,
18655 &mut sa,
18656 t.len() as SaSint,
18657 4,
18658 &mut buckets,
18659 0,
18660 t.len() as FastSint,
18661 );
18662 assert!(m >= 0);
18663 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18664 }
18665
18666 #[test]
18667 fn libsais64_count_and_gather_lms_suffixes_32s_2k_updates_counts_and_suffixes() {
18668 let t = vec![2, 1, 3, 1, 0];
18669 let mut sa = vec![0; t.len()];
18670 let mut buckets = vec![0; 2 * 4];
18671 let m = count_and_gather_lms_suffixes_32s_2k(
18672 &t,
18673 &mut sa,
18674 t.len() as SaSint,
18675 4,
18676 &mut buckets,
18677 0,
18678 t.len() as FastSint,
18679 );
18680 assert!(m >= 0);
18681 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18682 }
18683
18684 #[test]
18685 fn libsais64_count_and_gather_compacted_lms_suffixes_32s_2k_updates_counts_and_suffixes() {
18686 let t = vec![2, SAINT_MIN | 1, 3, 1, 0];
18687 let mut sa = vec![0; t.len()];
18688 let mut buckets = vec![0; 2 * 4];
18689 let m = count_and_gather_compacted_lms_suffixes_32s_2k(
18690 &t,
18691 &mut sa,
18692 t.len() as SaSint,
18693 4,
18694 &mut buckets,
18695 0,
18696 t.len() as FastSint,
18697 );
18698 assert!(m >= 0);
18699 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18700 }
18701
18702 #[test]
18703 fn libsais64_count_and_gather_lms_suffixes_32s_4k_nofs_omp_wraps_sequential_version() {
18704 let t = vec![2, 1, 3, 1, 0];
18705 let mut sa = vec![0; t.len()];
18706 let mut buckets = vec![0; 4 * 4];
18707 let m = count_and_gather_lms_suffixes_32s_4k_nofs_omp(
18708 &t,
18709 &mut sa,
18710 t.len() as SaSint,
18711 4,
18712 &mut buckets,
18713 2,
18714 );
18715 assert!(m >= 0);
18716 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18717 }
18718
18719 #[test]
18720 fn libsais64_count_and_gather_lms_suffixes_32s_2k_nofs_omp_wraps_sequential_version() {
18721 let t = vec![2, 1, 3, 1, 0];
18722 let mut sa = vec![0; t.len()];
18723 let mut buckets = vec![0; 2 * 4];
18724 let m = count_and_gather_lms_suffixes_32s_2k_nofs_omp(
18725 &t,
18726 &mut sa,
18727 t.len() as SaSint,
18728 4,
18729 &mut buckets,
18730 2,
18731 );
18732 assert!(m >= 0);
18733 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18734 }
18735
18736 #[test]
18737 fn libsais64_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp_wraps_sequential_version()
18738 {
18739 let t = vec![2, SAINT_MIN | 1, 3, 1, 0];
18740 let mut sa = vec![0; t.len()];
18741 let mut buckets = vec![0; 2 * 4];
18742 let m = count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
18743 &t,
18744 &mut sa,
18745 t.len() as SaSint,
18746 4,
18747 &mut buckets,
18748 2,
18749 );
18750 assert!(m >= 0);
18751 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18752 }
18753
18754 #[test]
18755 fn libsais64_count_and_gather_lms_suffixes_32s_nofs_omp_uses_large_input_paths() {
18756 let n = 65_600usize;
18757 let k = 257usize;
18758 let text: Vec<SaSint> = (0..n)
18759 .map(|i| 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint)
18760 .collect();
18761
18762 let mut sa_threaded = vec![-99; n];
18763 let mut sa_scalar = vec![-99; n];
18764 let mut buckets_threaded = vec![0; 4 * k];
18765 let mut buckets_scalar = vec![0; 4 * k];
18766 let m_threaded = count_and_gather_lms_suffixes_32s_4k_nofs_omp(
18767 &text,
18768 &mut sa_threaded,
18769 n as SaSint,
18770 k as SaSint,
18771 &mut buckets_threaded,
18772 4,
18773 );
18774 let m_scalar = count_and_gather_lms_suffixes_32s_4k(
18775 &text,
18776 &mut sa_scalar,
18777 n as SaSint,
18778 k as SaSint,
18779 &mut buckets_scalar,
18780 0,
18781 n as FastSint,
18782 );
18783 assert_eq!(m_threaded, m_scalar);
18784 assert_eq!(
18785 &sa_threaded[n - m_threaded as usize..],
18786 &sa_scalar[n - m_scalar as usize..]
18787 );
18788 assert_eq!(buckets_threaded, buckets_scalar);
18789
18790 let mut sa_threaded = vec![-99; n];
18791 let mut sa_scalar = vec![-99; n];
18792 let mut buckets_threaded = vec![0; 2 * k];
18793 let mut buckets_scalar = vec![0; 2 * k];
18794 let m_threaded = count_and_gather_lms_suffixes_32s_2k_nofs_omp(
18795 &text,
18796 &mut sa_threaded,
18797 n as SaSint,
18798 k as SaSint,
18799 &mut buckets_threaded,
18800 4,
18801 );
18802 let m_scalar = count_and_gather_lms_suffixes_32s_2k(
18803 &text,
18804 &mut sa_scalar,
18805 n as SaSint,
18806 k as SaSint,
18807 &mut buckets_scalar,
18808 0,
18809 n as FastSint,
18810 );
18811 assert_eq!(m_threaded, m_scalar);
18812 assert_eq!(
18813 &sa_threaded[n - m_threaded as usize..],
18814 &sa_scalar[n - m_scalar as usize..]
18815 );
18816 assert_eq!(buckets_threaded, buckets_scalar);
18817 }
18818
18819 #[test]
18820 fn libsais64_count_and_gather_lms_suffixes_32s_fs_omp_uses_large_input_paths() {
18821 let n = 65_600usize;
18822 let k = 257usize;
18823 let text: Vec<SaSint> = (0..n)
18824 .map(|i| 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint)
18825 .collect();
18826 let mut thread_state = alloc_thread_state(4).unwrap();
18827
18828 let mut sa_threaded = vec![-99; n];
18829 let mut sa_scalar = vec![-99; n];
18830 let mut buckets_threaded = vec![0; 4 * k];
18831 let mut buckets_scalar = vec![0; 4 * k];
18832 let m_threaded = count_and_gather_lms_suffixes_32s_4k_fs_omp(
18833 &text,
18834 &mut sa_threaded,
18835 n as SaSint,
18836 k as SaSint,
18837 &mut buckets_threaded,
18838 0,
18839 4,
18840 &mut thread_state,
18841 );
18842 let m_scalar = count_and_gather_lms_suffixes_32s_4k(
18843 &text,
18844 &mut sa_scalar,
18845 n as SaSint,
18846 k as SaSint,
18847 &mut buckets_scalar,
18848 0,
18849 n as FastSint,
18850 );
18851 assert_eq!(m_threaded, m_scalar);
18852 assert_eq!(
18853 &sa_threaded[n - m_threaded as usize..],
18854 &sa_scalar[n - m_scalar as usize..]
18855 );
18856 assert_eq!(buckets_threaded, buckets_scalar);
18857
18858 let mut sa_threaded = vec![-99; n];
18859 let mut sa_scalar = vec![-99; n];
18860 let mut buckets_threaded = vec![0; 2 * k];
18861 let mut buckets_scalar = vec![0; 2 * k];
18862 let m_threaded = count_and_gather_lms_suffixes_32s_2k_fs_omp(
18863 &text,
18864 &mut sa_threaded,
18865 n as SaSint,
18866 k as SaSint,
18867 &mut buckets_threaded,
18868 0,
18869 4,
18870 &mut thread_state,
18871 );
18872 let m_scalar = count_and_gather_lms_suffixes_32s_2k(
18873 &text,
18874 &mut sa_scalar,
18875 n as SaSint,
18876 k as SaSint,
18877 &mut buckets_scalar,
18878 0,
18879 n as FastSint,
18880 );
18881 assert_eq!(m_threaded, m_scalar);
18882 assert_eq!(
18883 &sa_threaded[n - m_threaded as usize..],
18884 &sa_scalar[n - m_scalar as usize..]
18885 );
18886 assert_eq!(buckets_threaded, buckets_scalar);
18887 }
18888
18889 #[test]
18890 fn libsais64_count_and_gather_compacted_lms_suffixes_32s_nofs_omp_uses_large_input_path() {
18891 let n = 65_600usize;
18892 let k = 257usize;
18893 let text: Vec<SaSint> = (0..n)
18894 .map(|i| {
18895 let value = 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint;
18896 if i % 19 == 0 {
18897 value | SAINT_MIN
18898 } else {
18899 value
18900 }
18901 })
18902 .collect();
18903
18904 let mut sa_threaded = vec![-99; n];
18905 let mut sa_split = vec![-99; n];
18906 let mut buckets_threaded = vec![0; 2 * k];
18907 let mut buckets_split = vec![0; 2 * k];
18908 let m_threaded = count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
18909 &text,
18910 &mut sa_threaded,
18911 n as SaSint,
18912 k as SaSint,
18913 &mut buckets_threaded,
18914 4,
18915 );
18916 count_compacted_lms_suffixes_32s_2k(&text, n as SaSint, k as SaSint, &mut buckets_split);
18917 let m_split = gather_compacted_lms_suffixes_32s(&text, &mut sa_split, n as SaSint);
18918
18919 assert_eq!(m_threaded, m_split);
18920 assert_eq!(
18921 &sa_threaded[n - m_threaded as usize..],
18922 &sa_split[n - m_split as usize..]
18923 );
18924 assert_eq!(buckets_threaded, buckets_split);
18925 }
18926
18927 #[test]
18928 fn libsais64_count_and_gather_compacted_lms_suffixes_32s_fs_omp_uses_large_input_path() {
18929 let n = 65_600usize;
18930 let k = 257usize;
18931 let text: Vec<SaSint> = (0..n)
18932 .map(|i| {
18933 let value = 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint;
18934 if i % 19 == 0 {
18935 value | SAINT_MIN
18936 } else {
18937 value
18938 }
18939 })
18940 .collect();
18941
18942 let mut sa_threaded = vec![-99; 2 * n];
18943 let mut sa_scalar = vec![-99; n];
18944 let mut buckets_threaded = vec![0; 2 * k];
18945 let mut buckets_scalar = vec![0; 2 * k];
18946 let mut thread_state = alloc_thread_state(4).unwrap();
18947 count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
18948 &text,
18949 &mut sa_threaded,
18950 n as SaSint,
18951 k as SaSint,
18952 &mut buckets_threaded,
18953 0,
18954 4,
18955 &mut thread_state,
18956 );
18957 let m_scalar = count_and_gather_compacted_lms_suffixes_32s_2k(
18958 &text,
18959 &mut sa_scalar,
18960 n as SaSint,
18961 k as SaSint,
18962 &mut buckets_scalar,
18963 0,
18964 n as FastSint,
18965 );
18966
18967 assert_eq!(
18968 &sa_threaded[n - m_scalar as usize..n],
18969 &sa_scalar[n - m_scalar as usize..]
18970 );
18971 assert_eq!(buckets_threaded, buckets_scalar);
18972 }
18973
18974 #[test]
18975 fn libsais64_plcp_lcp_omp_wrappers_match_single_thread_on_large_inputs() {
18976 let n = 65_600usize;
18977 let text: Vec<u8> = (0..n).map(|i| (1 + (i % 251)) as u8).collect();
18978 let sa: Vec<SaSint> = (0..n as SaSint).collect();
18979
18980 let mut plcp_single = vec![0; n];
18981 let mut plcp_threaded = vec![0; n];
18982 compute_phi_omp(&sa, &mut plcp_single, n as SaSint, 1);
18983 compute_phi_omp(&sa, &mut plcp_threaded, n as SaSint, 4);
18984 assert_eq!(plcp_threaded, plcp_single);
18985
18986 compute_plcp_omp(&text, &mut plcp_single, n as SaSint, 1);
18987 compute_plcp_omp(&text, &mut plcp_threaded, n as SaSint, 4);
18988 assert_eq!(plcp_threaded, plcp_single);
18989
18990 let mut lcp_single = vec![0; n];
18991 let mut lcp_threaded = vec![0; n];
18992 compute_lcp_omp(&plcp_single, &sa, &mut lcp_single, n as SaSint, 1);
18993 compute_lcp_omp(&plcp_threaded, &sa, &mut lcp_threaded, n as SaSint, 4);
18994 assert_eq!(lcp_threaded, lcp_single);
18995 }
18996
    /// Asserts that `libsais64` and `probe_public_libsais64` return the same code and
    /// fill identical suffix arrays for `text`.
    ///
    /// NOTE(review): `probe_public_libsais64` is presumably the binding to the
    /// upstream C implementation; its declaration is not visible here.
    fn assert_libsais64_matches_c(text: &[u8]) {
        let mut rust_sa = vec![0; text.len()];
        let mut c_sa = vec![0; text.len()];

        let rust_rc = libsais64(text, &mut rust_sa, 0, None);
        // SAFETY: both pointers come from live buffers of `text.len()` elements, the
        // same length that is passed as `n`. Assumes the probe accesses at most `n`
        // elements through each pointer; confirm against the binding.
        let c_rc = unsafe {
            probe_public_libsais64(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
        };

        assert_eq!(rust_rc, c_rc);
        assert_eq!(rust_sa, c_sa);
    }
19009
    /// Asserts that `libsais64_gsa` and `probe_public_libsais64_gsa` return the same
    /// code and fill identical suffix arrays for `text`.
    ///
    /// NOTE(review): the probe is presumably the binding to the upstream C
    /// implementation; its declaration is not visible here.
    fn assert_libsais64_gsa_matches_c(text: &[u8]) {
        let mut rust_sa = vec![0; text.len()];
        let mut c_sa = vec![0; text.len()];

        let rust_rc = libsais64_gsa(text, &mut rust_sa, 0, None);
        // SAFETY: both pointers come from live buffers of `text.len()` elements, the
        // same length that is passed as `n`. Assumes the probe accesses at most `n`
        // elements through each pointer; confirm against the binding.
        let c_rc = unsafe {
            probe_public_libsais64_gsa(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, 0)
        };

        assert_eq!(rust_rc, c_rc);
        assert_eq!(rust_sa, c_sa);
    }
19022
    /// Asserts that `libsais64_long` and `probe_public_libsais64_long` agree on the
    /// return code, the (mutable) text after the call, and the suffix array, for an
    /// integer text `text` and the value `k` passed through to both.
    ///
    /// NOTE(review): the probe is presumably the binding to the upstream C
    /// implementation; its declaration is not visible here.
    fn assert_libsais64_long_matches_c(text: &[SaSint], k: SaSint) {
        // Each implementation gets its own copy because both take the text mutably.
        let mut rust_t = text.to_vec();
        let mut c_t = text.to_vec();
        let mut rust_sa = vec![0; text.len()];
        let mut c_sa = vec![0; text.len()];

        let rust_rc = libsais64_long(&mut rust_t, &mut rust_sa, k, 0);
        // SAFETY: `c_t` and `c_sa` are live buffers of `text.len()` elements, the
        // same length that is passed as `n`. Assumes the probe accesses at most `n`
        // elements through each pointer; confirm against the binding.
        let c_rc = unsafe {
            probe_public_libsais64_long(
                c_t.as_mut_ptr(),
                c_sa.as_mut_ptr(),
                c_t.len() as SaSint,
                k,
                0,
            )
        };

        assert_eq!(rust_rc, c_rc);
        assert_eq!(rust_t, c_t);
        assert_eq!(rust_sa, c_sa);
    }
19044
    /// Asserts that `libsais64_bwt` and `probe_public_libsais64_bwt` return the same
    /// code and produce the same output bytes for `text`.
    ///
    /// Only the return code and the `u` buffers are compared; the `a` buffers are
    /// passed to both calls but never inspected.
    ///
    /// NOTE(review): the probe is presumably the binding to the upstream C
    /// implementation; its declaration is not visible here.
    fn assert_libsais64_bwt_matches_c(text: &[u8]) {
        let mut rust_u = vec![0; text.len()];
        let mut rust_a = vec![0; text.len()];
        let mut c_u = vec![0; text.len()];
        let mut c_a = vec![0; text.len()];

        let rust_rc = libsais64_bwt(text, &mut rust_u, &mut rust_a, 0, None);
        // SAFETY: all pointers come from live buffers of `text.len()` elements, the
        // same length that is passed as `n`. Assumes the probe accesses at most `n`
        // elements through each pointer; confirm against the binding.
        let c_rc = unsafe {
            probe_public_libsais64_bwt(
                text.as_ptr(),
                c_u.as_mut_ptr(),
                c_a.as_mut_ptr(),
                text.len() as SaSint,
                0,
            )
        };

        assert_eq!(rust_rc, c_rc);
        assert_eq!(rust_u, c_u);
    }
19065
    /// Asserts that `libsais64_bwt_aux` and `probe_public_libsais64_bwt_aux` agree on
    /// the return code, the output bytes, and the auxiliary index array for `text`
    /// with the value `r` passed through to both.
    ///
    /// NOTE(review): the probe is presumably the binding to the upstream C
    /// implementation; its declaration is not visible here.
    fn assert_libsais64_bwt_aux_matches_c(text: &[u8], r: SaSint) {
        // One auxiliary slot per `r` text positions, rounded up; empty text needs
        // none (and must not reach the `len - 1` subtraction).
        let aux_len = if text.is_empty() {
            0
        } else {
            (text.len() - 1) / r as usize + 1
        };
        let mut rust_u = vec![0; text.len()];
        let mut rust_a = vec![0; text.len()];
        let mut rust_i = vec![0; aux_len];
        let mut c_u = vec![0; text.len()];
        let mut c_a = vec![0; text.len()];
        let mut c_i = vec![0; aux_len];

        let rust_rc = libsais64_bwt_aux(text, &mut rust_u, &mut rust_a, 0, None, r, &mut rust_i);
        // SAFETY: `text`, `c_u`, and `c_a` are live buffers of `text.len()` elements
        // (the length passed as `n`) and `c_i` holds `aux_len` elements. Assumes the
        // probe stays within those bounds; confirm against the binding.
        let c_rc = unsafe {
            probe_public_libsais64_bwt_aux(
                text.as_ptr(),
                c_u.as_mut_ptr(),
                c_a.as_mut_ptr(),
                text.len() as SaSint,
                0,
                r,
                c_i.as_mut_ptr(),
            )
        };

        assert_eq!(rust_rc, c_rc);
        assert_eq!(rust_u, c_u);
        assert_eq!(rust_i, c_i);
    }
19096
19097 fn assert_libsais64_freq_outputs_match_c(text: &[u8], gsa_text: &[u8]) {
19098 let mut rust_sa = vec![0; text.len()];
19099 let mut c_sa = vec![0; text.len()];
19100 let mut rust_freq = vec![-1; ALPHABET_SIZE];
19101 let mut c_freq = vec![-1; ALPHABET_SIZE];
19102
19103 let rust_rc = libsais64(text, &mut rust_sa, 0, Some(&mut rust_freq));
19104 let c_rc = unsafe {
19105 probe_public_libsais64_freq(
19106 text.as_ptr(),
19107 c_sa.as_mut_ptr(),
19108 text.len() as SaSint,
19109 0,
19110 c_freq.as_mut_ptr(),
19111 )
19112 };
19113 assert_eq!(rust_rc, c_rc);
19114 assert_eq!(rust_sa, c_sa);
19115 assert_eq!(rust_freq, c_freq);
19116
19117 let mut rust_gsa = vec![0; gsa_text.len()];
19118 let mut c_gsa = vec![0; gsa_text.len()];
19119 rust_freq.fill(-1);
19120 c_freq.fill(-1);
19121 let rust_rc = libsais64_gsa(gsa_text, &mut rust_gsa, 0, Some(&mut rust_freq));
19122 let c_rc = unsafe {
19123 probe_public_libsais64_gsa_freq(
19124 gsa_text.as_ptr(),
19125 c_gsa.as_mut_ptr(),
19126 gsa_text.len() as SaSint,
19127 0,
19128 c_freq.as_mut_ptr(),
19129 )
19130 };
19131 assert_eq!(rust_rc, c_rc);
19132 assert_eq!(rust_gsa, c_gsa);
19133 assert_eq!(rust_freq, c_freq);
19134
19135 let mut rust_u = vec![0; text.len()];
19136 let mut rust_a = vec![0; text.len()];
19137 let mut c_u = vec![0; text.len()];
19138 let mut c_a = vec![0; text.len()];
19139 rust_freq.fill(-1);
19140 c_freq.fill(-1);
19141 let rust_rc = libsais64_bwt(text, &mut rust_u, &mut rust_a, 0, Some(&mut rust_freq));
19142 let c_rc = unsafe {
19143 probe_public_libsais64_bwt_freq(
19144 text.as_ptr(),
19145 c_u.as_mut_ptr(),
19146 c_a.as_mut_ptr(),
19147 text.len() as SaSint,
19148 0,
19149 c_freq.as_mut_ptr(),
19150 )
19151 };
19152 assert_eq!(rust_rc, c_rc);
19153 assert_eq!(rust_u, c_u);
19154 assert_eq!(rust_freq, c_freq);
19155
19156 let r = 4;
19157 let aux_len = (text.len() - 1) / r as usize + 1;
19158 let mut rust_i = vec![0; aux_len];
19159 let mut c_i = vec![0; aux_len];
19160 rust_freq.fill(-1);
19161 c_freq.fill(-1);
19162 let rust_rc = libsais64_bwt_aux(
19163 text,
19164 &mut rust_u,
19165 &mut rust_a,
19166 0,
19167 Some(&mut rust_freq),
19168 r,
19169 &mut rust_i,
19170 );
19171 let c_rc = unsafe {
19172 probe_public_libsais64_bwt_aux_freq(
19173 text.as_ptr(),
19174 c_u.as_mut_ptr(),
19175 c_a.as_mut_ptr(),
19176 text.len() as SaSint,
19177 0,
19178 c_freq.as_mut_ptr(),
19179 r,
19180 c_i.as_mut_ptr(),
19181 )
19182 };
19183 assert_eq!(rust_rc, c_rc);
19184 assert_eq!(rust_u, c_u);
19185 assert_eq!(rust_i, c_i);
19186 assert_eq!(rust_freq, c_freq);
19187 }
19188
    /// Builds the BWT of `text` with `libsais64_bwt`, then asserts that
    /// `libsais64_unbwt` and `probe_public_libsais64_unbwt` return the same code,
    /// produce the same bytes, and that those bytes equal the original `text`.
    ///
    /// NOTE(review): the probe is presumably the binding to the upstream C
    /// implementation; its declaration is not visible here.
    fn assert_libsais64_unbwt_matches_c(text: &[u8]) {
        let mut bwt = vec![0; text.len()];
        let mut work = vec![0; text.len()];
        // The return value of the forward transform is passed on as `primary`.
        let primary = libsais64_bwt(text, &mut bwt, &mut work, 0, None);
        assert!(primary >= 0);

        let mut rust_u = vec![0; text.len()];
        // The work arrays for the inverse transform have one extra element.
        let mut rust_a = vec![0; text.len() + 1];
        let mut c_u = vec![0; text.len()];
        let mut c_a = vec![0; text.len() + 1];

        let rust_rc = libsais64_unbwt(&bwt, &mut rust_u, &mut rust_a, None, primary);
        // SAFETY: `bwt` and `c_u` hold `bwt.len()` elements (passed as `n`) and `c_a`
        // holds `n + 1`. Assumes the probe stays within those bounds; confirm
        // against the binding.
        let c_rc = unsafe {
            probe_public_libsais64_unbwt(
                bwt.as_ptr(),
                c_u.as_mut_ptr(),
                c_a.as_mut_ptr(),
                bwt.len() as SaSint,
                primary,
            )
        };

        assert_eq!(rust_rc, c_rc);
        assert_eq!(rust_u, c_u);
        assert_eq!(rust_u, text);
    }
19215
19216 fn assert_libsais64_unbwt_aux_matches_c(text: &[u8], r: SaSint) {
19217 let mut bwt = vec![0; text.len()];
19218 let mut work = vec![0; text.len()];
19219 let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
19220 let bwt_rc = libsais64_bwt_aux(text, &mut bwt, &mut work, 0, None, r, &mut aux);
19221 assert_eq!(bwt_rc, 0);
19222
19223 let mut rust_u = vec![0; text.len()];
19224 let mut rust_a = vec![0; text.len() + 1];
19225 let mut c_u = vec![0; text.len()];
19226 let mut c_a = vec![0; text.len() + 1];
19227
19228 let rust_rc = libsais64_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, None, r, &aux);
19229 let c_rc = unsafe {
19230 probe_public_libsais64_unbwt_aux(
19231 bwt.as_ptr(),
19232 c_u.as_mut_ptr(),
19233 c_a.as_mut_ptr(),
19234 bwt.len() as SaSint,
19235 r,
19236 aux.as_ptr(),
19237 )
19238 };
19239
19240 assert_eq!(rust_rc, c_rc);
19241 assert_eq!(rust_u, c_u);
19242 assert_eq!(rust_u, text);
19243 }
19244
19245 fn assert_libsais64_unbwt_freq_matches_c(text: &[u8]) {
19246 let mut freq = vec![0; ALPHABET_SIZE];
19247 let mut bwt = vec![0; text.len()];
19248 let mut work = vec![0; text.len()];
19249 let primary = libsais64_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
19250 assert!(primary >= 0);
19251
19252 let mut rust_u = vec![0; text.len()];
19253 let mut rust_a = vec![0; text.len() + 1];
19254 let mut c_u = vec![0; text.len()];
19255 let mut c_a = vec![0; text.len() + 1];
19256
19257 let rust_rc = libsais64_unbwt(&bwt, &mut rust_u, &mut rust_a, Some(&freq), primary);
19258 let c_rc = unsafe {
19259 probe_public_libsais64_unbwt_freq(
19260 bwt.as_ptr(),
19261 c_u.as_mut_ptr(),
19262 c_a.as_mut_ptr(),
19263 bwt.len() as SaSint,
19264 freq.as_ptr(),
19265 primary,
19266 )
19267 };
19268 assert_eq!(rust_rc, c_rc);
19269 assert_eq!(rust_u, c_u);
19270 assert_eq!(rust_u, text);
19271
19272 let r = 4;
19273 let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
19274 let bwt_rc = libsais64_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), r, &mut aux);
19275 assert_eq!(bwt_rc, 0);
19276
19277 rust_u.fill(0);
19278 rust_a.fill(0);
19279 c_u.fill(0);
19280 c_a.fill(0);
19281 let rust_rc = libsais64_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, Some(&freq), r, &aux);
19282 let c_rc = unsafe {
19283 probe_public_libsais64_unbwt_aux_freq(
19284 bwt.as_ptr(),
19285 c_u.as_mut_ptr(),
19286 c_a.as_mut_ptr(),
19287 bwt.len() as SaSint,
19288 freq.as_ptr(),
19289 r,
19290 aux.as_ptr(),
19291 )
19292 };
19293 assert_eq!(rust_rc, c_rc);
19294 assert_eq!(rust_u, c_u);
19295 assert_eq!(rust_u, text);
19296 }
19297
19298 fn assert_libsais64_plcp_lcp_matches_c(text: &[u8]) {
19299 let mut sa = vec![0; text.len()];
19300 let sa_rc = libsais64(text, &mut sa, 0, None);
19301 assert_eq!(sa_rc, 0);
19302
19303 let mut rust_plcp = vec![0; text.len()];
19304 let mut c_plcp = vec![0; text.len()];
19305 let rust_plcp_rc = libsais64_plcp(text, &sa, &mut rust_plcp);
19306 let c_plcp_rc = unsafe {
19307 probe_public_libsais64_plcp(
19308 text.as_ptr(),
19309 sa.as_ptr(),
19310 c_plcp.as_mut_ptr(),
19311 text.len() as SaSint,
19312 )
19313 };
19314 assert_eq!(rust_plcp_rc, c_plcp_rc);
19315 assert_eq!(rust_plcp, c_plcp);
19316
19317 let mut rust_lcp = vec![0; text.len()];
19318 let mut c_lcp = vec![0; text.len()];
19319 let rust_lcp_rc = libsais64_lcp(&rust_plcp, &sa, &mut rust_lcp);
19320 let c_lcp_rc = unsafe {
19321 probe_public_libsais64_lcp(
19322 c_plcp.as_ptr(),
19323 sa.as_ptr(),
19324 c_lcp.as_mut_ptr(),
19325 text.len() as SaSint,
19326 )
19327 };
19328 assert_eq!(rust_lcp_rc, c_lcp_rc);
19329 assert_eq!(rust_lcp, c_lcp);
19330 }
19331
19332 fn assert_libsais64_plcp_gsa_matches_c(text: &[u8]) {
19333 let mut sa = vec![0; text.len()];
19334 assert_eq!(libsais64_gsa(text, &mut sa, 0, None), 0);
19335
19336 let mut rust_plcp = vec![0; text.len()];
19337 let mut c_plcp = vec![0; text.len()];
19338 let rust_rc = libsais64_plcp_gsa(text, &sa, &mut rust_plcp);
19339 let c_rc = unsafe {
19340 probe_public_libsais64_plcp_gsa(
19341 text.as_ptr(),
19342 sa.as_ptr(),
19343 c_plcp.as_mut_ptr(),
19344 text.len() as SaSint,
19345 )
19346 };
19347
19348 assert_eq!(rust_rc, c_rc);
19349 assert_eq!(rust_plcp, c_plcp);
19350 }
19351
19352 fn assert_libsais64_bwt_aux_round_trips(text: &[u8], r: SaSint) {
19353 let mut bwt = vec![0; text.len()];
19354 let mut work = vec![0; text.len()];
19355 let mut restored = vec![0; text.len()];
19356 let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
19357
19358 let bwt_rc = libsais64_bwt_aux(text, &mut bwt, &mut work, 0, None, r, &mut aux);
19359 assert_eq!(bwt_rc, 0);
19360
19361 let unbwt_rc = libsais64_unbwt_aux(&bwt, &mut restored, &mut work, None, r, &aux);
19362 assert_eq!(unbwt_rc, 0);
19363 assert_eq!(restored, text);
19364 }
19365
19366 #[test]
19367 fn public_libsais64_matches_upstream_c() {
19368 for text in [
19369 b"".as_slice(),
19370 b"a",
19371 b"banana",
19372 b"mississippi",
19373 b"abracadabra",
19374 b"AAAAAAAAAAAAAAAA",
19375 b"zyxwvutsrqponmlk",
19376 ] {
19377 assert_libsais64_matches_c(text);
19378 }
19379 }
19380
19381 #[test]
19382 fn public_libsais64_bwt_matches_upstream_c() {
19383 for text in [
19384 b"".as_slice(),
19385 b"a",
19386 b"banana",
19387 b"mississippi",
19388 b"abracadabra",
19389 b"AAAAAAAAAAAAAAAA",
19390 b"zyxwvutsrqponmlk",
19391 ] {
19392 assert_libsais64_bwt_matches_c(text);
19393 }
19394 }
19395
19396 #[test]
19397 fn public_libsais64_gsa_matches_upstream_c() {
19398 for text in [
19399 b"\0".as_slice(),
19400 b"banana\0",
19401 b"ban\0ana\0",
19402 b"miss\0issippi\0",
19403 b"a\0a\0a\0",
19404 ] {
19405 assert_libsais64_gsa_matches_c(text);
19406 }
19407 }
19408
19409 #[test]
19410 fn public_libsais64_long_matches_upstream_c() {
19411 for (text, k) in [
19412 (&[][..], 0),
19413 (&[0][..], 1),
19414 (&[1, 2, 1, 0][..], 3),
19415 (&[2, 1, 2, 1, 0][..], 3),
19416 (&[3, 3, 3, 2, 1, 0][..], 4),
19417 ] {
19418 assert_libsais64_long_matches_c(text, k);
19419 }
19420 }
19421
19422 #[test]
19423 fn public_libsais64_plcp_lcp_matches_upstream_c() {
19424 for text in [
19425 b"".as_slice(),
19426 b"a",
19427 b"banana",
19428 b"mississippi",
19429 b"abracadabra",
19430 b"AAAAAAAAAAAAAAAA",
19431 b"zyxwvutsrqponmlk",
19432 ] {
19433 assert_libsais64_plcp_lcp_matches_c(text);
19434 }
19435 }
19436
19437 #[test]
19438 fn public_libsais64_plcp_gsa_matches_upstream_c() {
19439 for text in [
19440 b"\0".as_slice(),
19441 b"banana\0",
19442 b"ban\0ana\0",
19443 b"miss\0issippi\0",
19444 b"a\0a\0a\0",
19445 ] {
19446 assert_libsais64_plcp_gsa_matches_c(text);
19447 }
19448 }
19449
19450 #[test]
19451 fn libsais64_bwt_and_unbwt_round_trip_small_text() {
19452 let t = b"banana";
19453 let mut bwt = vec![0u8; t.len()];
19454 let mut a = vec![0; t.len()];
19455
19456 let primary = libsais64_bwt(t, &mut bwt, &mut a, 0, None);
19457 assert!(primary > 0);
19458
19459 let mut restored = vec![0u8; t.len()];
19460 let result = libsais64_unbwt(&bwt, &mut restored, &mut a, None, primary);
19461
19462 assert_eq!(result, 0);
19463 assert_eq!(restored, t);
19464 }
19465
19466 #[test]
19467 fn libsais64_bwt_aux_and_unbwt_aux_round_trip_small_text() {
19468 let t = b"mississippi";
19469 let mut bwt = vec![0u8; t.len()];
19470 let mut a = vec![0; t.len()];
19471 let mut samples = vec![0; 4];
19472
19473 let result = libsais64_bwt_aux(t, &mut bwt, &mut a, 0, None, 4, &mut samples);
19474 assert_eq!(result, 0);
19475
19476 let mut restored = vec![0u8; t.len()];
19477 let result = libsais64_unbwt_aux(&bwt, &mut restored, &mut a, None, 4, &samples);
19478
19479 assert_eq!(result, 0);
19480 assert_eq!(restored, t);
19481 }
19482
19483 #[test]
19484 fn libsais64_bwt_aux_and_unbwt_aux_omp_round_trip_small_text() {
19485 let t = b"mississippi";
19486 let mut bwt = vec![0u8; t.len()];
19487 let mut a = vec![0; t.len()];
19488 let mut samples = vec![0; 4];
19489
19490 let result = libsais64_bwt_aux(t, &mut bwt, &mut a, 0, None, 4, &mut samples);
19491 assert_eq!(result, 0);
19492
19493 let mut restored = vec![0u8; t.len()];
19494 let result = libsais64_unbwt_aux_omp(&bwt, &mut restored, &mut a, None, 4, &samples, 2);
19495
19496 assert_eq!(result, 0);
19497 assert_eq!(restored, t);
19498 }
19499
19500 #[test]
19501 fn libsais64_real_world_round_trip_on_upstream_readme() {
19502 let t = include_bytes!("../libsais/README.md");
19503 let mut bwt = vec![0u8; t.len()];
19504 let mut a = vec![0; t.len()];
19505
19506 let primary = libsais64_bwt(t, &mut bwt, &mut a, 0, None);
19507 assert!(primary > 0);
19508
19509 let mut restored = vec![0u8; t.len()];
19510 let result = libsais64_unbwt(&bwt, &mut restored, &mut a, None, primary);
19511
19512 assert_eq!(result, 0);
19513 assert_eq!(restored, t);
19514 }
19515
19516 #[test]
19517 fn libsais64_real_world_aux_omp_round_trip_on_upstream_c_source() {
19518 let t = include_bytes!("../libsais/src/libsais.c");
19519 let mut bwt = vec![0u8; t.len()];
19520 let mut a = vec![0; t.len()];
19521 let r = 128;
19522 let mut samples = vec![0; (t.len() - 1) / usize::try_from(r).expect("fits") + 1];
19523
19524 let result = libsais64_bwt_aux(t, &mut bwt, &mut a, 0, None, r, &mut samples);
19525 assert_eq!(result, 0);
19526
19527 let mut restored = vec![0u8; t.len()];
19528 let result = libsais64_unbwt_aux_omp(&bwt, &mut restored, &mut a, None, r, &samples, 2);
19529
19530 assert_eq!(result, 0);
19531 assert_eq!(restored, t);
19532 }
19533
19534 #[test]
19535 fn libsais64_bwt_aux_rejects_undersized_sampling_array() {
19536 let t = b"upstream source text";
19537 let mut bwt = vec![0u8; t.len()];
19538 let mut a = vec![0; t.len()];
19539 let mut samples = vec![0; 1];
19540
19541 let result = libsais64_bwt_aux(t, &mut bwt, &mut a, 0, None, 2, &mut samples);
19542
19543 assert_eq!(result, -1);
19544
19545 let result = libsais64_bwt_aux(t, &mut bwt, &mut a, 0, None, 0, &mut samples);
19546
19547 assert_eq!(result, -1);
19548 }
19549
19550 #[test]
19551 fn libsais64_bwt_aux_omp_rejects_invalid_sampling_rate_without_panicking() {
19552 let t = b"upstream source text";
19553 let mut bwt = vec![0u8; t.len()];
19554 let mut a = vec![0; t.len()];
19555 let mut samples = vec![0; 4];
19556
19557 let result = libsais64_bwt_aux_omp(t, &mut bwt, &mut a, 0, None, 0, &mut samples, 2);
19558
19559 assert_eq!(result, -1);
19560 }
19561
19562 #[test]
19563 fn public_libsais64_empty_and_singleton_inputs_follow_public_contract() {
19564 let mut empty_sa = Vec::new();
19565 let mut empty_freq = vec![-1; ALPHABET_SIZE];
19566 assert_eq!(libsais64(b"", &mut empty_sa, 0, Some(&mut empty_freq)), 0);
19567 assert!(empty_freq.iter().all(|&value| value == 0));
19568
19569 empty_freq.fill(-1);
19570 assert_eq!(
19571 libsais64_omp(b"", &mut empty_sa, 0, Some(&mut empty_freq), 2),
19572 0
19573 );
19574 assert!(empty_freq.iter().all(|&value| value == 0));
19575
19576 empty_freq.fill(-1);
19577 assert_eq!(
19578 libsais64_gsa(b"", &mut empty_sa, 0, Some(&mut empty_freq)),
19579 0
19580 );
19581 assert!(empty_freq.iter().all(|&value| value == 0));
19582
19583 let mut empty_bwt = Vec::new();
19584 let mut empty_work = Vec::new();
19585 empty_freq.fill(-1);
19586 assert_eq!(
19587 libsais64_bwt(
19588 b"",
19589 &mut empty_bwt,
19590 &mut empty_work,
19591 0,
19592 Some(&mut empty_freq)
19593 ),
19594 0
19595 );
19596 assert!(empty_freq.iter().all(|&value| value == 0));
19597
19598 let mut empty_aux = vec![-1];
19599 empty_freq.fill(-1);
19600 assert_eq!(
19601 libsais64_bwt_aux(
19602 b"",
19603 &mut empty_bwt,
19604 &mut empty_work,
19605 0,
19606 Some(&mut empty_freq),
19607 2,
19608 &mut empty_aux
19609 ),
19610 0
19611 );
19612 assert_eq!(empty_aux[0], 0);
19613 assert!(empty_freq.iter().all(|&value| value == 0));
19614
19615 let text = b"z";
19616 let mut sa = vec![-1; 1];
19617 let mut freq = vec![-1; ALPHABET_SIZE];
19618 assert_eq!(libsais64(text, &mut sa, 0, Some(&mut freq)), 0);
19619 assert_eq!(sa, vec![0]);
19620 assert_eq!(freq[b'z' as usize], 1);
19621 assert_eq!(freq.iter().sum::<SaSint>(), 1);
19622
19623 sa.fill(-1);
19624 freq.fill(-1);
19625 let mut ctx = create_ctx().expect("context");
19626 assert_eq!(
19627 libsais64_ctx(&mut ctx, text, &mut sa, 0, Some(&mut freq)),
19628 0
19629 );
19630 assert_eq!(sa, vec![0]);
19631 assert_eq!(freq[b'z' as usize], 1);
19632 assert_eq!(freq.iter().sum::<SaSint>(), 1);
19633
19634 sa.fill(-1);
19635 freq.fill(-1);
19636 assert_eq!(libsais64_omp(text, &mut sa, 0, Some(&mut freq), 2), 0);
19637 assert_eq!(sa, vec![0]);
19638 assert_eq!(freq[b'z' as usize], 1);
19639 assert_eq!(freq.iter().sum::<SaSint>(), 1);
19640
19641 let mut gsa_sa = vec![-1; 1];
19642 let mut gsa_freq = vec![-1; ALPHABET_SIZE];
19643 assert_eq!(libsais64_gsa(b"\0", &mut gsa_sa, 0, Some(&mut gsa_freq)), 0);
19644 assert_eq!(gsa_sa, vec![0]);
19645 assert_eq!(gsa_freq[0], 1);
19646 assert_eq!(gsa_freq.iter().sum::<SaSint>(), 1);
19647
19648 let mut bwt = vec![0; 1];
19649 let mut work = vec![0; 1];
19650 freq.fill(-1);
19651 assert_eq!(
19652 libsais64_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq)),
19653 1
19654 );
19655 assert_eq!(bwt, text);
19656 assert_eq!(freq[b'z' as usize], 1);
19657 assert_eq!(freq.iter().sum::<SaSint>(), 1);
19658
19659 let mut aux = vec![-1];
19660 bwt.fill(0);
19661 work.fill(0);
19662 freq.fill(-1);
19663 assert_eq!(
19664 libsais64_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), 2, &mut aux),
19665 0
19666 );
19667 assert_eq!(bwt, text);
19668 assert_eq!(aux[0], 1);
19669 assert_eq!(freq[b'z' as usize], 1);
19670 assert_eq!(freq.iter().sum::<SaSint>(), 1);
19671 }
19672
19673 #[test]
19674 fn public_libsais64_rejects_invalid_aux_sampling_without_panicking() {
19675 let text = b"banana";
19676 let mut u = vec![0; text.len()];
19677 let mut a = vec![0; text.len() + 1];
19678 let mut aux = vec![0; 2];
19679
19680 assert_eq!(
19681 libsais64_bwt_aux(text, &mut u, &mut a, 0, None, 0, &mut aux),
19682 -1
19683 );
19684 assert_eq!(
19685 libsais64_bwt_aux(text, &mut u, &mut a, 0, None, 3, &mut aux),
19686 -1
19687 );
19688 assert_eq!(libsais64_unbwt_aux(text, &mut u, &mut a, None, 0, &aux), -1);
19689 assert_eq!(
19690 libsais64_unbwt_aux_omp(text, &mut u, &mut a, None, 0, &aux, 1),
19691 -1
19692 );
19693 }
19694
19695 #[test]
19696 fn libsais64_unbwt_aux_rejects_invalid_sampling_range() {
19697 let t = b"abc";
19698 let mut u = vec![0u8; t.len()];
19699 let mut a = vec![0; t.len()];
19700
19701 let result = libsais64_unbwt_aux(t, &mut u, &mut a, None, 2, &[0, 4]);
19702
19703 assert_eq!(result, -1);
19704
19705 assert_eq!(libsais64_unbwt_aux(t, &mut u, &mut a, None, 0, &[1]), -1);
19706
19707 let mut ctx = unbwt_create_ctx().expect("context");
19708 assert_eq!(
19709 libsais64_unbwt_aux_ctx(&mut ctx, t, &mut u, &mut a, None, 0, &[1]),
19710 -1
19711 );
19712 assert_eq!(
19713 libsais64_unbwt_aux_omp(t, &mut u, &mut a, None, 0, &[1], 2),
19714 -1
19715 );
19716 }
19717
19718 #[test]
19719 fn public_libsais64_omp_rejects_undersized_suffix_arrays() {
19720 let text = b"banana";
19721 let mut short_sa = vec![0; text.len() - 1];
19722 let mut int_text = vec![1, 2, 1, 0];
19723 let mut short_int_sa = vec![0; int_text.len() - 1];
19724
19725 assert_eq!(libsais64_omp(text, &mut short_sa, 0, None, 1), -1);
19726 assert_eq!(
19727 libsais64_gsa_omp(b"banana\0", &mut short_sa, 0, None, 1),
19728 -1
19729 );
19730 assert_eq!(
19731 libsais64_int_omp(&mut int_text, &mut short_int_sa, 3, 0, 1),
19732 -1
19733 );
19734 }
19735
19736 #[test]
19737 #[ignore = "large real-data regression; requires local yeast FASTA fixture"]
19738 fn public_libsais64_omp_handles_minibwa_yeast_two_strand_index_input() {
19739 let path = "/data/henriksson/github/claude/star/.tmp/yeast_conformance/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa";
19740 let Ok(fasta) = std::fs::read_to_string(path) else {
19741 eprintln!("skipping missing fixture: {path}");
19742 return;
19743 };
19744 let mut forward = Vec::new();
19745 for line in fasta.lines() {
19746 if line.starts_with('>') {
19747 continue;
19748 }
19749 forward.extend(line.as_bytes().iter().filter_map(|&c| match c {
19750 b'A' | b'a' => Some(0),
19751 b'C' | b'c' => Some(1),
19752 b'G' | b'g' => Some(2),
19753 b'T' | b't' => Some(3),
19754 _ => None,
19755 }));
19756 }
19757 assert!(
19758 forward.len() > 12_000_000,
19759 "fixture should exercise the large-input 64-bit path"
19760 );
19761
19762 let mut text = Vec::with_capacity(forward.len() * 2);
19763 text.extend_from_slice(&forward);
19764 text.extend(forward.iter().rev().map(|&c| 3 - c));
19765
19766 const FS: SaSint = 10_000;
19767 let mut sa = vec![0; text.len() + FS as usize];
19768 assert_eq!(libsais64_omp(&text, &mut sa, FS, None, 4), 0);
19769 }
19770
19771 #[test]
19772 #[ignore = "large real-data regression; requires local minibwa yeast fixture"]
19773 fn public_libsais64_omp_matches_plain_on_minibwa_yeast_two_strand_index_input() {
19774 let l2b_path =
19775 "/data/henriksson/github/claude/minibwa/.tmp/compare-yeast-now/ref.split.rust.l2b";
19776 let fasta_path =
19777 "/data/henriksson/github/claude/minibwa/.tmp/large-real/yeast/ref.sanitized.fa";
19778 let forward = if let Ok(bytes) = std::fs::read(l2b_path) {
19779 assert!(bytes.len() >= 64, "short l2b fixture: {l2b_path}");
19780 assert_eq!(&bytes[..4], b"L2B\x01", "bad l2b magic in {l2b_path}");
19781 let n_ctg = u64::from_le_bytes(bytes[8..16].try_into().unwrap()) as usize;
19782 let tot_len = u64::from_le_bytes(bytes[16..24].try_into().unwrap()) as usize;
19783 let n_ambi = u64::from_le_bytes(bytes[24..32].try_into().unwrap()) as usize;
19784 let n_mask = u64::from_le_bytes(bytes[32..40].try_into().unwrap()) as usize;
19785 let n_pac = u64::from_le_bytes(bytes[56..64].try_into().unwrap()) as usize;
19786 let pac_start = 64 + 8 * n_ctg + 16 * n_ambi + 16 * n_mask;
19787 assert!(
19788 bytes.len() >= pac_start + 8 * n_pac,
19789 "truncated l2b pac in {l2b_path}"
19790 );
19791 let mut pac = Vec::with_capacity(n_pac);
19792 for chunk in bytes[pac_start..pac_start + 8 * n_pac].chunks_exact(8) {
19793 pac.push(u64::from_le_bytes(chunk.try_into().unwrap()));
19794 }
19795 (0..tot_len)
19796 .map(|i| ((pac[i >> 5] >> ((i & 31) << 1)) & 3) as u8)
19797 .collect::<Vec<_>>()
19798 } else if let Ok(fasta) = std::fs::read_to_string(fasta_path) {
19799 let mut rng = 11u64;
19800 let mut forward = Vec::new();
19801 for line in fasta.lines() {
19802 if line.starts_with('>') {
19803 continue;
19804 }
19805 forward.extend(line.bytes().map(|b| {
19806 let mut c = match b {
19807 b'A' | b'a' => 0,
19808 b'C' | b'c' => 1,
19809 b'G' | b'g' => 2,
19810 b'T' | b't' | b'U' | b'u' => 3,
19811 _ => {
19812 rng = rng.wrapping_add(0x9e3779b97f4a7c15);
19813 let mut z = rng;
19814 z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
19815 z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
19816 4 | ((z ^ (z >> 31)) & 3) as u8
19817 }
19818 };
19819 if b < b'A' || b > b'Z' {
19820 c |= 1 << 3;
19821 }
19822 c & 3
19823 }));
19824 }
19825 forward
19826 } else {
19827 eprintln!("skipping missing fixtures: {l2b_path} and {fasta_path}");
19828 return;
19829 };
19830 assert!(
19831 forward.len() > 12_000_000,
19832 "fixture should exercise the minibwa yeast index workload"
19833 );
19834
19835 let mut text = Vec::with_capacity(forward.len() * 2);
19836 text.extend_from_slice(&forward);
19837 text.extend(forward.iter().rev().map(|&c| 3 - c));
19838
19839 const FS: SaSint = 10_000;
19840 let mut plain_sa = vec![0; text.len() + FS as usize + 1];
19841 let mut omp_sa = vec![0; text.len() + FS as usize + 1];
19842 assert_eq!(libsais64(&text, &mut plain_sa[1..], FS, None), 0);
19843 assert_eq!(libsais64_omp(&text, &mut omp_sa[1..], FS, None, 4), 0);
19844 plain_sa[0] = text.len() as SaSint;
19845 omp_sa[0] = text.len() as SaSint;
19846 if let Some(i) = plain_sa[..=text.len()]
19847 .iter()
19848 .zip(&omp_sa[..=text.len()])
19849 .position(|(plain, omp)| plain != omp)
19850 {
19851 panic!(
19852 "first suffix-array diff at {i}: plain={} omp={}",
19853 plain_sa[i], omp_sa[i]
19854 );
19855 }
19856 }
19857
19858 #[test]
19859 #[ignore = "large real-data regression; requires local minibwa or STAR yeast FASTA fixture"]
19860 fn direct_libsais64_main_handles_minibwa_yeast_two_strand_index_input() {
19861 let minibwa_path =
19862 "/data/henriksson/github/claude/minibwa/.tmp/large-real/yeast/ref.sanitized.fa";
19863 let star_path = "/data/henriksson/github/claude/star/.tmp/yeast_conformance/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa";
19864 let (path, fasta) = if let Ok(fasta) = std::fs::read_to_string(minibwa_path) {
19865 (minibwa_path, fasta)
19866 } else if let Ok(fasta) = std::fs::read_to_string(star_path) {
19867 (star_path, fasta)
19868 } else {
19869 eprintln!("skipping missing fixtures: {minibwa_path} and {star_path}");
19870 return;
19871 };
19872 let mut forward = Vec::new();
19873 for line in fasta.lines() {
19874 if line.starts_with('>') {
19875 continue;
19876 }
19877 forward.extend(line.as_bytes().iter().filter_map(|&c| match c {
19878 b'A' | b'a' => Some(0),
19879 b'C' | b'c' => Some(1),
19880 b'G' | b'g' => Some(2),
19881 b'T' | b't' => Some(3),
19882 _ => None,
19883 }));
19884 }
19885 assert!(
19886 forward.len() > 12_000_000,
19887 "fixture {path} should exercise the minibwa yeast index workload"
19888 );
19889
19890 let mut text = Vec::with_capacity(forward.len() * 2);
19891 text.extend_from_slice(&forward);
19892 text.extend(forward.iter().rev().map(|&c| 3 - c));
19893
19894 const FS: SaSint = 10_000;
19895 let mut sa = vec![0; text.len() + FS as usize];
19896 assert_eq!(
19897 libsais64_main(&text, &mut sa, LIBSAIS_FLAGS_NONE, 0, None, FS, None, 1),
19898 0
19899 );
19900 }
19901
19902 #[test]
19903 #[ignore = "large real-data regression; requires local yeast FASTA fixture"]
19904 fn public_libsais64_matches_c_on_minibwa_yeast_two_strand_index_input() {
19905 let path = "/data/henriksson/github/claude/star/.tmp/yeast_conformance/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa";
19906 let Ok(fasta) = std::fs::read_to_string(path) else {
19907 eprintln!("skipping missing fixture: {path}");
19908 return;
19909 };
19910 let mut forward = Vec::new();
19911 for line in fasta.lines() {
19912 if line.starts_with('>') {
19913 continue;
19914 }
19915 forward.extend(line.as_bytes().iter().filter_map(|&c| match c {
19916 b'A' | b'a' => Some(0),
19917 b'C' | b'c' => Some(1),
19918 b'G' | b'g' => Some(2),
19919 b'T' | b't' => Some(3),
19920 _ => None,
19921 }));
19922 }
19923 let mut text = Vec::with_capacity(forward.len() * 2);
19924 text.extend_from_slice(&forward);
19925 text.extend(forward.iter().rev().map(|&c| 3 - c));
19926
19927 const FS: SaSint = 10_000;
19928 let mut rust_sa = vec![0; text.len() + FS as usize];
19929 let mut c_sa = vec![0; text.len() + FS as usize];
19930 let rust_rc = libsais64(&text, &mut rust_sa, FS, None);
19931 let c_rc = unsafe {
19932 probe_public_libsais64(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, FS)
19933 };
19934 assert_eq!(rust_rc, c_rc);
19935 if let Some(i) = rust_sa[..text.len()]
19936 .iter()
19937 .zip(&c_sa[..text.len()])
19938 .position(|(r, c)| r != c)
19939 {
19940 panic!(
19941 "first suffix-array diff at {i}: rust={} c={}",
19942 rust_sa[i], c_sa[i]
19943 );
19944 }
19945 }
19946
19947 #[test]
19948 #[ignore = "large real-data regression; requires local yeast FASTA fixture"]
19949 fn public_libsais64_omp_matches_c_on_minibwa_yeast_two_strand_index_input() {
19950 let path = "/data/henriksson/github/claude/star/.tmp/yeast_conformance/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa";
19951 let Ok(fasta) = std::fs::read_to_string(path) else {
19952 eprintln!("skipping missing fixture: {path}");
19953 return;
19954 };
19955 let mut forward = Vec::new();
19956 for line in fasta.lines() {
19957 if line.starts_with('>') {
19958 continue;
19959 }
19960 forward.extend(line.as_bytes().iter().filter_map(|&c| match c {
19961 b'A' | b'a' => Some(0),
19962 b'C' | b'c' => Some(1),
19963 b'G' | b'g' => Some(2),
19964 b'T' | b't' => Some(3),
19965 _ => None,
19966 }));
19967 }
19968 let mut text = Vec::with_capacity(forward.len() * 2);
19969 text.extend_from_slice(&forward);
19970 text.extend(forward.iter().rev().map(|&c| 3 - c));
19971
19972 const FS: SaSint = 10_000;
19973 let mut rust_sa = vec![0; text.len() + FS as usize];
19974 let mut c_sa = vec![0; text.len() + FS as usize];
19975 let rust_rc = libsais64_omp(&text, &mut rust_sa, FS, None, 4);
19976 let c_rc = unsafe {
19977 probe_public_libsais64(text.as_ptr(), c_sa.as_mut_ptr(), text.len() as SaSint, FS)
19978 };
19979 assert_eq!(rust_rc, c_rc);
19980 if let Some(i) = rust_sa[..text.len()]
19981 .iter()
19982 .zip(&c_sa[..text.len()])
19983 .position(|(r, c)| r != c)
19984 {
19985 panic!(
19986 "first omp suffix-array diff at {i}: rust={} c={}",
19987 rust_sa[i], c_sa[i]
19988 );
19989 }
19990 }
19991
19992 #[test]
19993 fn public_libsais64_ctx_rejects_invalid_public_arguments() {
19994 let text = b"banana";
19995 let mut ctx = create_ctx().unwrap();
19996 let mut short_sa = vec![0; text.len() - 1];
19997 let mut full_sa = vec![0; text.len()];
19998 let mut short_freq = vec![0; ALPHABET_SIZE - 1];
19999 let mut short_u = vec![0; text.len() - 1];
20000 let mut full_u = vec![0; text.len()];
20001 let mut short_a = vec![0; text.len() - 1];
20002 let mut full_a = vec![0; text.len()];
20003 let mut aux = vec![0; 2];
20004
20005 assert_eq!(libsais64_ctx(&mut ctx, text, &mut short_sa, 0, None), -1);
20006 assert_eq!(
20007 libsais64_ctx(&mut ctx, text, &mut full_sa, 0, Some(&mut short_freq)),
20008 -1
20009 );
20010 assert_eq!(
20011 libsais64_gsa_ctx(&mut ctx, b"banana", &mut full_sa, 0, None),
20012 -1
20013 );
20014 assert_eq!(
20015 libsais64_gsa_ctx(&mut ctx, b"banana\0", &mut short_sa, 0, None),
20016 -1
20017 );
20018 assert_eq!(
20019 libsais64_bwt_ctx(&mut ctx, text, &mut short_u, &mut full_a, 0, None),
20020 -1
20021 );
20022 assert_eq!(
20023 libsais64_bwt_ctx(&mut ctx, text, &mut full_u, &mut short_a, 0, None),
20024 -1
20025 );
20026 assert_eq!(
20027 libsais64_bwt_ctx(
20028 &mut ctx,
20029 text,
20030 &mut full_u,
20031 &mut full_a,
20032 0,
20033 Some(&mut short_freq)
20034 ),
20035 -1
20036 );
20037 assert_eq!(
20038 libsais64_bwt_aux_ctx(
20039 &mut ctx,
20040 text,
20041 &mut full_u,
20042 &mut full_a,
20043 0,
20044 None,
20045 0,
20046 &mut aux
20047 ),
20048 -1
20049 );
20050 assert_eq!(
20051 libsais64_bwt_aux_ctx(
20052 &mut ctx,
20053 text,
20054 &mut full_u,
20055 &mut full_a,
20056 0,
20057 None,
20058 3,
20059 &mut aux
20060 ),
20061 -1
20062 );
20063 assert_eq!(
20064 libsais64_bwt_aux_ctx(
20065 &mut ctx,
20066 text,
20067 &mut full_u,
20068 &mut full_a,
20069 0,
20070 None,
20071 4,
20072 &mut []
20073 ),
20074 -1
20075 );
20076
20077 let mut missing_thread_state_ctx = Context {
20078 buckets: vec![0; 8 * ALPHABET_SIZE],
20079 thread_state: None,
20080 threads: 2,
20081 };
20082 assert_eq!(
20083 libsais64_ctx(&mut missing_thread_state_ctx, text, &mut full_sa, 0, None),
20084 -2
20085 );
20086
20087 let mut zero_thread_ctx = Context {
20088 buckets: vec![0; 8 * ALPHABET_SIZE],
20089 thread_state: None,
20090 threads: 0,
20091 };
20092 assert_eq!(
20093 libsais64_ctx(&mut zero_thread_ctx, text, &mut full_sa, 0, None),
20094 -2
20095 );
20096
20097 let mut short_thread_state_ctx = create_ctx_main(2).expect("context");
20098 short_thread_state_ctx
20099 .thread_state
20100 .as_mut()
20101 .expect("thread state")
20102 .truncate(1);
20103 assert_eq!(
20104 libsais64_ctx(&mut short_thread_state_ctx, text, &mut full_sa, 0, None),
20105 -2
20106 );
20107 }
20108
20109 #[test]
20110 fn public_libsais64_unbwt_ctx_rejects_invalid_public_arguments() {
20111 let text = b"banana";
20112 let mut bwt = vec![0; text.len()];
20113 let mut work = vec![0; text.len()];
20114 let primary = libsais64_bwt(text, &mut bwt, &mut work, 0, None);
20115 let mut ctx = unbwt_create_ctx().unwrap();
20116
20117 let mut short_u = vec![0; text.len() - 1];
20118 let mut full_u = vec![0; text.len()];
20119 let mut short_a = vec![0; text.len() - 1];
20120 let mut full_a = vec![0; text.len()];
20121 let short_freq = vec![0; ALPHABET_SIZE - 1];
20122 let good_aux = vec![primary, 4];
20123
20124 assert_eq!(
20125 libsais64_unbwt_ctx(&mut ctx, &bwt, &mut short_u, &mut full_a, None, primary),
20126 -1
20127 );
20128 assert_eq!(
20129 libsais64_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut short_a, None, primary),
20130 -1
20131 );
20132 assert_eq!(
20133 libsais64_unbwt_ctx(
20134 &mut ctx,
20135 &bwt,
20136 &mut full_u,
20137 &mut full_a,
20138 Some(&short_freq),
20139 primary
20140 ),
20141 -1
20142 );
20143 assert_eq!(
20144 libsais64_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0),
20145 -1
20146 );
20147 assert_eq!(
20148 libsais64_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
20149 -1
20150 );
20151 assert_eq!(
20152 libsais64_unbwt_aux_ctx(
20153 &mut ctx,
20154 &bwt,
20155 &mut full_u,
20156 &mut full_a,
20157 None,
20158 4,
20159 &[primary]
20160 ),
20161 -1
20162 );
20163
20164 let mut malformed_ctx = UnbwtContext {
20165 bucket2: Vec::new(),
20166 fastbits: Vec::new(),
20167 buckets: None,
20168 threads: 1,
20169 };
20170 assert_eq!(
20171 libsais64_unbwt_ctx(
20172 &mut malformed_ctx,
20173 &bwt,
20174 &mut full_u,
20175 &mut full_a,
20176 None,
20177 primary
20178 ),
20179 -2
20180 );
20181
20182 let mut missing_parallel_buckets_ctx = UnbwtContext {
20183 bucket2: vec![0; ALPHABET_SIZE * ALPHABET_SIZE],
20184 fastbits: vec![0; 1 + (1 << UNBWT_FASTBITS)],
20185 buckets: None,
20186 threads: 2,
20187 };
20188 assert_eq!(
20189 libsais64_unbwt_ctx(
20190 &mut missing_parallel_buckets_ctx,
20191 &bwt,
20192 &mut full_u,
20193 &mut full_a,
20194 None,
20195 primary
20196 ),
20197 -2
20198 );
20199 }
20200
20201 #[test]
20202 fn public_libsais64_lcp_helpers_reject_invalid_suffix_entries() {
20203 let text = b"banana";
20204 let mut plcp = vec![0; text.len()];
20205 let mut lcp = vec![0; text.len()];
20206 let int_text = vec![1, 2, 1, 0];
20207 let mut int_plcp = vec![0; int_text.len()];
20208
20209 assert_eq!(libsais64_plcp(text, &[0, 1, -1, 3, 4, 5], &mut plcp), -1);
20210 assert_eq!(libsais64_plcp(text, &[0, 1, 2, 3, 4, 6], &mut plcp), -1);
20211 assert_eq!(libsais64_lcp(&plcp, &[0, 1, -1, 3, 4, 5], &mut lcp), -1);
20212 assert_eq!(libsais64_lcp(&plcp, &[0, 1, 2, 3, 4, 6], &mut lcp), -1);
20213 assert_eq!(
20214 libsais64_plcp_int(&int_text, &[0, 1, -1, 3], &mut int_plcp),
20215 -1
20216 );
20217 assert_eq!(
20218 libsais64_plcp_int_omp(&int_text, &[0, 1, 2, 4], &mut int_plcp, 1),
20219 -1
20220 );
20221 }
20222
20223 #[test]
20224 fn public_libsais64_context_wrappers_match_direct_calls() {
20225 let text = b"banana";
20226 let gsa_text = b"ban\0ana\0";
20227 let mut ctx = create_ctx().unwrap();
20228
20229 let mut direct_sa = vec![0; text.len()];
20230 let mut ctx_sa = vec![0; text.len()];
20231 assert_eq!(libsais64(text, &mut direct_sa, 0, None), 0);
20232 assert_eq!(libsais64_ctx(&mut ctx, text, &mut ctx_sa, 0, None), 0);
20233 assert_eq!(ctx_sa, direct_sa);
20234
20235 let mut direct_gsa = vec![0; gsa_text.len()];
20236 let mut ctx_gsa = vec![0; gsa_text.len()];
20237 assert_eq!(libsais64_gsa(gsa_text, &mut direct_gsa, 0, None), 0);
20238 assert_eq!(
20239 libsais64_gsa_ctx(&mut ctx, gsa_text, &mut ctx_gsa, 0, None),
20240 0
20241 );
20242 assert_eq!(ctx_gsa, direct_gsa);
20243
20244 let mut direct_bwt = vec![0; text.len()];
20245 let mut direct_work = vec![0; text.len()];
20246 let mut ctx_bwt = vec![0; text.len()];
20247 let mut ctx_work = vec![0; text.len()];
20248 assert_eq!(
20249 libsais64_bwt(text, &mut direct_bwt, &mut direct_work, 0, None),
20250 libsais64_bwt_ctx(&mut ctx, text, &mut ctx_bwt, &mut ctx_work, 0, None)
20251 );
20252 assert_eq!(ctx_bwt, direct_bwt);
20253
20254 let mut direct_aux = vec![0; 2];
20255 let mut ctx_aux = vec![0; 2];
20256 assert_eq!(
20257 libsais64_bwt_aux(
20258 text,
20259 &mut direct_bwt,
20260 &mut direct_work,
20261 0,
20262 None,
20263 4,
20264 &mut direct_aux
20265 ),
20266 libsais64_bwt_aux_ctx(
20267 &mut ctx,
20268 text,
20269 &mut ctx_bwt,
20270 &mut ctx_work,
20271 0,
20272 None,
20273 4,
20274 &mut ctx_aux
20275 )
20276 );
20277 assert_eq!(ctx_bwt, direct_bwt);
20278 assert_eq!(ctx_aux, direct_aux);
20279 }
20280
20281 #[test]
20282 fn libsais64_ctx_matches_plain_entry_point_for_small_text() {
20283 let t = b"mississippi";
20284 let mut sa_plain = vec![0; t.len()];
20285 let mut sa_ctx = vec![0; t.len()];
20286 let plain = libsais64(t, &mut sa_plain, 0, None);
20287
20288 let mut ctx = create_ctx().expect("context");
20289 let with_ctx = libsais64_ctx(&mut ctx, t, &mut sa_ctx, 0, None);
20290
20291 assert_eq!(plain, 0);
20292 assert_eq!(with_ctx, 0);
20293 assert_eq!(sa_ctx, sa_plain);
20294 }
20295
20296 #[test]
20297 fn public_libsais64_unbwt_context_wrappers_match_direct_calls() {
20298 let text = b"banana";
20299 let mut bwt = vec![0; text.len()];
20300 let mut work = vec![0; text.len()];
20301 let primary = libsais64_bwt(text, &mut bwt, &mut work, 0, None);
20302 let mut ctx = unbwt_create_ctx().unwrap();
20303
20304 let mut direct = vec![0; text.len()];
20305 let mut direct_work = vec![0; text.len() + 1];
20306 let mut via_ctx = vec![0; text.len()];
20307 let mut ctx_work = vec![0; text.len() + 1];
20308 assert_eq!(
20309 libsais64_unbwt(&bwt, &mut direct, &mut direct_work, None, primary),
20310 0
20311 );
20312 assert_eq!(
20313 libsais64_unbwt_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, primary),
20314 0
20315 );
20316 assert_eq!(via_ctx, direct);
20317
20318 let mut aux = vec![0; 2];
20319 assert_eq!(
20320 libsais64_bwt_aux(text, &mut bwt, &mut work, 0, None, 4, &mut aux),
20321 0
20322 );
20323 assert_eq!(
20324 libsais64_unbwt_aux(&bwt, &mut direct, &mut direct_work, None, 4, &aux),
20325 0
20326 );
20327 assert_eq!(
20328 libsais64_unbwt_aux_ctx(&mut ctx, &bwt, &mut via_ctx, &mut ctx_work, None, 4, &aux),
20329 0
20330 );
20331 assert_eq!(via_ctx, direct);
20332 }
20333
20334 #[test]
20335 fn public_libsais64_ctx_frequency_wrappers_match_direct_calls() {
20336 let text = b"banana";
20337 let gsa_text = b"ban\0ana\0";
20338 let mut ctx = create_ctx().unwrap();
20339
20340 let mut direct_sa = vec![0; text.len()];
20341 let mut ctx_sa = vec![0; text.len()];
20342 let mut direct_freq = vec![-1; ALPHABET_SIZE];
20343 let mut ctx_freq = vec![-1; ALPHABET_SIZE];
20344 assert_eq!(
20345 libsais64(text, &mut direct_sa, 0, Some(&mut direct_freq)),
20346 0
20347 );
20348 assert_eq!(
20349 libsais64_ctx(&mut ctx, text, &mut ctx_sa, 0, Some(&mut ctx_freq)),
20350 0
20351 );
20352 assert_eq!(ctx_sa, direct_sa);
20353 assert_eq!(ctx_freq, direct_freq);
20354
20355 let mut direct_gsa = vec![0; gsa_text.len()];
20356 let mut ctx_gsa = vec![0; gsa_text.len()];
20357 direct_freq.fill(-1);
20358 ctx_freq.fill(-1);
20359 assert_eq!(
20360 libsais64_gsa(gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
20361 0
20362 );
20363 assert_eq!(
20364 libsais64_gsa_ctx(&mut ctx, gsa_text, &mut ctx_gsa, 0, Some(&mut ctx_freq)),
20365 0
20366 );
20367 assert_eq!(ctx_gsa, direct_gsa);
20368 assert_eq!(ctx_freq, direct_freq);
20369
20370 let mut direct_bwt = vec![0; text.len()];
20371 let mut direct_work = vec![0; text.len()];
20372 let mut ctx_bwt = vec![0; text.len()];
20373 let mut ctx_work = vec![0; text.len()];
20374 direct_freq.fill(-1);
20375 ctx_freq.fill(-1);
20376 assert_eq!(
20377 libsais64_bwt(
20378 text,
20379 &mut direct_bwt,
20380 &mut direct_work,
20381 0,
20382 Some(&mut direct_freq)
20383 ),
20384 libsais64_bwt_ctx(
20385 &mut ctx,
20386 text,
20387 &mut ctx_bwt,
20388 &mut ctx_work,
20389 0,
20390 Some(&mut ctx_freq)
20391 )
20392 );
20393 assert_eq!(ctx_bwt, direct_bwt);
20394 assert_eq!(ctx_freq, direct_freq);
20395
20396 let mut direct_aux = vec![0; 2];
20397 let mut ctx_aux = vec![0; 2];
20398 direct_freq.fill(-1);
20399 ctx_freq.fill(-1);
20400 assert_eq!(
20401 libsais64_bwt_aux(
20402 text,
20403 &mut direct_bwt,
20404 &mut direct_work,
20405 0,
20406 Some(&mut direct_freq),
20407 4,
20408 &mut direct_aux
20409 ),
20410 libsais64_bwt_aux_ctx(
20411 &mut ctx,
20412 text,
20413 &mut ctx_bwt,
20414 &mut ctx_work,
20415 0,
20416 Some(&mut ctx_freq),
20417 4,
20418 &mut ctx_aux
20419 )
20420 );
20421 assert_eq!(ctx_bwt, direct_bwt);
20422 assert_eq!(ctx_aux, direct_aux);
20423 assert_eq!(ctx_freq, direct_freq);
20424 }
20425
20426 #[test]
20427 fn public_libsais64_unbwt_ctx_frequency_wrappers_match_direct_calls() {
20428 let text = b"abracadabra";
20429 let mut freq = vec![0; ALPHABET_SIZE];
20430 let mut bwt = vec![0; text.len()];
20431 let mut work = vec![0; text.len()];
20432 let primary = libsais64_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
20433 assert!(primary >= 0);
20434
20435 let mut ctx = unbwt_create_ctx().unwrap();
20436 let mut direct = vec![0; text.len()];
20437 let mut direct_work = vec![0; text.len() + 1];
20438 let mut via_ctx = vec![0; text.len()];
20439 let mut ctx_work = vec![0; text.len() + 1];
20440 assert_eq!(
20441 libsais64_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
20442 libsais64_unbwt_ctx(
20443 &mut ctx,
20444 &bwt,
20445 &mut via_ctx,
20446 &mut ctx_work,
20447 Some(&freq),
20448 primary
20449 )
20450 );
20451 assert_eq!(via_ctx, direct);
20452 assert_eq!(via_ctx, text);
20453
20454 let mut aux = vec![0; (text.len() - 1) / 4 + 1];
20455 assert_eq!(
20456 libsais64_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
20457 0
20458 );
20459 direct.fill(0);
20460 direct_work.fill(0);
20461 via_ctx.fill(0);
20462 ctx_work.fill(0);
20463 assert_eq!(
20464 libsais64_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
20465 libsais64_unbwt_aux_ctx(
20466 &mut ctx,
20467 &bwt,
20468 &mut via_ctx,
20469 &mut ctx_work,
20470 Some(&freq),
20471 4,
20472 &aux
20473 )
20474 );
20475 assert_eq!(via_ctx, direct);
20476 assert_eq!(via_ctx, text);
20477 }
20478
20479 #[test]
20480 fn public_libsais64_omp_wrappers_match_direct_calls() {
20481 let text = b"banana";
20482 let gsa_text = b"ban\0ana\0";
20483
20484 let mut direct_sa = vec![0; text.len()];
20485 let mut omp_sa = vec![0; text.len()];
20486 assert_eq!(libsais64(text, &mut direct_sa, 0, None), 0);
20487 assert_eq!(libsais64_omp(text, &mut omp_sa, 0, None, 2), 0);
20488 assert_eq!(omp_sa, direct_sa);
20489 assert_eq!(libsais64_omp(text, &mut omp_sa, 0, None, -1), -1);
20490
20491 let mut direct_gsa = vec![0; gsa_text.len()];
20492 let mut omp_gsa = vec![0; gsa_text.len()];
20493 assert_eq!(libsais64_gsa(gsa_text, &mut direct_gsa, 0, None), 0);
20494 assert_eq!(libsais64_gsa_omp(gsa_text, &mut omp_gsa, 0, None, 2), 0);
20495 assert_eq!(omp_gsa, direct_gsa);
20496 assert_eq!(libsais64_gsa_omp(gsa_text, &mut omp_gsa, 0, None, -1), -1);
20497
20498 let int_text = vec![2, 1, 3, 1, 0];
20499 let mut direct_int_text = int_text.clone();
20500 let mut omp_int_text = int_text.clone();
20501 let mut direct_int_sa = vec![0; int_text.len()];
20502 let mut omp_int_sa = vec![0; int_text.len()];
20503 assert_eq!(
20504 libsais64_int(&mut direct_int_text, &mut direct_int_sa, 4, 0),
20505 0
20506 );
20507 assert_eq!(
20508 libsais64_int_omp(&mut omp_int_text, &mut omp_int_sa, 4, 0, 2),
20509 0
20510 );
20511 assert_eq!(omp_int_sa, direct_int_sa);
20512 assert_eq!(
20513 libsais64_int_omp(&mut omp_int_text, &mut omp_int_sa, 4, 0, -1),
20514 -1
20515 );
20516
20517 let long_text = vec![3, 1, 4, 1, 5, 0];
20518 let mut direct_long_text = long_text.clone();
20519 let mut omp_long_text = long_text.clone();
20520 let mut direct_long_sa = vec![0; long_text.len()];
20521 let mut omp_long_sa = vec![0; long_text.len()];
20522 assert_eq!(
20523 libsais64_long(&mut direct_long_text, &mut direct_long_sa, 6, 0),
20524 0
20525 );
20526 assert_eq!(
20527 libsais64_long_omp(&mut omp_long_text, &mut omp_long_sa, 6, 0, 2),
20528 0
20529 );
20530 assert_eq!(omp_long_sa, direct_long_sa);
20531 assert_eq!(
20532 libsais64_long_omp(&mut omp_long_text, &mut omp_long_sa, 6, 0, -1),
20533 -1
20534 );
20535
20536 let mut direct_bwt = vec![0; text.len()];
20537 let mut direct_work = vec![0; text.len()];
20538 let mut omp_bwt = vec![0; text.len()];
20539 let mut omp_work = vec![0; text.len()];
20540 assert_eq!(
20541 libsais64_bwt(text, &mut direct_bwt, &mut direct_work, 0, None),
20542 libsais64_bwt_omp(text, &mut omp_bwt, &mut omp_work, 0, None, 2)
20543 );
20544 assert_eq!(omp_bwt, direct_bwt);
20545 assert_eq!(
20546 libsais64_bwt_omp(text, &mut omp_bwt, &mut omp_work, 0, None, -1),
20547 -1
20548 );
20549
20550 let mut direct_aux = vec![0; 2];
20551 let mut omp_aux = vec![0; 2];
20552 assert_eq!(
20553 libsais64_bwt_aux(
20554 text,
20555 &mut direct_bwt,
20556 &mut direct_work,
20557 0,
20558 None,
20559 4,
20560 &mut direct_aux
20561 ),
20562 libsais64_bwt_aux_omp(
20563 text,
20564 &mut omp_bwt,
20565 &mut omp_work,
20566 0,
20567 None,
20568 4,
20569 &mut omp_aux,
20570 2
20571 )
20572 );
20573 assert_eq!(omp_bwt, direct_bwt);
20574 assert_eq!(omp_aux, direct_aux);
20575 assert_eq!(
20576 libsais64_bwt_aux_omp(
20577 text,
20578 &mut omp_bwt,
20579 &mut omp_work,
20580 0,
20581 None,
20582 4,
20583 &mut omp_aux,
20584 -1
20585 ),
20586 -1
20587 );
20588 }
20589
20590 #[test]
20591 fn public_libsais64_plcp_omp_wrappers_match_direct_calls() {
20592 let text = b"banana";
20593 let mut sa = vec![0; text.len()];
20594 assert_eq!(libsais64(text, &mut sa, 0, None), 0);
20595
20596 let mut direct_plcp = vec![0; text.len()];
20597 let mut omp_plcp = vec![0; text.len()];
20598 assert_eq!(libsais64_plcp(text, &sa, &mut direct_plcp), 0);
20599 assert_eq!(libsais64_plcp_omp(text, &sa, &mut omp_plcp, 2), 0);
20600 assert_eq!(omp_plcp, direct_plcp);
20601 assert_eq!(libsais64_plcp_omp(text, &sa, &mut omp_plcp, -1), -1);
20602
20603 let mut direct_lcp = vec![0; text.len()];
20604 let mut omp_lcp = vec![0; text.len()];
20605 assert_eq!(libsais64_lcp(&direct_plcp, &sa, &mut direct_lcp), 0);
20606 assert_eq!(libsais64_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, 2), 0);
20607 assert_eq!(omp_lcp, direct_lcp);
20608 assert_eq!(libsais64_lcp_omp(&direct_plcp, &sa, &mut omp_lcp, -1), -1);
20609
20610 let gsa_text = b"ban\0ana\0";
20611 let mut gsa = vec![0; gsa_text.len()];
20612 assert_eq!(libsais64_gsa(gsa_text, &mut gsa, 0, None), 0);
20613 let mut direct_gsa_plcp = vec![0; gsa_text.len()];
20614 let mut omp_gsa_plcp = vec![0; gsa_text.len()];
20615 assert_eq!(libsais64_plcp_gsa(gsa_text, &gsa, &mut direct_gsa_plcp), 0);
20616 assert_eq!(
20617 libsais64_plcp_gsa_omp(gsa_text, &gsa, &mut omp_gsa_plcp, 2),
20618 0
20619 );
20620 assert_eq!(omp_gsa_plcp, direct_gsa_plcp);
20621 assert_eq!(
20622 libsais64_plcp_gsa_omp(gsa_text, &gsa, &mut omp_gsa_plcp, -1),
20623 -1
20624 );
20625
20626 let int_text = vec![2, 1, 3, 1, 0];
20627 let mut int_text_for_sa = int_text.clone();
20628 let mut int_sa = vec![0; int_text.len()];
20629 assert_eq!(libsais64_int(&mut int_text_for_sa, &mut int_sa, 4, 0), 0);
20630 let mut direct_int_plcp = vec![0; int_text.len()];
20631 let mut omp_int_plcp = vec![0; int_text.len()];
20632 assert_eq!(
20633 libsais64_plcp_int(&int_text, &int_sa, &mut direct_int_plcp),
20634 0
20635 );
20636 assert_eq!(
20637 libsais64_plcp_int_omp(&int_text, &int_sa, &mut omp_int_plcp, 2),
20638 0
20639 );
20640 assert_eq!(omp_int_plcp, direct_int_plcp);
20641 assert_eq!(
20642 libsais64_plcp_int_omp(&int_text, &int_sa, &mut omp_int_plcp, -1),
20643 -1
20644 );
20645 }
20646
20647 #[test]
20648 fn public_libsais64_omp_frequency_wrappers_match_direct_calls() {
20649 let text = b"banana";
20650 let gsa_text = b"ban\0ana\0";
20651
20652 let mut direct_sa = vec![0; text.len()];
20653 let mut omp_sa = vec![0; text.len()];
20654 let mut direct_freq = vec![-1; ALPHABET_SIZE];
20655 let mut omp_freq = vec![-1; ALPHABET_SIZE];
20656 assert_eq!(
20657 libsais64(text, &mut direct_sa, 0, Some(&mut direct_freq)),
20658 0
20659 );
20660 assert_eq!(
20661 libsais64_omp(text, &mut omp_sa, 0, Some(&mut omp_freq), 2),
20662 0
20663 );
20664 assert_eq!(omp_sa, direct_sa);
20665 assert_eq!(omp_freq, direct_freq);
20666
20667 let mut direct_gsa = vec![0; gsa_text.len()];
20668 let mut omp_gsa = vec![0; gsa_text.len()];
20669 direct_freq.fill(-1);
20670 omp_freq.fill(-1);
20671 assert_eq!(
20672 libsais64_gsa(gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
20673 0
20674 );
20675 assert_eq!(
20676 libsais64_gsa_omp(gsa_text, &mut omp_gsa, 0, Some(&mut omp_freq), 2),
20677 0
20678 );
20679 assert_eq!(omp_gsa, direct_gsa);
20680 assert_eq!(omp_freq, direct_freq);
20681
20682 let mut direct_bwt = vec![0; text.len()];
20683 let mut direct_work = vec![0; text.len()];
20684 let mut omp_bwt = vec![0; text.len()];
20685 let mut omp_work = vec![0; text.len()];
20686 direct_freq.fill(-1);
20687 omp_freq.fill(-1);
20688 assert_eq!(
20689 libsais64_bwt(
20690 text,
20691 &mut direct_bwt,
20692 &mut direct_work,
20693 0,
20694 Some(&mut direct_freq)
20695 ),
20696 libsais64_bwt_omp(text, &mut omp_bwt, &mut omp_work, 0, Some(&mut omp_freq), 2)
20697 );
20698 assert_eq!(omp_bwt, direct_bwt);
20699 assert_eq!(omp_freq, direct_freq);
20700
20701 let mut direct_aux = vec![0; 2];
20702 let mut omp_aux = vec![0; 2];
20703 direct_freq.fill(-1);
20704 omp_freq.fill(-1);
20705 assert_eq!(
20706 libsais64_bwt_aux(
20707 text,
20708 &mut direct_bwt,
20709 &mut direct_work,
20710 0,
20711 Some(&mut direct_freq),
20712 4,
20713 &mut direct_aux
20714 ),
20715 libsais64_bwt_aux_omp(
20716 text,
20717 &mut omp_bwt,
20718 &mut omp_work,
20719 0,
20720 Some(&mut omp_freq),
20721 4,
20722 &mut omp_aux,
20723 2
20724 )
20725 );
20726 assert_eq!(omp_bwt, direct_bwt);
20727 assert_eq!(omp_aux, direct_aux);
20728 assert_eq!(omp_freq, direct_freq);
20729 }
20730
20731 #[test]
20732 fn public_libsais64_unbwt_omp_frequency_wrappers_match_direct_calls() {
20733 let text = b"abracadabra";
20734 let mut freq = vec![0; ALPHABET_SIZE];
20735 let mut bwt = vec![0; text.len()];
20736 let mut work = vec![0; text.len()];
20737 let primary = libsais64_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
20738 assert!(primary >= 0);
20739
20740 let mut direct = vec![0; text.len()];
20741 let mut direct_work = vec![0; text.len() + 1];
20742 let mut omp = vec![0; text.len()];
20743 let mut omp_work = vec![0; text.len() + 1];
20744 assert_eq!(
20745 libsais64_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
20746 libsais64_unbwt_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), primary, 2)
20747 );
20748 assert_eq!(omp, direct);
20749 assert_eq!(omp, text);
20750
20751 let mut aux = vec![0; (text.len() - 1) / 4 + 1];
20752 assert_eq!(
20753 libsais64_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
20754 0
20755 );
20756 direct.fill(0);
20757 direct_work.fill(0);
20758 omp.fill(0);
20759 omp_work.fill(0);
20760 assert_eq!(
20761 libsais64_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
20762 libsais64_unbwt_aux_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), 4, &aux, 2)
20763 );
20764 assert_eq!(omp, direct);
20765 assert_eq!(omp, text);
20766 }
20767
20768 #[test]
20769 fn public_libsais64_bwt_aux_matches_upstream_c() {
20770 for text in [
20771 b"banana".as_slice(),
20772 b"mississippi",
20773 b"abracadabra",
20774 b"AAAAAAAAAAAAAAAA",
20775 b"zyxwvutsrqponmlk",
20776 ] {
20777 assert_libsais64_bwt_aux_matches_c(text, 4);
20778 }
20779 }
20780
20781 #[test]
20782 fn public_libsais64_frequency_outputs_match_upstream_c() {
20783 assert_libsais64_freq_outputs_match_c(b"banana", b"ban\0ana\0");
20784 }
20785
20786 #[test]
20787 fn public_libsais64_unbwt_with_frequency_matches_upstream_c() {
20788 assert_libsais64_unbwt_freq_matches_c(b"abracadabra");
20789 }
20790
20791 #[test]
20792 fn public_libsais64_unbwt_matches_upstream_c() {
20793 for text in [
20794 b"a".as_slice(),
20795 b"banana",
20796 b"mississippi",
20797 b"abracadabra",
20798 b"AAAAAAAAAAAAAAAA",
20799 b"zyxwvutsrqponmlk",
20800 ] {
20801 assert_libsais64_unbwt_matches_c(text);
20802 }
20803 }
20804
20805 #[test]
20806 fn public_libsais64_unbwt_aux_matches_upstream_c() {
20807 for text in [
20808 b"banana".as_slice(),
20809 b"mississippi",
20810 b"abracadabra",
20811 b"AAAAAAAAAAAAAAAA",
20812 b"zyxwvutsrqponmlk",
20813 ] {
20814 assert_libsais64_unbwt_aux_matches_c(text, 4);
20815 }
20816 }
20817
20818 #[test]
20819 fn public_libsais64_bwt_aux_round_trips() {
20820 for text in [
20821 b"banana".as_slice(),
20822 b"mississippi",
20823 b"abracadabra",
20824 b"AAAAAAAAAAAAAAAA",
20825 b"zyxwvutsrqponmlk",
20826 ] {
20827 assert_libsais64_bwt_aux_round_trips(text, 4);
20828 }
20829 }
20830}