1use std::marker::PhantomData;
10use std::mem;
11
12use rayon::prelude::*;
13
14pub mod libsais16;
15pub mod libsais16x64;
16pub mod libsais64;
17pub use libsais16::{libsais16, SaSint as SaSint16, SaUint as SaUint16};
18pub use libsais16x64::{libsais16x64, SaSint as SaSint16x64, SaUint as SaUint16x64};
19pub use libsais64::{libsais64, SaSint as SaSint64, SaUint as SaUint64};
20
/// Signed 32-bit integer used for suffix-array entries, text lengths and counters.
pub type SaSint = i32;
/// Unsigned 32-bit counterpart of [`SaSint`].
pub type SaUint = u32;
/// Pointer-sized signed integer used for loop positions and block offsets.
pub type FastSint = isize;
/// Pointer-sized unsigned integer used for bucket indices.
pub type FastUint = usize;

/// Width of [`SaSint`] in bits.
pub const SAINT_BIT: u32 = 32;
/// Largest [`SaSint`] value; also used as a mask that clears the sign bit of a symbol.
pub const SAINT_MAX: SaSint = i32::MAX;
/// Smallest [`SaSint`] value (only the sign bit set); XOR-ed into entries to flip their marker.
pub const SAINT_MIN: SaSint = i32::MIN;

/// Number of distinct symbols in byte input (256).
pub const ALPHABET_SIZE: usize = 1usize << 8;
/// Exponent of the `fastbits` table size: the table holds `1 + 2^UNBWT_FASTBITS` entries.
pub const UNBWT_FASTBITS: usize = 17;

/// Bit position `SAINT_BIT - 1` (31).
pub const SUFFIX_GROUP_BIT: u32 = SAINT_BIT - 1;
/// Value with only bit `SUFFIX_GROUP_BIT - 1` (bit 30) set.
pub const SUFFIX_GROUP_MARKER: SaSint = 1_i32 << (SUFFIX_GROUP_BIT - 1);

// NOTE(review): presumably the capacity of small stack-local scratch buffers; no use is visible
// in this part of the file -- confirm against the callers.
pub const LIBSAIS_LOCAL_BUFFER_SIZE: usize = 2000;
/// Number of [`ThreadCache`] entries allocated for every [`ThreadState`].
pub const LIBSAIS_PER_THREAD_CACHE_SIZE: usize = 24_576;

// NOTE(review): the three flag values below look like mode selectors (plain suffix array,
// BWT output, generalized suffix array); they are not consumed in this part of the file --
// confirm against the entry points that read them.
pub const LIBSAIS_FLAGS_NONE: SaSint = 0;
pub const LIBSAIS_FLAGS_BWT: SaSint = 1;
pub const LIBSAIS_FLAGS_GSA: SaSint = 2;
42
/// Runs `f` on the calling thread and returns whatever it returns.
///
/// The `_threads` argument is accepted for call-site symmetry but is currently ignored: this
/// function does not build or select a thread pool itself.
pub(crate) fn run_rayon_with_threads<R: Send>(_threads: usize, f: impl FnOnce() -> R + Send) -> R {
    f()
}
46
/// One deferred suffix-array write.
///
/// When a cache is flushed by `place_cached_suffixes`, `index` is stored into the suffix array
/// at slot `symbol`. Entries whose `symbol` is negative are dropped by
/// `compact_and_place_cached_suffixes` before the flush.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct ThreadCache {
    /// Destination slot in the suffix array (negative means "skip this entry").
    pub symbol: SaSint,
    /// Value written to the destination slot.
    pub index: SaSint,
}
52
/// Scratch state owned by one worker while a text is processed block by block.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ThreadState {
    /// End offset (exclusive) of the block this worker processed last.
    pub position: FastSint,
    // NOTE(review): not read or written in the visible part of the file; presumably a
    // per-block element count -- confirm against its users.
    pub count: FastSint,
    /// Number of suffixes the worker gathered from its block.
    pub m: FastSint,
    /// Copy of the suffix-array entry at `position - 1`, saved when `m > 0`.
    pub last_lms_suffix: FastSint,
    /// Per-worker bucket counters (`4 * ALPHABET_SIZE` entries when built by `new`).
    pub buckets: Vec<SaSint>,
    /// Deferred writes (`LIBSAIS_PER_THREAD_CACHE_SIZE` entries when built by `new`).
    pub cache: Vec<ThreadCache>,
}
62
63impl ThreadState {
64 fn new() -> Self {
65 Self {
66 position: 0,
67 count: 0,
68 m: 0,
69 last_lms_suffix: 0,
70 buckets: vec![0; 4 * ALPHABET_SIZE],
71 cache: vec![ThreadCache::default(); LIBSAIS_PER_THREAD_CACHE_SIZE],
72 }
73 }
74}
75
/// Reusable working memory for suffix-array construction.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Context {
    /// Shared bucket counters (`8 * ALPHABET_SIZE` entries when built by `create_ctx_main`).
    pub buckets: Vec<SaSint>,
    /// Per-worker scratch state; `None` when the context was created for a single thread.
    pub thread_state: Option<Vec<ThreadState>>,
    /// Thread count the context was created for.
    pub threads: FastSint,
}
82
/// Reusable working memory for the inverse-BWT routines.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnbwtContext {
    /// Table with `ALPHABET_SIZE * ALPHABET_SIZE` entries (one per symbol pair).
    pub bucket2: Vec<SaUint>,
    /// Lookup table with `1 + 2^UNBWT_FASTBITS` entries.
    pub fastbits: Vec<u16>,
    /// Per-thread tables, `ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE` entries for each
    /// thread; `None` when the context was created for a single thread.
    pub buckets: Option<Vec<SaUint>>,
    /// Thread count the context was created for.
    pub threads: FastSint,
}
90
91#[doc(hidden)]
93pub fn buckets_index2(c: FastUint, s: FastUint) -> FastUint {
94 (c << 1) + s
95}
96
97#[doc(hidden)]
99pub fn buckets_index4(c: FastUint, s: FastUint) -> FastUint {
100 (c << 2) + s
101}
102
/// Rounds `value` up to the nearest multiple of `alignment`.
///
/// `alignment` must be a power of two; this is checked with a debug assertion only.
#[doc(hidden)]
pub fn align_up(value: usize, alignment: usize) -> usize {
    debug_assert!(alignment.is_power_of_two());
    let low_bits = alignment - 1;
    // Bump past the next boundary, then clear the low bits to land exactly on it.
    let bumped = value + alignment - 1;
    bumped & !low_bits
}
109
110#[doc(hidden)]
112pub fn alloc_thread_state(threads: SaSint) -> Option<Vec<ThreadState>> {
113 if threads <= 0 {
114 return None;
115 }
116
117 let len = usize::try_from(threads).ok()?;
118 Some((0..len).map(|_| ThreadState::new()).collect())
119}
120
121#[doc(hidden)]
123pub fn create_ctx_main(threads: SaSint) -> Option<Context> {
124 if threads <= 0 {
125 return None;
126 }
127
128 let thread_state = if threads > 1 {
129 Some(alloc_thread_state(threads)?)
130 } else {
131 None
132 };
133
134 Some(Context {
135 buckets: vec![0; 8 * ALPHABET_SIZE],
136 thread_state,
137 threads: threads as FastSint,
138 })
139}
140
/// Creates a single-threaded [`Context`].
///
/// Equivalent to `create_ctx_main(1)`.
pub fn create_ctx() -> Option<Context> {
    create_ctx_main(1)
}
149
/// Consumes a [`Context`]; its buffers are released when the argument is dropped on return.
pub fn free_ctx(_ctx: Context) {}
152
153#[doc(hidden)]
155pub fn unbwt_create_ctx_main(threads: SaSint) -> Option<UnbwtContext> {
156 if threads <= 0 {
157 return None;
158 }
159
160 let buckets = if threads > 1 {
161 let len = usize::try_from(threads).ok()? * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE);
162 Some(vec![0; len])
163 } else {
164 None
165 };
166
167 Some(UnbwtContext {
168 bucket2: vec![0; ALPHABET_SIZE * ALPHABET_SIZE],
169 fastbits: vec![0; 1 + (1 << UNBWT_FASTBITS)],
170 buckets,
171 threads: threads as FastSint,
172 })
173}
174
/// Consumes an [`UnbwtContext`]; its buffers are released when the argument is dropped on
/// return.
#[doc(hidden)]
pub fn unbwt_free_ctx_main(_ctx: UnbwtContext) {}
178
/// Creates a single-threaded [`UnbwtContext`].
///
/// Equivalent to `unbwt_create_ctx_main(1)`.
pub fn unbwt_create_ctx() -> Option<UnbwtContext> {
    unbwt_create_ctx_main(1)
}
187
/// Consumes an [`UnbwtContext`]; its buffers are released when the argument is dropped on
/// return.
pub fn unbwt_free_ctx(_ctx: UnbwtContext) {}
190
191#[doc(hidden)]
193pub fn count_negative_marked_suffixes(
194 sa: &[SaSint],
195 block_start: FastSint,
196 block_size: FastSint,
197) -> SaSint {
198 block_slice(sa, block_start, block_size)
199 .iter()
200 .map(|&value| SaSint::from(value < 0))
201 .sum()
202}
203
204#[doc(hidden)]
206pub fn count_zero_marked_suffixes(
207 sa: &[SaSint],
208 block_start: FastSint,
209 block_size: FastSint,
210) -> SaSint {
211 block_slice(sa, block_start, block_size)
212 .iter()
213 .map(|&value| SaSint::from(value == 0))
214 .sum()
215}
216
217#[doc(hidden)]
219pub fn place_cached_suffixes(
220 sa: &mut [SaSint],
221 cache: &[ThreadCache],
222 block_start: FastSint,
223 block_size: FastSint,
224) {
225 let start = usize::try_from(block_start).expect("block_start must be non-negative");
226 let len = usize::try_from(block_size).expect("block_size must be non-negative");
227 let entries = if cache.len() >= start + len {
228 &cache[start..start + len]
229 } else {
230 &cache[..len]
231 };
232
233 for entry in entries {
234 let slot = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
235 sa[slot] = entry.index;
236 }
237}
238
239#[doc(hidden)]
241pub fn compact_and_place_cached_suffixes(
242 sa: &mut [SaSint],
243 cache: &mut [ThreadCache],
244 block_start: FastSint,
245 block_size: FastSint,
246) {
247 let start = usize::try_from(block_start).expect("block_start must be non-negative");
248 let len = usize::try_from(block_size).expect("block_size must be non-negative");
249 let read_start = if cache.len() >= start + len { start } else { 0 };
250 let read_end = read_start + len;
251
252 let mut write = read_start;
253 for read in read_start..read_end {
254 let entry = cache[read];
255 if entry.symbol >= 0 {
256 cache[write] = entry;
257 write += 1;
258 }
259 }
260
261 place_cached_suffixes(sa, cache, block_start, (write - read_start) as FastSint);
262}
263
264#[doc(hidden)]
266pub fn flip_suffix_markers_omp(sa: &mut [SaSint], l: SaSint, threads: SaSint) {
267 let len = usize::try_from(l).expect("l must be non-negative");
268 let omp_num_threads = if threads > 1 && l >= 65_536 {
269 usize::try_from(threads).expect("threads must be non-negative")
270 } else {
271 1
272 };
273 if omp_num_threads > 1 {
274 let chunk_size = ((len / omp_num_threads) & !15usize).max(16);
275 run_rayon_with_threads(omp_num_threads, || {
276 sa[..len].par_chunks_mut(chunk_size).for_each(|chunk| {
277 for value in chunk {
278 *value ^= SAINT_MIN;
279 }
280 });
281 });
282 return;
283 }
284
285 let omp_block_stride = (len / omp_num_threads) & !15usize;
286 for omp_thread_num in 0..omp_num_threads {
287 let omp_block_start = omp_thread_num * omp_block_stride;
288 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
289 omp_block_stride
290 } else {
291 len - omp_block_start
292 };
293 for value in &mut sa[omp_block_start..omp_block_start + omp_block_size] {
294 *value ^= SAINT_MIN;
295 }
296 }
297}
298
299#[doc(hidden)]
301pub fn gather_lms_suffixes_8u(
302 t: &[u8],
303 sa: &mut [SaSint],
304 n: SaSint,
305 mut m: FastSint,
306 omp_block_start: FastSint,
307 omp_block_size: FastSint,
308) {
309 if omp_block_size <= 0 {
310 return;
311 }
312
313 let n = usize::try_from(n).expect("n must be non-negative");
314 let block_start =
315 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
316 let block_size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
317
318 let mut j = block_start + block_size;
319 let mut c0 = t[block_start + block_size - 1] as FastSint;
320 let mut c1 = -1;
321 while j < n {
322 c1 = t[j] as FastSint;
323 if c1 != c0 {
324 break;
325 }
326 j += 1;
327 }
328
329 let mut f0 = usize::from(c0 >= c1);
330 let mut f1: usize;
331 let mut i = block_start + block_size - 2;
332 let limit = block_start + 3;
333
334 while i >= limit {
335 c1 = t[i] as FastSint;
336 f1 = usize::from(c1 > (c0 - f0 as FastSint));
337 sa[usize::try_from(m).expect("m must be non-negative")] = (i + 1) as SaSint;
338 m -= (f1 & !f0) as FastSint;
339
340 c0 = t[i - 1] as FastSint;
341 f0 = usize::from(c0 > (c1 - f1 as FastSint));
342 sa[usize::try_from(m).expect("m must be non-negative")] = i as SaSint;
343 m -= (f0 & !f1) as FastSint;
344
345 c1 = t[i - 2] as FastSint;
346 f1 = usize::from(c1 > (c0 - f0 as FastSint));
347 sa[usize::try_from(m).expect("m must be non-negative")] = (i - 1) as SaSint;
348 m -= (f1 & !f0) as FastSint;
349
350 c0 = t[i - 3] as FastSint;
351 f0 = usize::from(c0 > (c1 - f1 as FastSint));
352 sa[usize::try_from(m).expect("m must be non-negative")] = (i - 2) as SaSint;
353 m -= (f0 & !f1) as FastSint;
354
355 if i < 4 {
356 break;
357 }
358 i -= 4;
359 }
360
361 let tail_limit = limit - 3;
362 while i >= tail_limit {
363 c1 = c0;
364 c0 = t[i] as FastSint;
365 f1 = f0;
366 f0 = usize::from(c0 > (c1 - f1 as FastSint));
367 sa[usize::try_from(m).expect("m must be non-negative")] = (i + 1) as SaSint;
368 m -= (f0 & !f1) as FastSint;
369 if i == 0 {
370 break;
371 }
372 i -= 1;
373 }
374
375 sa[usize::try_from(m).expect("m must be non-negative")] = (i + 1) as SaSint;
376}
377
378#[doc(hidden)]
380pub fn gather_lms_suffixes_8u_omp(
381 t: &[u8],
382 sa: &mut [SaSint],
383 n: SaSint,
384 threads: SaSint,
385 thread_state: &mut [ThreadState],
386) {
387 let n_usize = usize::try_from(n).expect("n must be non-negative");
388 let omp_num_threads = if threads > 1 && n >= 65_536 {
389 usize::try_from(threads)
390 .expect("threads must be non-negative")
391 .min(thread_state.len())
392 .max(1)
393 } else {
394 1
395 };
396 if omp_num_threads == 1 {
397 gather_lms_suffixes_8u(t, sa, n, n as FastSint - 1, 0, n as FastSint);
398 return;
399 }
400
401 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
402 let mut suffix_counts_after = vec![0 as FastSint; omp_num_threads];
403 let mut m = 0 as FastSint;
404 for omp_thread_num in (0..omp_num_threads).rev() {
405 suffix_counts_after[omp_thread_num] = m;
406 m += thread_state[omp_thread_num].m;
407 }
408
409 for omp_thread_num in 0..omp_num_threads {
410 let omp_block_start = omp_thread_num * omp_block_stride;
411 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
412 omp_block_stride
413 } else {
414 n_usize - omp_block_start
415 };
416 gather_lms_suffixes_8u(
417 t,
418 sa,
419 n,
420 n as FastSint - 1 - suffix_counts_after[omp_thread_num],
421 omp_block_start as FastSint,
422 omp_block_size as FastSint,
423 );
424 }
425
426 for omp_thread_num in 0..omp_num_threads {
427 if thread_state[omp_thread_num].m > 0 {
428 let dst = usize::try_from(n as FastSint - 1 - suffix_counts_after[omp_thread_num])
429 .expect("destination must be non-negative");
430 sa[dst] = thread_state[omp_thread_num].last_lms_suffix as SaSint;
431 }
432 }
433}
434
435#[doc(hidden)]
437pub fn gather_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
438 let n_usize = usize::try_from(n).expect("n must be non-negative");
439 let mut i = n as FastSint - 2;
440 let mut m = n_usize - 1;
441 let mut f0 = 1usize;
442 let mut f1: usize;
443 let mut c0 = t[n_usize - 1] as FastSint;
444 let mut c1: FastSint;
445
446 while i >= 3 {
447 c1 = t[i as usize] as FastSint;
448 f1 = usize::from(c1 > (c0 - f0 as FastSint));
449 sa[m] = (i + 1) as SaSint;
450 m -= f1 & !f0;
451
452 c0 = t[(i - 1) as usize] as FastSint;
453 f0 = usize::from(c0 > (c1 - f1 as FastSint));
454 sa[m] = i as SaSint;
455 m -= f0 & !f1;
456
457 c1 = t[(i - 2) as usize] as FastSint;
458 f1 = usize::from(c1 > (c0 - f0 as FastSint));
459 sa[m] = (i - 1) as SaSint;
460 m -= f1 & !f0;
461
462 c0 = t[(i - 3) as usize] as FastSint;
463 f0 = usize::from(c0 > (c1 - f1 as FastSint));
464 sa[m] = (i - 2) as SaSint;
465 m -= f0 & !f1;
466
467 i -= 4;
468 }
469
470 while i >= 0 {
471 c1 = c0;
472 c0 = t[i as usize] as FastSint;
473 f1 = f0;
474 f0 = usize::from(c0 > (c1 - f1 as FastSint));
475 sa[m] = (i + 1) as SaSint;
476 m -= f0 & !f1;
477 i -= 1;
478 }
479
480 (n_usize - 1 - m) as SaSint
481}
482
483#[doc(hidden)]
485pub fn gather_compacted_lms_suffixes_32s(t: &[SaSint], sa: &mut [SaSint], n: SaSint) -> SaSint {
486 let n_usize = usize::try_from(n).expect("n must be non-negative");
487 let mut i = n as FastSint - 2;
488 let mut m = n_usize - 1;
489 let mut f0 = 1usize;
490 let mut f1: usize;
491 let mut c0 = t[n_usize - 1] as FastSint;
492 let mut c1: FastSint;
493
494 while i >= 3 {
495 c1 = t[i as usize] as FastSint;
496 f1 = usize::from(c1 > (c0 - f0 as FastSint));
497 sa[m] = (i + 1) as SaSint;
498 m -= f1 & !f0 & usize::from(c0 >= 0);
499
500 c0 = t[(i - 1) as usize] as FastSint;
501 f0 = usize::from(c0 > (c1 - f1 as FastSint));
502 sa[m] = i as SaSint;
503 m -= f0 & !f1 & usize::from(c1 >= 0);
504
505 c1 = t[(i - 2) as usize] as FastSint;
506 f1 = usize::from(c1 > (c0 - f0 as FastSint));
507 sa[m] = (i - 1) as SaSint;
508 m -= f1 & !f0 & usize::from(c0 >= 0);
509
510 c0 = t[(i - 3) as usize] as FastSint;
511 f0 = usize::from(c0 > (c1 - f1 as FastSint));
512 sa[m] = (i - 2) as SaSint;
513 m -= f0 & !f1 & usize::from(c1 >= 0);
514
515 i -= 4;
516 }
517
518 while i >= 0 {
519 c1 = c0;
520 c0 = t[i as usize] as FastSint;
521 f1 = f0;
522 f0 = usize::from(c0 > (c1 - f1 as FastSint));
523 sa[m] = (i + 1) as SaSint;
524 m -= f0 & !f1 & usize::from(c1 >= 0);
525 i -= 1;
526 }
527
528 (n_usize - 1 - m) as SaSint
529}
530
531#[doc(hidden)]
533pub fn count_lms_suffixes_32s_4k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
534 buckets.fill(0);
535 let n_usize = usize::try_from(n).expect("n must be non-negative");
536 let _k_usize = usize::try_from(k).expect("k must be non-negative");
537 let mut i = n as FastSint - 2;
538 let mut f0 = 1usize;
539 let mut f1: usize;
540 let mut c0 = t[n_usize - 1] as FastSint;
541 let mut c1: FastSint;
542
543 while i >= 3 {
544 c1 = t[i as usize] as FastSint;
545 f1 = usize::from(c1 > (c0 - f0 as FastSint));
546 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
547
548 c0 = t[(i - 1) as usize] as FastSint;
549 f0 = usize::from(c0 > (c1 - f1 as FastSint));
550 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
551
552 c1 = t[(i - 2) as usize] as FastSint;
553 f1 = usize::from(c1 > (c0 - f0 as FastSint));
554 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
555
556 c0 = t[(i - 3) as usize] as FastSint;
557 f0 = usize::from(c0 > (c1 - f1 as FastSint));
558 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
559
560 i -= 4;
561 }
562
563 while i >= 0 {
564 c1 = c0;
565 c0 = t[i as usize] as FastSint;
566 f1 = f0;
567 f0 = usize::from(c0 > (c1 - f1 as FastSint));
568 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
569 i -= 1;
570 }
571
572 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0)] += 1;
573}
574
575#[doc(hidden)]
577pub fn count_lms_suffixes_32s_2k(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
578 buckets.fill(0);
579 let n_usize = usize::try_from(n).expect("n must be non-negative");
580 let _k_usize = usize::try_from(k).expect("k must be non-negative");
581 let mut i = n as FastSint - 2;
582 let mut f0 = 1usize;
583 let mut f1: usize;
584 let mut c0 = t[n_usize - 1] as FastSint;
585 let mut c1: FastSint;
586
587 while i >= 3 {
588 c1 = t[i as usize] as FastSint;
589 f1 = usize::from(c1 > (c0 - f0 as FastSint));
590 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
591
592 c0 = t[(i - 1) as usize] as FastSint;
593 f0 = usize::from(c0 > (c1 - f1 as FastSint));
594 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
595
596 c1 = t[(i - 2) as usize] as FastSint;
597 f1 = usize::from(c1 > (c0 - f0 as FastSint));
598 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
599
600 c0 = t[(i - 3) as usize] as FastSint;
601 f0 = usize::from(c0 > (c1 - f1 as FastSint));
602 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
603
604 i -= 4;
605 }
606
607 while i >= 0 {
608 c1 = c0;
609 c0 = t[i as usize] as FastSint;
610 f1 = f0;
611 f0 = usize::from(c0 > (c1 - f1 as FastSint));
612 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
613 i -= 1;
614 }
615
616 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, 0)] += 1;
617}
618
619#[doc(hidden)]
621pub fn count_compacted_lms_suffixes_32s_2k(
622 t: &[SaSint],
623 n: SaSint,
624 k: SaSint,
625 buckets: &mut [SaSint],
626) {
627 buckets.fill(0);
628 let n_usize = usize::try_from(n).expect("n must be non-negative");
629 let _k_usize = usize::try_from(k).expect("k must be non-negative");
630 let mut i = n as FastSint - 2;
631 let mut f0 = 1usize;
632 let mut f1: usize;
633 let mut c0 = t[n_usize - 1] as FastSint;
634 let mut c1: FastSint;
635
636 while i >= 3 {
637 c1 = t[i as usize] as FastSint;
638 f1 = usize::from(c1 > (c0 - f0 as FastSint));
639 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
640
641 c0 = t[(i - 1) as usize] as FastSint;
642 f0 = usize::from(c0 > (c1 - f1 as FastSint));
643 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
644
645 c1 = t[(i - 2) as usize] as FastSint;
646 f1 = usize::from(c1 > (c0 - f0 as FastSint));
647 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
648
649 c0 = t[(i - 3) as usize] as FastSint;
650 f0 = usize::from(c0 > (c1 - f1 as FastSint));
651 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
652
653 i -= 4;
654 }
655
656 while i >= 0 {
657 c1 = c0;
658 c0 = t[i as usize] as FastSint;
659 f1 = f0;
660 f0 = usize::from(c0 > (c1 - f1 as FastSint));
661 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
662 i -= 1;
663 }
664
665 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, 0)] += 1;
666}
667
668#[doc(hidden)]
670pub fn count_and_gather_lms_suffixes_8u(
671 t: &[u8],
672 sa: &mut [SaSint],
673 n: SaSint,
674 buckets: &mut [SaSint],
675 omp_block_start: FastSint,
676 omp_block_size: FastSint,
677) -> SaSint {
678 buckets.fill(0);
679 let n = n as FastSint;
680 let mut m = omp_block_start + omp_block_size - 1;
681
682 if omp_block_size > 0 {
683 let prefetch_distance = 256 as FastSint;
684 let mut j = m + 1;
685 let mut c0 = t[m as usize] as FastSint;
686 let mut c1 = -1;
687 while j < n {
688 c1 = t[j as usize] as FastSint;
689 if c1 != c0 {
690 break;
691 }
692 j += 1;
693 }
694
695 let mut f0 = usize::from(c0 >= c1);
696 let mut f1: usize;
697 let mut i = m - 1;
698 let limit = omp_block_start + 3;
699
700 while i >= limit {
701 let _prefetch_index = i - prefetch_distance;
702 c1 = t[i as usize] as FastSint;
703 f1 = usize::from(c1 > (c0 - f0 as FastSint));
704 sa[m as usize] = (i + 1) as SaSint;
705 m -= (f1 & !f0) as FastSint;
706 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
707
708 c0 = t[(i - 1) as usize] as FastSint;
709 f0 = usize::from(c0 > (c1 - f1 as FastSint));
710 sa[m as usize] = i as SaSint;
711 m -= (f0 & !f1) as FastSint;
712 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
713
714 c1 = t[(i - 2) as usize] as FastSint;
715 f1 = usize::from(c1 > (c0 - f0 as FastSint));
716 sa[m as usize] = (i - 1) as SaSint;
717 m -= (f1 & !f0) as FastSint;
718 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
719
720 c0 = t[(i - 3) as usize] as FastSint;
721 f0 = usize::from(c0 > (c1 - f1 as FastSint));
722 sa[m as usize] = (i - 2) as SaSint;
723 m -= (f0 & !f1) as FastSint;
724 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
725
726 i -= 4;
727 }
728
729 let tail_limit = limit - 3;
730 while i >= tail_limit {
731 c1 = c0;
732 c0 = t[i as usize] as FastSint;
733 f1 = f0;
734 f0 = usize::from(c0 > (c1 - f1 as FastSint));
735 sa[m as usize] = (i + 1) as SaSint;
736 m -= (f0 & !f1) as FastSint;
737 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
738 i -= 1;
739 }
740
741 c1 = if i >= 0 {
742 t[i as usize] as FastSint
743 } else {
744 -1
745 };
746 f1 = usize::from(c1 > (c0 - f0 as FastSint));
747 sa[m as usize] = (i + 1) as SaSint;
748 m -= (f1 & !f0) as FastSint;
749 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
750 }
751
752 (omp_block_start + omp_block_size - 1 - m) as SaSint
753}
754
755#[doc(hidden)]
757pub fn count_and_gather_lms_suffixes_8u_omp(
758 t: &[u8],
759 sa: &mut [SaSint],
760 n: SaSint,
761 buckets: &mut [SaSint],
762 threads: SaSint,
763 thread_state: &mut [ThreadState],
764) -> SaSint {
765 let mut m = 0;
766 let n_usize = usize::try_from(n).expect("n must be non-negative");
767 let omp_num_threads = if threads > 1 && n >= 65_536 {
768 usize::try_from(threads)
769 .expect("threads must be non-negative")
770 .min(thread_state.len())
771 .max(1)
772 } else {
773 1
774 };
775 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
776
777 if omp_num_threads == 1 {
778 return count_and_gather_lms_suffixes_8u(t, sa, n, buckets, 0, n as FastSint);
779 }
780
781 for omp_thread_num in 0..omp_num_threads {
782 let omp_block_start = omp_thread_num * omp_block_stride;
783 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
784 omp_block_stride
785 } else {
786 n_usize - omp_block_start
787 };
788
789 let state = &mut thread_state[omp_thread_num];
790 state.position = FastSint::try_from(omp_block_start + omp_block_size)
791 .expect("position must fit FastSint");
792 state.m = FastSint::try_from(count_and_gather_lms_suffixes_8u(
793 t,
794 sa,
795 n,
796 &mut state.buckets,
797 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
798 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
799 ))
800 .expect("m must fit FastSint");
801
802 if state.m > 0 {
803 let position = usize::try_from(state.position).expect("position must be non-negative");
804 state.last_lms_suffix =
805 FastSint::try_from(sa[position - 1]).expect("suffix must fit FastSint");
806 }
807 }
808
809 buckets.fill(0);
810
811 for tnum in (0..omp_num_threads).rev() {
812 let state = &mut thread_state[tnum];
813 m += SaSint::try_from(state.m).expect("m must fit SaSint");
814
815 if tnum + 1 < omp_num_threads && state.m > 0 {
816 let position = usize::try_from(state.position).expect("position must be non-negative");
817 let count = usize::try_from(state.m).expect("m must be non-negative");
818 let dst = n_usize - usize::try_from(m).expect("m must be non-negative");
819 sa.copy_within(position - count..position, dst);
820 }
821
822 for s in 0..4 * ALPHABET_SIZE {
823 let a = buckets[s];
824 let b = state.buckets[s];
825 buckets[s] = a + b;
826 state.buckets[s] = a;
827 }
828 }
829
830 m
831}
832
833#[doc(hidden)]
835pub fn count_and_gather_lms_suffixes_32s_4k(
836 t: &[SaSint],
837 sa: &mut [SaSint],
838 n: SaSint,
839 k: SaSint,
840 buckets: &mut [SaSint],
841 omp_block_start: FastSint,
842 omp_block_size: FastSint,
843) -> SaSint {
844 buckets.fill(0);
845 let n = n as FastSint;
846 let _k = k as FastSint;
847 let mut m = omp_block_start + omp_block_size - 1;
848
849 if omp_block_size > 0 {
850 let prefetch_distance = 64 as FastSint;
851 let mut j = m + 1;
852 let mut c0 = t[m as usize] as FastSint;
853 let mut c1 = -1;
854
855 while j < n {
856 c1 = t[j as usize] as FastSint;
857 if c1 != c0 {
858 break;
859 }
860 j += 1;
861 }
862
863 let mut f0 = usize::from(c0 >= c1);
864 let mut f1: usize;
865 let mut i = m - 1;
866 let limit = omp_block_start + prefetch_distance + 3;
867
868 while i >= limit {
869 let _prefetch_index = i - 2 * prefetch_distance;
870 c1 = t[i as usize] as FastSint;
871 f1 = usize::from(c1 > (c0 - f0 as FastSint));
872 sa[m as usize] = (i + 1) as SaSint;
873 m -= (f1 & !f0) as FastSint;
874 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
875
876 c0 = t[(i - 1) as usize] as FastSint;
877 f0 = usize::from(c0 > (c1 - f1 as FastSint));
878 sa[m as usize] = i as SaSint;
879 m -= (f0 & !f1) as FastSint;
880 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
881
882 c1 = t[(i - 2) as usize] as FastSint;
883 f1 = usize::from(c1 > (c0 - f0 as FastSint));
884 sa[m as usize] = (i - 1) as SaSint;
885 m -= (f1 & !f0) as FastSint;
886 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
887
888 c0 = t[(i - 3) as usize] as FastSint;
889 f0 = usize::from(c0 > (c1 - f1 as FastSint));
890 sa[m as usize] = (i - 2) as SaSint;
891 m -= (f0 & !f1) as FastSint;
892 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
893
894 i -= 4;
895 }
896
897 let tail_limit = omp_block_start;
898 while i >= tail_limit {
899 c1 = c0;
900 c0 = t[i as usize] as FastSint;
901 f1 = f0;
902 f0 = usize::from(c0 > (c1 - f1 as FastSint));
903 sa[m as usize] = (i + 1) as SaSint;
904 m -= (f0 & !f1) as FastSint;
905 buckets[buckets_index4((c1 as SaSint & SAINT_MAX) as usize, f1 + f1 + f0)] += 1;
906 i -= 1;
907 }
908
909 c1 = if i >= 0 {
910 t[i as usize] as FastSint
911 } else {
912 -1
913 };
914 f1 = usize::from(c1 > (c0 - f0 as FastSint));
915 sa[m as usize] = (i + 1) as SaSint;
916 m -= (f1 & !f0) as FastSint;
917 buckets[buckets_index4((c0 as SaSint & SAINT_MAX) as usize, f0 + f0 + f1)] += 1;
918 }
919
920 (omp_block_start + omp_block_size - 1 - m) as SaSint
921}
922
923#[doc(hidden)]
925pub fn count_and_gather_lms_suffixes_32s_2k(
926 t: &[SaSint],
927 sa: &mut [SaSint],
928 n: SaSint,
929 k: SaSint,
930 buckets: &mut [SaSint],
931 omp_block_start: FastSint,
932 omp_block_size: FastSint,
933) -> SaSint {
934 buckets.fill(0);
935 let n = n as FastSint;
936 let _k = k as FastSint;
937 let mut m = omp_block_start + omp_block_size - 1;
938
939 if omp_block_size > 0 {
940 let prefetch_distance = 64 as FastSint;
941 let mut j = m + 1;
942 let mut c0 = t[m as usize] as FastSint;
943 let mut c1 = -1;
944
945 while j < n {
946 c1 = t[j as usize] as FastSint;
947 if c1 != c0 {
948 break;
949 }
950 j += 1;
951 }
952
953 let mut f0 = usize::from(c0 >= c1);
954 let mut f1: usize;
955 let mut i = m - 1;
956 let limit = omp_block_start + prefetch_distance + 3;
957
958 while i >= limit {
959 let _prefetch_index = i - 2 * prefetch_distance;
960 c1 = t[i as usize] as FastSint;
961 f1 = usize::from(c1 > (c0 - f0 as FastSint));
962 sa[m as usize] = (i + 1) as SaSint;
963 m -= (f1 & !f0) as FastSint;
964 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
965
966 c0 = t[(i - 1) as usize] as FastSint;
967 f0 = usize::from(c0 > (c1 - f1 as FastSint));
968 sa[m as usize] = i as SaSint;
969 m -= (f0 & !f1) as FastSint;
970 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
971
972 c1 = t[(i - 2) as usize] as FastSint;
973 f1 = usize::from(c1 > (c0 - f0 as FastSint));
974 sa[m as usize] = (i - 1) as SaSint;
975 m -= (f1 & !f0) as FastSint;
976 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
977
978 c0 = t[(i - 3) as usize] as FastSint;
979 f0 = usize::from(c0 > (c1 - f1 as FastSint));
980 sa[m as usize] = (i - 2) as SaSint;
981 m -= (f0 & !f1) as FastSint;
982 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
983
984 i -= 4;
985 }
986
987 let tail_limit = omp_block_start;
988 while i >= tail_limit {
989 c1 = c0;
990 c0 = t[i as usize] as FastSint;
991 f1 = f0;
992 f0 = usize::from(c0 > (c1 - f1 as FastSint));
993 sa[m as usize] = (i + 1) as SaSint;
994 m -= (f0 & !f1) as FastSint;
995 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
996 i -= 1;
997 }
998
999 c1 = if i >= 0 {
1000 t[i as usize] as FastSint
1001 } else {
1002 -1
1003 };
1004 f1 = usize::from(c1 > (c0 - f0 as FastSint));
1005 sa[m as usize] = (i + 1) as SaSint;
1006 m -= (f1 & !f0) as FastSint;
1007 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
1008 }
1009
1010 (omp_block_start + omp_block_size - 1 - m) as SaSint
1011}
1012
1013#[doc(hidden)]
1015pub fn count_and_gather_compacted_lms_suffixes_32s_2k(
1016 t: &[SaSint],
1017 sa: &mut [SaSint],
1018 n: SaSint,
1019 k: SaSint,
1020 buckets: &mut [SaSint],
1021 omp_block_start: FastSint,
1022 omp_block_size: FastSint,
1023) -> SaSint {
1024 buckets.fill(0);
1025 let n_usize = usize::try_from(n).expect("n must be non-negative");
1026 let _k_usize = usize::try_from(k).expect("k must be non-negative");
1027 let block_start =
1028 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
1029 let block_size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
1030 let mut m = block_start + block_size - 1;
1031
1032 if omp_block_size > 0 {
1033 let mut j = m + 1;
1034 let mut c0 = t[m] as FastSint;
1035 let mut c1 = -1;
1036
1037 while j < n_usize {
1038 c1 = t[j] as FastSint;
1039 if c1 != c0 {
1040 break;
1041 }
1042 j += 1;
1043 }
1044
1045 let mut f0 = usize::from(c0 >= c1);
1046 let mut f1: usize;
1047 let mut i = m as FastSint - 1;
1048 let limit = block_start as FastSint + 3;
1049
1050 while i >= limit {
1051 c1 = t[i as usize] as FastSint;
1052 f1 = usize::from(c1 > (c0 - f0 as FastSint));
1053 sa[m] = (i + 1) as SaSint;
1054 m -= f1 & !f0 & usize::from(c0 >= 0);
1055 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
1056
1057 c0 = t[(i - 1) as usize] as FastSint;
1058 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1059 sa[m] = i as SaSint;
1060 m -= f0 & !f1 & usize::from(c1 >= 0);
1061 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
1062
1063 c1 = t[(i - 2) as usize] as FastSint;
1064 f1 = usize::from(c1 > (c0 - f0 as FastSint));
1065 sa[m] = (i - 1) as SaSint;
1066 m -= f1 & !f0 & usize::from(c0 >= 0);
1067 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
1068
1069 c0 = t[(i - 3) as usize] as FastSint;
1070 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1071 sa[m] = (i - 2) as SaSint;
1072 m -= f0 & !f1 & usize::from(c1 >= 0);
1073 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
1074
1075 i -= 4;
1076 }
1077
1078 let tail_limit = block_start as FastSint;
1079 while i >= tail_limit {
1080 c1 = c0;
1081 c0 = t[i as usize] as FastSint;
1082 f1 = f0;
1083 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1084 sa[m] = (i + 1) as SaSint;
1085 m -= f0 & !f1 & usize::from(c1 >= 0);
1086 buckets[buckets_index2((c1 as SaSint & SAINT_MAX) as usize, f0 & !f1)] += 1;
1087 i -= 1;
1088 }
1089
1090 c1 = if i >= 0 {
1091 t[i as usize] as FastSint
1092 } else {
1093 -1
1094 };
1095 f1 = usize::from(c1 > (c0 - f0 as FastSint));
1096 sa[m] = (i + 1) as SaSint;
1097 m -= f1 & !f0 & usize::from(c0 >= 0);
1098 buckets[buckets_index2((c0 as SaSint & SAINT_MAX) as usize, f1 & !f0)] += 1;
1099 }
1100
1101 (block_start + block_size - 1 - m) as SaSint
1102}
1103
1104#[doc(hidden)]
1106pub fn get_bucket_stride(
1107 free_space: FastSint,
1108 bucket_size: FastSint,
1109 num_buckets: FastSint,
1110) -> FastSint {
1111 let bucket_size_1024 = (bucket_size + 1023) & (-1024);
1112 if free_space / (num_buckets - 1) >= bucket_size_1024 {
1113 return bucket_size_1024;
1114 }
1115 let bucket_size_16 = (bucket_size + 15) & (-16);
1116 if free_space / (num_buckets - 1) >= bucket_size_16 {
1117 return bucket_size_16;
1118 }
1119 bucket_size
1120}
1121
1122#[doc(hidden)]
1124pub fn count_and_gather_lms_suffixes_32s_4k_nofs_omp(
1125 t: &[SaSint],
1126 sa: &mut [SaSint],
1127 n: SaSint,
1128 k: SaSint,
1129 buckets: &mut [SaSint],
1130 threads: SaSint,
1131) -> SaSint {
1132 let m;
1133 let omp_num_threads = if threads > 1 && n >= 65_536 { 2 } else { 1 };
1134
1135 if omp_num_threads == 1 {
1136 m = count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as FastSint);
1137 } else {
1138 count_lms_suffixes_32s_4k(t, n, k, buckets);
1139 m = gather_lms_suffixes_32s(t, sa, n);
1140 }
1141
1142 m
1143}
1144
1145#[doc(hidden)]
1147pub fn count_and_gather_lms_suffixes_32s_2k_nofs_omp(
1148 t: &[SaSint],
1149 sa: &mut [SaSint],
1150 n: SaSint,
1151 k: SaSint,
1152 buckets: &mut [SaSint],
1153 threads: SaSint,
1154) -> SaSint {
1155 let m;
1156 let omp_num_threads = if threads > 1 && n >= 65_536 { 2 } else { 1 };
1157
1158 if omp_num_threads == 1 {
1159 m = count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1160 } else {
1161 count_lms_suffixes_32s_2k(t, n, k, buckets);
1162 m = gather_lms_suffixes_32s(t, sa, n);
1163 }
1164
1165 m
1166}
1167
1168#[doc(hidden)]
1170pub fn count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
1171 t: &[SaSint],
1172 sa: &mut [SaSint],
1173 n: SaSint,
1174 k: SaSint,
1175 buckets: &mut [SaSint],
1176 threads: SaSint,
1177) -> SaSint {
1178 let m;
1179 let omp_num_threads = if threads > 1 && n >= 65_536 { 2 } else { 1 };
1180
1181 if omp_num_threads == 1 {
1182 m = count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1183 } else {
1184 count_compacted_lms_suffixes_32s_2k(t, n, k, buckets);
1185 m = gather_compacted_lms_suffixes_32s(t, sa, n);
1186 }
1187
1188 m
1189}
1190
1191#[doc(hidden)]
1193pub fn count_and_gather_lms_suffixes_32s_4k_fs_omp(
1194 t: &[SaSint],
1195 sa: &mut [SaSint],
1196 n: SaSint,
1197 k: SaSint,
1198 buckets: &mut [SaSint],
1199 local_buckets: SaSint,
1200 threads: SaSint,
1201 thread_state: &mut [ThreadState],
1202) -> SaSint {
1203 let n_usize = usize::try_from(n).expect("n must be non-negative");
1204 let k_usize = usize::try_from(k).expect("k must be non-negative");
1205 let omp_num_threads = usize::try_from(threads).expect("threads must be non-negative");
1206 let bucket_size = FastSint::try_from(4 * k_usize).expect("bucket size must fit FastSint");
1207
1208 if omp_num_threads <= 1 || n < 65_536 {
1209 return count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as FastSint);
1210 }
1211
1212 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
1213 let free_space = if local_buckets == 1 {
1214 FastSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("free space must fit FastSint")
1215 } else if local_buckets > 1 {
1216 FastSint::try_from(local_buckets).expect("free space must fit FastSint")
1217 } else {
1218 FastSint::try_from(buckets.len()).expect("free space must fit FastSint")
1219 };
1220 let bucket_stride = get_bucket_stride(
1221 free_space,
1222 bucket_size,
1223 FastSint::try_from(omp_num_threads).expect("thread count must fit FastSint"),
1224 );
1225 let bucket_size_usize = usize::try_from(bucket_size).expect("bucket size must be non-negative");
1226 let bucket_stride_usize =
1227 usize::try_from(bucket_stride).expect("bucket stride must be non-negative");
1228 let workspace_len =
1229 bucket_size_usize + bucket_stride_usize.saturating_mul(omp_num_threads.saturating_sub(1));
1230 let mut workspace = vec![0; workspace_len];
1231
1232 for omp_thread_num in 0..omp_num_threads {
1233 let omp_block_start = omp_thread_num * omp_block_stride;
1234 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1235 omp_block_stride
1236 } else {
1237 n_usize - omp_block_start
1238 };
1239 let workspace_end = workspace_len - omp_thread_num * bucket_stride_usize;
1240 let workspace_start = workspace_end - bucket_size_usize;
1241 let count = count_and_gather_lms_suffixes_32s_4k(
1242 t,
1243 sa,
1244 n,
1245 k,
1246 &mut workspace[workspace_start..workspace_end],
1247 omp_block_start as FastSint,
1248 omp_block_size as FastSint,
1249 );
1250
1251 thread_state[omp_thread_num].position = (omp_block_start + omp_block_size) as FastSint;
1252 thread_state[omp_thread_num].count = count as FastSint;
1253 }
1254
1255 let mut m = 0;
1256 for t in (0..omp_num_threads).rev() {
1257 m += thread_state[t].count as SaSint;
1258
1259 if t + 1 != omp_num_threads && thread_state[t].count > 0 {
1260 let src_end =
1261 usize::try_from(thread_state[t].position).expect("position must be non-negative");
1262 let src_start = src_end
1263 - usize::try_from(thread_state[t].count).expect("count must be non-negative");
1264 let dst_start = usize::try_from(n - m).expect("destination must be non-negative");
1265 sa.copy_within(src_start..src_end, dst_start);
1266 }
1267 }
1268
1269 let omp_num_threads = omp_num_threads - 1;
1270 let omp_block_stride = (bucket_size_usize / omp_num_threads) & !15usize;
1271 for omp_thread_num in 0..omp_num_threads {
1272 let omp_block_start = omp_thread_num * omp_block_stride;
1273 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1274 omp_block_stride
1275 } else {
1276 bucket_size_usize - omp_block_start
1277 };
1278 accumulate_counts_s32(
1279 &mut workspace[omp_block_start..],
1280 omp_block_size as FastSint,
1281 bucket_stride,
1282 FastSint::try_from(omp_num_threads + 1).expect("thread count must fit FastSint"),
1283 );
1284 }
1285
1286 let accumulated_start = omp_num_threads * bucket_stride_usize;
1287 buckets[..bucket_size_usize]
1288 .copy_from_slice(&workspace[accumulated_start..accumulated_start + bucket_size_usize]);
1289 m
1290}
1291
1292#[doc(hidden)]
1294pub fn count_and_gather_lms_suffixes_32s_2k_fs_omp(
1295 t: &[SaSint],
1296 sa: &mut [SaSint],
1297 n: SaSint,
1298 k: SaSint,
1299 buckets: &mut [SaSint],
1300 local_buckets: SaSint,
1301 threads: SaSint,
1302 thread_state: &mut [ThreadState],
1303) -> SaSint {
1304 let n_usize = usize::try_from(n).expect("n must be non-negative");
1305 let k_usize = usize::try_from(k).expect("k must be non-negative");
1306 let omp_num_threads = usize::try_from(threads).expect("threads must be non-negative");
1307 let bucket_size = FastSint::try_from(2 * k_usize).expect("bucket size must fit FastSint");
1308
1309 if omp_num_threads <= 1 || n < 65_536 {
1310 return count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1311 }
1312
1313 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
1314 let free_space = if local_buckets == 1 {
1315 FastSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("free space must fit FastSint")
1316 } else if local_buckets > 1 {
1317 FastSint::try_from(local_buckets).expect("free space must fit FastSint")
1318 } else {
1319 FastSint::try_from(buckets.len()).expect("free space must fit FastSint")
1320 };
1321 let bucket_stride = get_bucket_stride(
1322 free_space,
1323 bucket_size,
1324 FastSint::try_from(omp_num_threads).expect("thread count must fit FastSint"),
1325 );
1326 let bucket_size_usize = usize::try_from(bucket_size).expect("bucket size must be non-negative");
1327 let bucket_stride_usize =
1328 usize::try_from(bucket_stride).expect("bucket stride must be non-negative");
1329 let workspace_len =
1330 bucket_size_usize + bucket_stride_usize.saturating_mul(omp_num_threads.saturating_sub(1));
1331 let mut workspace = vec![0; workspace_len];
1332
1333 for omp_thread_num in 0..omp_num_threads {
1334 let omp_block_start = omp_thread_num * omp_block_stride;
1335 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1336 omp_block_stride
1337 } else {
1338 n_usize - omp_block_start
1339 };
1340 let workspace_end = workspace_len - omp_thread_num * bucket_stride_usize;
1341 let workspace_start = workspace_end - bucket_size_usize;
1342 let count = count_and_gather_lms_suffixes_32s_2k(
1343 t,
1344 sa,
1345 n,
1346 k,
1347 &mut workspace[workspace_start..workspace_end],
1348 omp_block_start as FastSint,
1349 omp_block_size as FastSint,
1350 );
1351
1352 thread_state[omp_thread_num].position = (omp_block_start + omp_block_size) as FastSint;
1353 thread_state[omp_thread_num].count = count as FastSint;
1354 }
1355
1356 let mut m = 0;
1357 for t in (0..omp_num_threads).rev() {
1358 m += thread_state[t].count as SaSint;
1359 if t + 1 != omp_num_threads && thread_state[t].count > 0 {
1360 let src_end =
1361 usize::try_from(thread_state[t].position).expect("position must be non-negative");
1362 let src_start = src_end
1363 - usize::try_from(thread_state[t].count).expect("count must be non-negative");
1364 let dst_start = usize::try_from(n - m).expect("destination must be non-negative");
1365 sa.copy_within(src_start..src_end, dst_start);
1366 }
1367 }
1368
1369 let omp_num_threads = omp_num_threads - 1;
1370 let omp_block_stride = (bucket_size_usize / omp_num_threads) & !15usize;
1371 for omp_thread_num in 0..omp_num_threads {
1372 let omp_block_start = omp_thread_num * omp_block_stride;
1373 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
1374 omp_block_stride
1375 } else {
1376 bucket_size_usize - omp_block_start
1377 };
1378 accumulate_counts_s32(
1379 &mut workspace[omp_block_start..],
1380 omp_block_size as FastSint,
1381 bucket_stride,
1382 FastSint::try_from(omp_num_threads + 1).expect("thread count must fit FastSint"),
1383 );
1384 }
1385
1386 let accumulated_start = omp_num_threads * bucket_stride_usize;
1387 buckets[..bucket_size_usize]
1388 .copy_from_slice(&workspace[accumulated_start..accumulated_start + bucket_size_usize]);
1389 m
1390}
1391
1392#[doc(hidden)]
1394pub fn count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1395 t: &[SaSint],
1396 sa: &mut [SaSint],
1397 n: SaSint,
1398 k: SaSint,
1399 buckets: &mut [SaSint],
1400 _local_buckets: SaSint,
1401 threads: SaSint,
1402 thread_state: &mut [ThreadState],
1403) {
1404 let n_usize = usize::try_from(n).expect("n must be non-negative");
1405 let k_usize = usize::try_from(k).expect("k must be non-negative");
1406 let thread_count = usize::try_from(threads).expect("threads must be non-negative");
1407 let bucket_size = 2 * k_usize;
1408
1409 if thread_count <= 1 || n < 65_536 {
1410 let _ =
1411 count_and_gather_compacted_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
1412 return;
1413 }
1414
1415 if thread_state.len() < thread_count || sa.len() < 2 * n_usize {
1416 let _ =
1417 count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1418 return;
1419 }
1420
1421 let omp_block_stride = (n_usize / thread_count) & !15usize;
1422 let free_space = if _local_buckets != 0 {
1423 FastSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("free space must fit FastSint")
1424 } else {
1425 FastSint::try_from(buckets.len()).expect("free space must fit FastSint")
1426 };
1427 let bucket_stride = get_bucket_stride(
1428 free_space,
1429 FastSint::try_from(bucket_size).expect("bucket size must fit FastSint"),
1430 FastSint::try_from(thread_count).expect("thread count must fit FastSint"),
1431 );
1432 let bucket_stride_usize =
1433 usize::try_from(bucket_stride).expect("bucket stride must be non-negative");
1434 let workspace_len =
1435 bucket_size + bucket_stride_usize.saturating_mul(thread_count.saturating_sub(1));
1436 let mut workspace = vec![0; workspace_len];
1437
1438 for omp_thread_num in 0..thread_count {
1439 let omp_block_start = omp_thread_num * omp_block_stride;
1440 let omp_block_size = if omp_thread_num + 1 < thread_count {
1441 omp_block_stride
1442 } else {
1443 n_usize - omp_block_start
1444 };
1445
1446 let workspace_end = workspace_len - omp_thread_num * bucket_stride_usize;
1447 let workspace_start = workspace_end - bucket_size;
1448 let count = count_and_gather_compacted_lms_suffixes_32s_2k(
1449 t,
1450 &mut sa[n_usize..],
1451 n,
1452 k,
1453 &mut workspace[workspace_start..workspace_end],
1454 omp_block_start as FastSint,
1455 omp_block_size as FastSint,
1456 );
1457
1458 if omp_thread_num < thread_state.len() {
1459 thread_state[omp_thread_num].position = (omp_block_start + omp_block_size) as FastSint;
1460 thread_state[omp_thread_num].count = count as FastSint;
1461 }
1462 }
1463
1464 let mut m = 0usize;
1465 for omp_thread_num in (0..thread_count).rev() {
1466 let count = usize::try_from(thread_state[omp_thread_num].count)
1467 .expect("count must be non-negative");
1468 m += count;
1469 if count > 0 {
1470 let position = usize::try_from(thread_state[omp_thread_num].position)
1471 .expect("position must be non-negative");
1472 let src_start = n_usize + position - count;
1473 let src_end = n_usize + position;
1474 let dst_start = n_usize - m;
1475 sa.copy_within(src_start..src_end, dst_start);
1476 }
1477 }
1478
1479 let accumulation_threads = thread_count;
1480 let omp_block_stride = (bucket_size / accumulation_threads) & !15usize;
1481 for omp_thread_num in 0..accumulation_threads {
1482 let omp_block_start = omp_thread_num * omp_block_stride;
1483 let omp_block_size = if omp_thread_num + 1 < accumulation_threads {
1484 omp_block_stride
1485 } else {
1486 bucket_size - omp_block_start
1487 };
1488 accumulate_counts_s32(
1489 &mut workspace[omp_block_start..],
1490 omp_block_size as FastSint,
1491 bucket_stride,
1492 FastSint::try_from(thread_count).expect("thread count must fit FastSint"),
1493 );
1494 }
1495 let accumulated_start = (accumulation_threads - 1) * bucket_stride_usize;
1496 buckets[..bucket_size]
1497 .copy_from_slice(&workspace[accumulated_start..accumulated_start + bucket_size]);
1498}
1499
1500#[doc(hidden)]
1502pub fn count_and_gather_lms_suffixes_32s_4k_omp(
1503 t: &[SaSint],
1504 sa: &mut [SaSint],
1505 n: SaSint,
1506 k: SaSint,
1507 buckets: &mut [SaSint],
1508 local_buckets: SaSint,
1509 threads: SaSint,
1510 thread_state: &mut [ThreadState],
1511) -> SaSint {
1512 let free_space = if local_buckets > 1 {
1513 local_buckets as FastSint
1514 } else if local_buckets != 0 {
1515 LIBSAIS_LOCAL_BUFFER_SIZE as FastSint
1516 } else {
1517 FastSint::try_from(buckets.len()).expect("bucket length must fit FastSint")
1518 };
1519 let threads_fast = threads as FastSint;
1520 let mut max_threads = (free_space / (((4 * k as FastSint) + 15) & -16)).min(threads_fast);
1521
1522 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1523 let thread_cap = (n / (16 * k)) as FastSint;
1524 if max_threads > thread_cap {
1525 max_threads = thread_cap;
1526 }
1527 return count_and_gather_lms_suffixes_32s_4k_fs_omp(
1528 t,
1529 sa,
1530 n,
1531 k,
1532 buckets,
1533 local_buckets,
1534 max_threads.max(2) as SaSint,
1535 thread_state,
1536 );
1537 }
1538
1539 if threads > 1 && n >= 65_536 {
1540 count_lms_suffixes_32s_4k(t, n, k, buckets);
1541 gather_lms_suffixes_32s(t, sa, n)
1542 } else {
1543 count_and_gather_lms_suffixes_32s_4k(t, sa, n, k, buckets, 0, n as FastSint)
1544 }
1545}
1546
1547#[doc(hidden)]
1549pub fn count_and_gather_lms_suffixes_32s_2k_omp(
1550 t: &[SaSint],
1551 sa: &mut [SaSint],
1552 n: SaSint,
1553 k: SaSint,
1554 buckets: &mut [SaSint],
1555 local_buckets: SaSint,
1556 threads: SaSint,
1557 thread_state: &mut [ThreadState],
1558) -> SaSint {
1559 let free_space = if local_buckets > 1 {
1560 local_buckets as FastSint
1561 } else if local_buckets != 0 {
1562 LIBSAIS_LOCAL_BUFFER_SIZE as FastSint
1563 } else {
1564 FastSint::try_from(buckets.len()).expect("bucket length must fit FastSint")
1565 };
1566 let threads_fast = threads as FastSint;
1567 let mut max_threads = (free_space / (((2 * k as FastSint) + 15) & -16)).min(threads_fast);
1568
1569 if max_threads > 1 && n >= 65_536 && n / k >= 2 {
1570 let thread_cap = (n / (8 * k)) as FastSint;
1571 if max_threads > thread_cap {
1572 max_threads = thread_cap;
1573 }
1574 return count_and_gather_lms_suffixes_32s_2k_fs_omp(
1575 t,
1576 sa,
1577 n,
1578 k,
1579 buckets,
1580 local_buckets,
1581 max_threads.max(2) as SaSint,
1582 thread_state,
1583 );
1584 }
1585
1586 if threads > 1 && n >= 65_536 {
1587 count_lms_suffixes_32s_2k(t, n, k, buckets);
1588 gather_lms_suffixes_32s(t, sa, n)
1589 } else {
1590 count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint)
1591 }
1592}
1593
1594#[doc(hidden)]
1596pub fn count_and_gather_compacted_lms_suffixes_32s_2k_omp(
1597 t: &[SaSint],
1598 sa: &mut [SaSint],
1599 n: SaSint,
1600 k: SaSint,
1601 buckets: &mut [SaSint],
1602 local_buckets: SaSint,
1603 threads: SaSint,
1604 thread_state: &mut [ThreadState],
1605) {
1606 let free_space = if local_buckets != 0 {
1607 LIBSAIS_LOCAL_BUFFER_SIZE as FastSint
1608 } else {
1609 FastSint::try_from(buckets.len()).expect("bucket length must fit FastSint")
1610 };
1611 let threads_fast = threads as FastSint;
1612 let mut max_threads = (free_space / (((2 * k as FastSint) + 15) & -16)).min(threads_fast);
1613
1614 if local_buckets == 0 && max_threads > 1 && n >= 65_536 && n / k >= 2 {
1615 let thread_cap = (n / (8 * k)) as FastSint;
1616 if max_threads > thread_cap {
1617 max_threads = thread_cap;
1618 }
1619 count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
1620 t,
1621 sa,
1622 n,
1623 k,
1624 buckets,
1625 local_buckets,
1626 max_threads.max(2) as SaSint,
1627 thread_state,
1628 );
1629 return;
1630 }
1631
1632 let _ = count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(t, sa, n, k, buckets, threads);
1633}
1634
1635#[doc(hidden)]
1637pub fn count_suffixes_32s(t: &[SaSint], n: SaSint, k: SaSint, buckets: &mut [SaSint]) {
1638 let n_usize = usize::try_from(n).expect("n must be non-negative");
1639 let k_usize = usize::try_from(k).expect("k must be non-negative");
1640 buckets[..k_usize].fill(0);
1641
1642 let mut i = 0usize;
1643 let mut j = n_usize.saturating_sub(7);
1644 while i < j {
1645 buckets[t[i] as usize] += 1;
1646 buckets[t[i + 1] as usize] += 1;
1647 buckets[t[i + 2] as usize] += 1;
1648 buckets[t[i + 3] as usize] += 1;
1649 buckets[t[i + 4] as usize] += 1;
1650 buckets[t[i + 5] as usize] += 1;
1651 buckets[t[i + 6] as usize] += 1;
1652 buckets[t[i + 7] as usize] += 1;
1653 i += 8;
1654 }
1655
1656 j += 7;
1657 while i < j {
1658 buckets[t[i] as usize] += 1;
1659 i += 1;
1660 }
1661}
1662
1663#[doc(hidden)]
1665pub fn initialize_buckets_start_and_end_8u(
1666 buckets: &mut [SaSint],
1667 freq: Option<&mut [SaSint]>,
1668) -> SaSint {
1669 let start_offset = 6 * ALPHABET_SIZE;
1670 let end_offset = 7 * ALPHABET_SIZE;
1671 let mut k = -1isize;
1672 let mut sum = 0;
1673
1674 match freq {
1675 Some(freq) => {
1676 for j in 0..ALPHABET_SIZE {
1677 let i = buckets_index4(j, 0);
1678 let total = buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1679 buckets[start_offset + j] = sum;
1680 sum += total;
1681 buckets[end_offset + j] = sum;
1682 if total > 0 {
1683 k = j as isize;
1684 }
1685 freq[j] = total;
1686 }
1687 }
1688 None => {
1689 for j in 0..ALPHABET_SIZE {
1690 let i = buckets_index4(j, 0);
1691 let total = buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1692 buckets[start_offset + j] = sum;
1693 sum += total;
1694 buckets[end_offset + j] = sum;
1695 if total > 0 {
1696 k = j as isize;
1697 }
1698 }
1699 }
1700 }
1701
1702 (k + 1) as SaSint
1703}
1704
1705#[doc(hidden)]
1707pub fn initialize_buckets_start_and_end_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
1708 let k_usize = usize::try_from(k).expect("k must be non-negative");
1709 let start_offset = 4 * k_usize;
1710 let end_offset = 5 * k_usize;
1711 let mut sum = 0;
1712 for j in 0..k_usize {
1713 let i = buckets_index4(j, 0);
1714 buckets[start_offset + j] = sum;
1715 sum += buckets[i] + buckets[i + 1] + buckets[i + 2] + buckets[i + 3];
1716 buckets[end_offset + j] = sum;
1717 }
1718}
1719
1720#[doc(hidden)]
1722pub fn initialize_buckets_start_and_end_32s_4k(k: SaSint, buckets: &mut [SaSint]) {
1723 let k_usize = usize::try_from(k).expect("k must be non-negative");
1724 let start_offset = 2 * k_usize;
1725 let end_offset = 3 * k_usize;
1726 let mut sum = 0;
1727 for j in 0..k_usize {
1728 let i = buckets_index2(j, 0);
1729 buckets[start_offset + j] = sum;
1730 sum += buckets[i] + buckets[i + 1];
1731 buckets[end_offset + j] = sum;
1732 }
1733}
1734
1735#[doc(hidden)]
1737pub fn initialize_buckets_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1738 let k_usize = usize::try_from(k).expect("k must be non-negative");
1739 let mut sum0 = 0;
1740 for j in 0..k_usize {
1741 let i = buckets_index2(j, 0);
1742 sum0 += buckets[i] + buckets[i + 1];
1743 buckets[i] = sum0;
1744 }
1745}
1746
1747#[doc(hidden)]
1749pub fn initialize_buckets_start_and_end_32s_2k(k: SaSint, buckets: &mut [SaSint]) {
1750 let k_usize = usize::try_from(k).expect("k must be non-negative");
1751 for j in 0..k_usize {
1752 let i = buckets_index2(j, 0);
1753 buckets[j] = buckets[i];
1754 }
1755 buckets[k_usize] = 0;
1756 for j in 1..k_usize {
1757 buckets[k_usize + j] = buckets[j - 1];
1758 }
1759}
1760
1761#[doc(hidden)]
1763pub fn initialize_buckets_start_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1764 let k_usize = usize::try_from(k).expect("k must be non-negative");
1765 let mut sum = 0;
1766 for bucket in buckets.iter_mut().take(k_usize) {
1767 let tmp = *bucket;
1768 *bucket = sum;
1769 sum += tmp;
1770 }
1771}
1772
1773#[doc(hidden)]
1775pub fn initialize_buckets_end_32s_1k(k: SaSint, buckets: &mut [SaSint]) {
1776 let k_usize = usize::try_from(k).expect("k must be non-negative");
1777 let mut sum = 0;
1778 for bucket in buckets.iter_mut().take(k_usize) {
1779 sum += *bucket;
1780 *bucket = sum;
1781 }
1782}
1783
1784#[doc(hidden)]
1786pub fn initialize_buckets_for_lms_suffixes_radix_sort_8u(
1787 t: &[u8],
1788 buckets: &mut [SaSint],
1789 mut first_lms_suffix: SaSint,
1790) -> SaSint {
1791 let mut f0 = 0usize;
1792 let mut f1: usize;
1793 let mut c0 = t[first_lms_suffix as usize] as FastSint;
1794 let mut c1: FastSint;
1795
1796 while {
1797 first_lms_suffix -= 1;
1798 first_lms_suffix >= 0
1799 } {
1800 c1 = c0;
1801 c0 = t[first_lms_suffix as usize] as FastSint;
1802 f1 = f0;
1803 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1804 let idx = 4 * c1 as usize + (f1 + f1 + f0);
1805 buckets[idx] -= 1;
1806 }
1807 buckets[4 * c0 as usize + (f0 + f0)] -= 1;
1808
1809 let temp_offset = 4 * ALPHABET_SIZE;
1810 let mut sum = 0;
1811 for j in 0..ALPHABET_SIZE {
1812 let i = 4 * j;
1813 let tj = 2 * j;
1814 buckets[temp_offset + tj + 1] = sum;
1815 sum += buckets[i + 1] + buckets[i + 3];
1816 buckets[temp_offset + tj] = sum;
1817 }
1818 sum
1819}
1820
1821#[doc(hidden)]
1823pub fn initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
1824 t: &[SaSint],
1825 k: SaSint,
1826 buckets: &mut [SaSint],
1827 first_lms_suffix: SaSint,
1828) {
1829 let _k_usize = usize::try_from(k).expect("k must be non-negative");
1830 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1831 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1832
1833 let mut sum0 = 0;
1834 let mut sum1 = 0;
1835 for j in 0..usize::try_from(k).unwrap() {
1836 let i = buckets_index2(j, 0);
1837 sum0 += buckets[i] + buckets[i + 1];
1838 sum1 += buckets[i + 1];
1839 buckets[i] = sum0;
1840 buckets[i + 1] = sum1;
1841 }
1842}
1843
1844#[doc(hidden)]
1846pub fn initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
1847 t: &[SaSint],
1848 k: SaSint,
1849 buckets: &mut [SaSint],
1850 mut first_lms_suffix: SaSint,
1851) -> SaSint {
1852 let mut f0 = 0usize;
1853 let mut f1: usize;
1854 let mut c0 = t[first_lms_suffix as usize] as FastSint;
1855 let mut c1: FastSint;
1856
1857 while {
1858 first_lms_suffix -= 1;
1859 first_lms_suffix >= 0
1860 } {
1861 c1 = c0;
1862 c0 = t[first_lms_suffix as usize] as FastSint;
1863 f1 = f0;
1864 f0 = usize::from(c0 > (c1 - f1 as FastSint));
1865 buckets[4 * c1 as usize + (f1 + f1 + f0)] -= 1;
1866 }
1867 buckets[4 * c0 as usize + (f0 + f0)] -= 1;
1868
1869 let temp_offset = 4 * usize::try_from(k).unwrap();
1870 let mut sum = 0;
1871 for j in 0..usize::try_from(k).unwrap() {
1872 let i = 4 * j;
1873 sum += buckets[i + 1] + buckets[i + 3];
1874 buckets[temp_offset + j] = sum;
1875 }
1876 sum
1877}
1878
1879#[doc(hidden)]
1881pub fn initialize_buckets_for_radix_and_partial_sorting_32s_4k(
1882 t: &[SaSint],
1883 k: SaSint,
1884 buckets: &mut [SaSint],
1885 first_lms_suffix: SaSint,
1886) {
1887 let k_usize = usize::try_from(k).expect("k must be non-negative");
1888 let start_offset = 2 * k_usize;
1889 let end_offset = 3 * k_usize;
1890
1891 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 0)] += 1;
1892 buckets[buckets_index2(t[first_lms_suffix as usize] as usize, 1)] -= 1;
1893
1894 let mut sum0 = 0;
1895 let mut sum1 = 0;
1896 for j in 0..k_usize {
1897 let i = buckets_index2(j, 0);
1898 buckets[start_offset + j] = sum1;
1899 sum0 += buckets[i + 1];
1900 sum1 += buckets[i] + buckets[i + 1];
1901 buckets[i + 1] = sum0;
1902 buckets[end_offset + j] = sum1;
1903 }
1904}
1905
1906#[doc(hidden)]
1908pub fn radix_sort_lms_suffixes_8u(
1909 t: &[u8],
1910 sa: &mut [SaSint],
1911 induction_bucket: &mut [SaSint],
1912 omp_block_start: FastSint,
1913 omp_block_size: FastSint,
1914) {
1915 let prefetch_distance = 64 as FastSint;
1916 let mut i = omp_block_start + omp_block_size - 1;
1917 let mut j = omp_block_start + prefetch_distance + 3;
1918
1919 while i >= j {
1920 let p0 = sa[i as usize];
1921 let idx0 = buckets_index2(t[p0 as usize] as usize, 0);
1922 induction_bucket[idx0] -= 1;
1923 sa[induction_bucket[idx0] as usize] = p0;
1924
1925 let p1 = sa[(i - 1) as usize];
1926 let idx1 = buckets_index2(t[p1 as usize] as usize, 0);
1927 induction_bucket[idx1] -= 1;
1928 sa[induction_bucket[idx1] as usize] = p1;
1929
1930 let p2 = sa[(i - 2) as usize];
1931 let idx2 = buckets_index2(t[p2 as usize] as usize, 0);
1932 induction_bucket[idx2] -= 1;
1933 sa[induction_bucket[idx2] as usize] = p2;
1934
1935 let p3 = sa[(i - 3) as usize];
1936 let idx3 = buckets_index2(t[p3 as usize] as usize, 0);
1937 induction_bucket[idx3] -= 1;
1938 sa[induction_bucket[idx3] as usize] = p3;
1939
1940 i -= 4;
1941 }
1942
1943 j -= prefetch_distance + 3;
1944 while i >= j {
1945 let p = sa[i as usize];
1946 let idx = buckets_index2(t[p as usize] as usize, 0);
1947 induction_bucket[idx] -= 1;
1948 sa[induction_bucket[idx] as usize] = p;
1949 i -= 1;
1950 }
1951}
1952
1953#[doc(hidden)]
1955pub fn radix_sort_lms_suffixes_8u_omp(
1956 t: &[u8],
1957 sa: &mut [SaSint],
1958 n: SaSint,
1959 m: SaSint,
1960 flags: SaSint,
1961 buckets: &mut [SaSint],
1962 threads: SaSint,
1963 thread_state: &mut [ThreadState],
1964) {
1965 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
1966 buckets[4 * ALPHABET_SIZE] -= 1;
1967 }
1968
1969 let omp_num_threads = if threads > 1 && n >= 65_536 && m >= 65_536 {
1970 usize::try_from(threads)
1971 .expect("threads must be non-negative")
1972 .min(thread_state.len())
1973 .max(1)
1974 } else {
1975 1
1976 };
1977
1978 if omp_num_threads == 1 {
1979 radix_sort_lms_suffixes_8u(
1980 t,
1981 sa,
1982 &mut buckets[4 * ALPHABET_SIZE..],
1983 n as FastSint - m as FastSint + 1,
1984 m as FastSint - 1,
1985 );
1986 return;
1987 }
1988
1989 let (_, src_bucket) = buckets.split_at_mut(4 * ALPHABET_SIZE);
1990
1991 for state in thread_state.iter_mut().take(omp_num_threads) {
1992 for (i, j) in (0..=buckets_index2(ALPHABET_SIZE - 1, 0))
1993 .step_by(buckets_index2(1, 0))
1994 .zip((buckets_index4(0, 1)..).step_by(buckets_index4(1, 0)))
1995 {
1996 state.buckets[i] = src_bucket[i] - state.buckets[j];
1997 }
1998 }
1999
2000 for thread_num in 0..omp_num_threads {
2001 let mut omp_block_start = 0;
2002 for state in thread_state
2003 .iter()
2004 .take(omp_num_threads)
2005 .skip(thread_num)
2006 .rev()
2007 {
2008 omp_block_start += state.m;
2009 }
2010
2011 let mut omp_block_size = thread_state[thread_num].m;
2012 if omp_block_start == m as FastSint && omp_block_size > 0 {
2013 omp_block_start -= 1;
2014 omp_block_size -= 1;
2015 }
2016
2017 radix_sort_lms_suffixes_8u(
2018 t,
2019 sa,
2020 &mut thread_state[thread_num].buckets,
2021 n as FastSint - omp_block_start,
2022 omp_block_size,
2023 );
2024 }
2025}
2026
2027#[doc(hidden)]
2029pub fn radix_sort_lms_suffixes_32s_6k(
2030 t: &[SaSint],
2031 sa: &mut [SaSint],
2032 induction_bucket: &mut [SaSint],
2033 omp_block_start: FastSint,
2034 omp_block_size: FastSint,
2035) {
2036 let prefetch_distance = 64 as FastSint;
2037 let mut i = omp_block_start + omp_block_size - 1;
2038 let mut j = omp_block_start + 2 * prefetch_distance + 3;
2039
2040 while i >= j {
2041 let p0 = sa[i as usize];
2042 let idx0 = t[p0 as usize] as usize;
2043 induction_bucket[idx0] -= 1;
2044 sa[induction_bucket[idx0] as usize] = p0;
2045
2046 let p1 = sa[(i - 1) as usize];
2047 let idx1 = t[p1 as usize] as usize;
2048 induction_bucket[idx1] -= 1;
2049 sa[induction_bucket[idx1] as usize] = p1;
2050
2051 let p2 = sa[(i - 2) as usize];
2052 let idx2 = t[p2 as usize] as usize;
2053 induction_bucket[idx2] -= 1;
2054 sa[induction_bucket[idx2] as usize] = p2;
2055
2056 let p3 = sa[(i - 3) as usize];
2057 let idx3 = t[p3 as usize] as usize;
2058 induction_bucket[idx3] -= 1;
2059 sa[induction_bucket[idx3] as usize] = p3;
2060
2061 i -= 4;
2062 }
2063
2064 j -= 2 * prefetch_distance + 3;
2065 while i >= j {
2066 let p = sa[i as usize];
2067 let idx = t[p as usize] as usize;
2068 induction_bucket[idx] -= 1;
2069 sa[induction_bucket[idx] as usize] = p;
2070 i -= 1;
2071 }
2072}
2073
2074#[doc(hidden)]
2076pub fn radix_sort_lms_suffixes_32s_2k(
2077 t: &[SaSint],
2078 sa: &mut [SaSint],
2079 induction_bucket: &mut [SaSint],
2080 omp_block_start: FastSint,
2081 omp_block_size: FastSint,
2082) {
2083 let prefetch_distance = 64 as FastSint;
2084 let mut i = omp_block_start + omp_block_size - 1;
2085 let mut j = omp_block_start + 2 * prefetch_distance + 3;
2086
2087 while i >= j {
2088 let p0 = sa[i as usize];
2089 let idx0 = buckets_index2(t[p0 as usize] as usize, 0);
2090 induction_bucket[idx0] -= 1;
2091 sa[induction_bucket[idx0] as usize] = p0;
2092
2093 let p1 = sa[(i - 1) as usize];
2094 let idx1 = buckets_index2(t[p1 as usize] as usize, 0);
2095 induction_bucket[idx1] -= 1;
2096 sa[induction_bucket[idx1] as usize] = p1;
2097
2098 let p2 = sa[(i - 2) as usize];
2099 let idx2 = buckets_index2(t[p2 as usize] as usize, 0);
2100 induction_bucket[idx2] -= 1;
2101 sa[induction_bucket[idx2] as usize] = p2;
2102
2103 let p3 = sa[(i - 3) as usize];
2104 let idx3 = buckets_index2(t[p3 as usize] as usize, 0);
2105 induction_bucket[idx3] -= 1;
2106 sa[induction_bucket[idx3] as usize] = p3;
2107
2108 i -= 4;
2109 }
2110
2111 j -= 2 * prefetch_distance + 3;
2112 while i >= j {
2113 let p = sa[i as usize];
2114 let idx = buckets_index2(t[p as usize] as usize, 0);
2115 induction_bucket[idx] -= 1;
2116 sa[induction_bucket[idx] as usize] = p;
2117 i -= 1;
2118 }
2119}
2120
2121#[doc(hidden)]
2123pub fn radix_sort_lms_suffixes_32s_block_gather(
2124 t: &[SaSint],
2125 sa: &[SaSint],
2126 cache: &mut [ThreadCache],
2127 omp_block_start: FastSint,
2128 omp_block_size: FastSint,
2129) {
2130 let start = usize::try_from(omp_block_start).expect("block start must be non-negative");
2131 let mut i = omp_block_start;
2132 let mut j = omp_block_start + omp_block_size - 64 - 3;
2133
2134 while i < j {
2135 for current in [i, i + 1, i + 2, i + 3] {
2136 let ci = current as usize - start;
2137 let index = sa[current as usize];
2138 cache[ci].index = index;
2139 cache[ci].symbol = t[index as usize];
2140 }
2141 i += 4;
2142 }
2143
2144 j += 64 + 3;
2145 while i < j {
2146 let ci = i as usize - start;
2147 let index = sa[i as usize];
2148 cache[ci].index = index;
2149 cache[ci].symbol = t[index as usize];
2150 i += 1;
2151 }
2152}
2153
2154#[doc(hidden)]
2156pub fn radix_sort_lms_suffixes_32s_6k_block_sort(
2157 induction_bucket: &mut [SaSint],
2158 cache: &mut [ThreadCache],
2159 omp_block_start: FastSint,
2160 omp_block_size: FastSint,
2161) {
2162 let start = usize::try_from(omp_block_start).expect("block start must be non-negative");
2163 let mut i = omp_block_start + omp_block_size - 1;
2164 let mut j = omp_block_start + 64 + 3;
2165
2166 while i >= j {
2167 for current in [i, i - 1, i - 2, i - 3] {
2168 let ci = current as usize - start;
2169 let v = cache[ci].symbol as usize;
2170 induction_bucket[v] -= 1;
2171 cache[ci].symbol = induction_bucket[v];
2172 }
2173 i -= 4;
2174 }
2175
2176 j -= 64 + 3;
2177 while i >= j {
2178 let ci = i as usize - start;
2179 let v = cache[ci].symbol as usize;
2180 induction_bucket[v] -= 1;
2181 cache[ci].symbol = induction_bucket[v];
2182 i -= 1;
2183 }
2184}
2185
2186#[doc(hidden)]
2188pub fn radix_sort_lms_suffixes_32s_2k_block_sort(
2189 induction_bucket: &mut [SaSint],
2190 cache: &mut [ThreadCache],
2191 omp_block_start: FastSint,
2192 omp_block_size: FastSint,
2193) {
2194 let start = usize::try_from(omp_block_start).expect("block start must be non-negative");
2195 let mut i = omp_block_start + omp_block_size - 1;
2196 let mut j = omp_block_start + 64 + 3;
2197
2198 while i >= j {
2199 for current in [i, i - 1, i - 2, i - 3] {
2200 let ci = current as usize - start;
2201 let v = buckets_index2(cache[ci].symbol as usize, 0);
2202 induction_bucket[v] -= 1;
2203 cache[ci].symbol = induction_bucket[v];
2204 }
2205 i -= 4;
2206 }
2207
2208 j -= 64 + 3;
2209 while i >= j {
2210 let ci = i as usize - start;
2211 let v = buckets_index2(cache[ci].symbol as usize, 0);
2212 induction_bucket[v] -= 1;
2213 cache[ci].symbol = induction_bucket[v];
2214 i -= 1;
2215 }
2216}
2217
2218#[doc(hidden)]
2220pub fn radix_sort_lms_suffixes_32s_6k_block_omp(
2221 t: &[SaSint],
2222 sa: &mut [SaSint],
2223 induction_bucket: &mut [SaSint],
2224 cache: &mut [ThreadCache],
2225 block_start: FastSint,
2226 block_size: FastSint,
2227 threads: SaSint,
2228) {
2229 if threads <= 1 || block_size < 16_384 {
2230 radix_sort_lms_suffixes_32s_6k(t, sa, induction_bucket, block_start, block_size);
2231 return;
2232 }
2233
2234 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
2235 let threads_usize = usize::try_from(threads)
2236 .expect("threads must be positive")
2237 .min(block_size_usize.max(1));
2238 let omp_block_stride = (block_size_usize / threads_usize) & !15usize;
2239
2240 for omp_thread_num in 0..threads_usize {
2241 let omp_block_start = omp_thread_num * omp_block_stride;
2242 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2243 omp_block_stride
2244 } else {
2245 block_size_usize - omp_block_start
2246 };
2247 if omp_block_size > 0 {
2248 radix_sort_lms_suffixes_32s_block_gather(
2249 t,
2250 sa,
2251 &mut cache[omp_block_start..],
2252 block_start + omp_block_start as FastSint,
2253 omp_block_size as FastSint,
2254 );
2255 }
2256 }
2257
2258 radix_sort_lms_suffixes_32s_6k_block_sort(induction_bucket, cache, block_start, block_size);
2259
2260 for omp_thread_num in 0..threads_usize {
2261 let omp_block_start = omp_thread_num * omp_block_stride;
2262 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2263 omp_block_stride
2264 } else {
2265 block_size_usize - omp_block_start
2266 };
2267 if omp_block_size > 0 {
2268 place_cached_suffixes(sa, &cache[omp_block_start..], 0, omp_block_size as FastSint);
2269 }
2270 }
2271}
2272
2273#[doc(hidden)]
2275pub fn radix_sort_lms_suffixes_32s_2k_block_omp(
2276 t: &[SaSint],
2277 sa: &mut [SaSint],
2278 induction_bucket: &mut [SaSint],
2279 cache: &mut [ThreadCache],
2280 block_start: FastSint,
2281 block_size: FastSint,
2282 threads: SaSint,
2283) {
2284 if threads <= 1 || block_size < 16_384 {
2285 radix_sort_lms_suffixes_32s_2k(t, sa, induction_bucket, block_start, block_size);
2286 return;
2287 }
2288
2289 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
2290 let threads_usize = usize::try_from(threads)
2291 .expect("threads must be positive")
2292 .min(block_size_usize.max(1));
2293 let omp_block_stride = (block_size_usize / threads_usize) & !15usize;
2294
2295 for omp_thread_num in 0..threads_usize {
2296 let omp_block_start = omp_thread_num * omp_block_stride;
2297 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2298 omp_block_stride
2299 } else {
2300 block_size_usize - omp_block_start
2301 };
2302 if omp_block_size > 0 {
2303 radix_sort_lms_suffixes_32s_block_gather(
2304 t,
2305 sa,
2306 &mut cache[omp_block_start..],
2307 block_start + omp_block_start as FastSint,
2308 omp_block_size as FastSint,
2309 );
2310 }
2311 }
2312
2313 radix_sort_lms_suffixes_32s_2k_block_sort(induction_bucket, cache, block_start, block_size);
2314
2315 for omp_thread_num in 0..threads_usize {
2316 let omp_block_start = omp_thread_num * omp_block_stride;
2317 let omp_block_size = if omp_thread_num + 1 < threads_usize {
2318 omp_block_stride
2319 } else {
2320 block_size_usize - omp_block_start
2321 };
2322 if omp_block_size > 0 {
2323 place_cached_suffixes(sa, &cache[omp_block_start..], 0, omp_block_size as FastSint);
2324 }
2325 }
2326}
2327
2328#[doc(hidden)]
2330pub fn radix_sort_lms_suffixes_32s_6k_omp(
2331 t: &[SaSint],
2332 sa: &mut [SaSint],
2333 n: SaSint,
2334 m: SaSint,
2335 induction_bucket: &mut [SaSint],
2336 threads: SaSint,
2337 _thread_state: &mut [ThreadState],
2338) {
2339 if threads <= 1 || m < 65_536 {
2340 radix_sort_lms_suffixes_32s_6k(
2341 t,
2342 sa,
2343 induction_bucket,
2344 n as FastSint - m as FastSint + 1,
2345 m as FastSint - 1,
2346 );
2347 return;
2348 }
2349
2350 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2351 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
2352 let mut block_start = 0usize;
2353 let m_usize = usize::try_from(m).expect("m must be non-negative");
2354 let n_usize = usize::try_from(n).expect("n must be non-negative");
2355 let last = m_usize - 1;
2356
2357 while block_start < last {
2358 let block_end = (block_start + threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE).min(last);
2359 radix_sort_lms_suffixes_32s_6k_block_omp(
2360 t,
2361 sa,
2362 induction_bucket,
2363 &mut cache,
2364 (n_usize - block_end) as FastSint,
2365 (block_end - block_start) as FastSint,
2366 threads,
2367 );
2368 block_start = block_end;
2369 }
2370}
2371
2372#[doc(hidden)]
2374pub fn radix_sort_lms_suffixes_32s_2k_omp(
2375 t: &[SaSint],
2376 sa: &mut [SaSint],
2377 n: SaSint,
2378 m: SaSint,
2379 induction_bucket: &mut [SaSint],
2380 threads: SaSint,
2381 _thread_state: &mut [ThreadState],
2382) {
2383 if threads <= 1 || m < 65_536 {
2384 radix_sort_lms_suffixes_32s_2k(
2385 t,
2386 sa,
2387 induction_bucket,
2388 n as FastSint - m as FastSint + 1,
2389 m as FastSint - 1,
2390 );
2391 return;
2392 }
2393
2394 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2395 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
2396 let mut block_start = 0usize;
2397 let m_usize = usize::try_from(m).expect("m must be non-negative");
2398 let n_usize = usize::try_from(n).expect("n must be non-negative");
2399 let last = m_usize - 1;
2400
2401 while block_start < last {
2402 let block_end = (block_start + threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE).min(last);
2403 radix_sort_lms_suffixes_32s_2k_block_omp(
2404 t,
2405 sa,
2406 induction_bucket,
2407 &mut cache,
2408 (n_usize - block_end) as FastSint,
2409 (block_end - block_start) as FastSint,
2410 threads,
2411 );
2412 block_start = block_end;
2413 }
2414}
2415
2416#[doc(hidden)]
2418pub fn radix_sort_lms_suffixes_32s_1k(
2419 t: &[SaSint],
2420 sa: &mut [SaSint],
2421 n: SaSint,
2422 buckets: &mut [SaSint],
2423) -> SaSint {
2424 let n_usize = usize::try_from(n).expect("n must be non-negative");
2425 let mut i = n as FastSint - 2;
2426 let mut m = 0;
2427 let mut f0 = 1usize;
2428 let mut f1: usize;
2429 let mut c0 = t[n_usize - 1] as FastSint;
2430 let mut c1: FastSint;
2431 let mut c2 = 0 as FastSint;
2432
2433 while i >= 67 {
2434 c1 = t[i as usize] as FastSint;
2435 f1 = usize::from(c1 > (c0 - f0 as FastSint));
2436 if (f1 & !f0) != 0 {
2437 c2 = c0;
2438 buckets[c2 as usize] -= 1;
2439 sa[buckets[c2 as usize] as usize] = (i + 1) as SaSint;
2440 m += 1;
2441 }
2442
2443 c0 = t[(i - 1) as usize] as FastSint;
2444 f0 = usize::from(c0 > (c1 - f1 as FastSint));
2445 if (f0 & !f1) != 0 {
2446 c2 = c1;
2447 buckets[c2 as usize] -= 1;
2448 sa[buckets[c2 as usize] as usize] = i as SaSint;
2449 m += 1;
2450 }
2451
2452 c1 = t[(i - 2) as usize] as FastSint;
2453 f1 = usize::from(c1 > (c0 - f0 as FastSint));
2454 if (f1 & !f0) != 0 {
2455 c2 = c0;
2456 buckets[c2 as usize] -= 1;
2457 sa[buckets[c2 as usize] as usize] = (i - 1) as SaSint;
2458 m += 1;
2459 }
2460
2461 c0 = t[(i - 3) as usize] as FastSint;
2462 f0 = usize::from(c0 > (c1 - f1 as FastSint));
2463 if (f0 & !f1) != 0 {
2464 c2 = c1;
2465 buckets[c2 as usize] -= 1;
2466 sa[buckets[c2 as usize] as usize] = (i - 2) as SaSint;
2467 m += 1;
2468 }
2469
2470 i -= 4;
2471 }
2472
2473 while i >= 0 {
2474 c1 = c0;
2475 c0 = t[i as usize] as FastSint;
2476 f1 = f0;
2477 f0 = usize::from(c0 > (c1 - f1 as FastSint));
2478 if (f0 & !f1) != 0 {
2479 c2 = c1;
2480 buckets[c2 as usize] -= 1;
2481 sa[buckets[c2 as usize] as usize] = (i + 1) as SaSint;
2482 m += 1;
2483 }
2484 i -= 1;
2485 }
2486
2487 if m > 1 {
2488 sa[buckets[c2 as usize] as usize] = 0;
2489 }
2490
2491 m
2492}
2493
2494#[doc(hidden)]
2496pub fn radix_sort_set_markers_32s_6k(
2497 sa: &mut [SaSint],
2498 induction_bucket: &[SaSint],
2499 omp_block_start: FastSint,
2500 omp_block_size: FastSint,
2501) {
2502 let mut i = omp_block_start;
2503 let mut j = omp_block_start + omp_block_size - 67;
2504
2505 while i < j {
2506 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2507 sa[induction_bucket[(i + 1) as usize] as usize] |= SAINT_MIN;
2508 sa[induction_bucket[(i + 2) as usize] as usize] |= SAINT_MIN;
2509 sa[induction_bucket[(i + 3) as usize] as usize] |= SAINT_MIN;
2510 i += 4;
2511 }
2512
2513 j += 67;
2514 while i < j {
2515 sa[induction_bucket[i as usize] as usize] |= SAINT_MIN;
2516 i += 1;
2517 }
2518}
2519
2520#[doc(hidden)]
2522pub fn radix_sort_set_markers_32s_4k(
2523 sa: &mut [SaSint],
2524 induction_bucket: &[SaSint],
2525 omp_block_start: FastSint,
2526 omp_block_size: FastSint,
2527) {
2528 let mut i = omp_block_start;
2529 let mut j = omp_block_start + omp_block_size - 67;
2530
2531 while i < j {
2532 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2533 sa[induction_bucket[buckets_index2((i + 1) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2534 sa[induction_bucket[buckets_index2((i + 2) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2535 sa[induction_bucket[buckets_index2((i + 3) as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2536 i += 4;
2537 }
2538
2539 j += 67;
2540 while i < j {
2541 sa[induction_bucket[buckets_index2(i as usize, 0)] as usize] |= SUFFIX_GROUP_MARKER;
2542 i += 1;
2543 }
2544}
2545
2546#[doc(hidden)]
2548pub fn radix_sort_set_markers_32s_6k_omp(
2549 sa: &mut [SaSint],
2550 k: SaSint,
2551 induction_bucket: &[SaSint],
2552 threads: SaSint,
2553) {
2554 if k <= 1 {
2555 return;
2556 }
2557
2558 if threads <= 1 || k < 65_536 {
2559 radix_sort_set_markers_32s_6k(sa, induction_bucket, 0, k as FastSint - 1);
2560 return;
2561 }
2562
2563 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2564 let last = usize::try_from(k - 1).expect("k must be positive");
2565 let stride = (last / threads_usize) & !15usize;
2566 let mut start = 0usize;
2567
2568 for thread in 0..threads_usize {
2569 let end = if thread + 1 == threads_usize {
2570 last
2571 } else {
2572 start + stride
2573 };
2574 if end > start {
2575 radix_sort_set_markers_32s_6k(
2576 sa,
2577 induction_bucket,
2578 start as FastSint,
2579 (end - start) as FastSint,
2580 );
2581 }
2582 start = end;
2583 }
2584}
2585
2586#[doc(hidden)]
2588pub fn radix_sort_set_markers_32s_4k_omp(
2589 sa: &mut [SaSint],
2590 k: SaSint,
2591 induction_bucket: &[SaSint],
2592 threads: SaSint,
2593) {
2594 if k <= 1 {
2595 return;
2596 }
2597
2598 if threads <= 1 || k < 65_536 {
2599 radix_sort_set_markers_32s_4k(sa, induction_bucket, 0, k as FastSint - 1);
2600 return;
2601 }
2602
2603 let threads_usize = usize::try_from(threads).expect("threads must be positive");
2604 let last = usize::try_from(k - 1).expect("k must be positive");
2605 let stride = (last / threads_usize) & !15usize;
2606 let mut start = 0usize;
2607
2608 for thread in 0..threads_usize {
2609 let end = if thread + 1 == threads_usize {
2610 last
2611 } else {
2612 start + stride
2613 };
2614 if end > start {
2615 radix_sort_set_markers_32s_4k(
2616 sa,
2617 induction_bucket,
2618 start as FastSint,
2619 (end - start) as FastSint,
2620 );
2621 }
2622 start = end;
2623 }
2624}
2625
2626#[doc(hidden)]
2628pub fn initialize_buckets_for_partial_sorting_8u(
2629 t: &[u8],
2630 buckets: &mut [SaSint],
2631 first_lms_suffix: SaSint,
2632 left_suffixes_count: SaSint,
2633) {
2634 let temp_offset = 4 * ALPHABET_SIZE;
2635 buckets[buckets_index4(t[first_lms_suffix as usize] as usize, 1)] += 1;
2636
2637 let mut sum0 = left_suffixes_count + 1;
2638 let mut sum1 = 0;
2639 for j in 0..ALPHABET_SIZE {
2640 let i = buckets_index4(j, 0);
2641 let tj = buckets_index2(j, 0);
2642 buckets[temp_offset + tj] = sum0;
2643 sum0 += buckets[i] + buckets[i + 2];
2644 sum1 += buckets[i + 1];
2645 buckets[tj] = sum0;
2646 buckets[tj + 1] = sum1;
2647 }
2648}
2649
2650#[doc(hidden)]
2652pub fn initialize_buckets_for_partial_sorting_32s_6k(
2653 t: &[SaSint],
2654 k: SaSint,
2655 buckets: &mut [SaSint],
2656 first_lms_suffix: SaSint,
2657 left_suffixes_count: SaSint,
2658) {
2659 let k_usize = usize::try_from(k).expect("k must be non-negative");
2660 let temp_offset = 4 * k_usize;
2661 let first_symbol = t[first_lms_suffix as usize] as usize;
2662 let mut sum0 = left_suffixes_count + 1;
2663 let mut sum1 = 0;
2664 let mut sum2 = 0;
2665
2666 for j in 0..first_symbol {
2667 let i = buckets_index4(j, 0);
2668 let tj = buckets_index2(j, 0);
2669 let ss = buckets[i];
2670 let ls = buckets[i + 1];
2671 let sl = buckets[i + 2];
2672 let ll = buckets[i + 3];
2673
2674 buckets[i] = sum0;
2675 buckets[i + 1] = sum2;
2676 buckets[i + 2] = 0;
2677 buckets[i + 3] = 0;
2678
2679 sum0 += ss + sl;
2680 sum1 += ls;
2681 sum2 += ls + ll;
2682
2683 buckets[temp_offset + tj] = sum0;
2684 buckets[temp_offset + tj + 1] = sum1;
2685 }
2686
2687 sum1 += 1;
2688 for j in first_symbol..k_usize {
2689 let i = buckets_index4(j, 0);
2690 let tj = buckets_index2(j, 0);
2691 let ss = buckets[i];
2692 let ls = buckets[i + 1];
2693 let sl = buckets[i + 2];
2694 let ll = buckets[i + 3];
2695
2696 buckets[i] = sum0;
2697 buckets[i + 1] = sum2;
2698 buckets[i + 2] = 0;
2699 buckets[i + 3] = 0;
2700
2701 sum0 += ss + sl;
2702 sum1 += ls;
2703 sum2 += ls + ll;
2704
2705 buckets[temp_offset + tj] = sum0;
2706 buckets[temp_offset + tj + 1] = sum1;
2707 }
2708}
2709
2710#[doc(hidden)]
2712pub fn partial_sorting_scan_left_to_right_8u(
2713 t: &[u8],
2714 sa: &mut [SaSint],
2715 buckets: &mut [SaSint],
2716 mut d: SaSint,
2717 omp_block_start: FastSint,
2718 omp_block_size: FastSint,
2719) -> SaSint {
2720 let induction_offset = 4 * ALPHABET_SIZE;
2721 let distinct_offset = 2 * ALPHABET_SIZE;
2722 let prefetch_distance = 64 as FastSint;
2723 let mut i = omp_block_start;
2724 let mut j = if omp_block_size > prefetch_distance + 1 {
2725 omp_block_start + omp_block_size - prefetch_distance - 1
2726 } else {
2727 omp_block_start
2728 };
2729
2730 while i < j {
2731 let mut p0 = sa[i as usize];
2732 d += SaSint::from(p0 < 0);
2733 p0 &= SAINT_MAX;
2734 let v0 = buckets_index2(
2735 t[(p0 - 1) as usize] as usize,
2736 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
2737 );
2738 let pos0 = buckets[induction_offset + v0] as usize;
2739 sa[pos0] = (p0 - 1) | (((buckets[distinct_offset + v0] != d) as SaSint) << (SAINT_BIT - 1));
2740 buckets[induction_offset + v0] += 1;
2741 buckets[distinct_offset + v0] = d;
2742
2743 let mut p1 = sa[(i + 1) as usize];
2744 d += SaSint::from(p1 < 0);
2745 p1 &= SAINT_MAX;
2746 let v1 = buckets_index2(
2747 t[(p1 - 1) as usize] as usize,
2748 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
2749 );
2750 let pos1 = buckets[induction_offset + v1] as usize;
2751 sa[pos1] = (p1 - 1) | (((buckets[distinct_offset + v1] != d) as SaSint) << (SAINT_BIT - 1));
2752 buckets[induction_offset + v1] += 1;
2753 buckets[distinct_offset + v1] = d;
2754
2755 i += 2;
2756 }
2757
2758 j = omp_block_start + omp_block_size;
2759 while i < j {
2760 let mut p = sa[i as usize];
2761 d += SaSint::from(p < 0);
2762 p &= SAINT_MAX;
2763 let v = buckets_index2(
2764 t[(p - 1) as usize] as usize,
2765 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2766 );
2767 let pos = buckets[induction_offset + v] as usize;
2768 sa[pos] = (p - 1) | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
2769 buckets[induction_offset + v] += 1;
2770 buckets[distinct_offset + v] = d;
2771 i += 1;
2772 }
2773
2774 d
2775}
2776
2777#[doc(hidden)]
2779pub fn partial_sorting_scan_left_to_right_8u_omp(
2780 t: &[u8],
2781 sa: &mut [SaSint],
2782 n: SaSint,
2783 k: SaSint,
2784 buckets: &mut [SaSint],
2785 left_suffixes_count: SaSint,
2786 mut d: SaSint,
2787 threads: SaSint,
2788 thread_state: &mut [ThreadState],
2789) -> SaSint {
2790 let v = buckets_index2(
2791 t[(n - 1) as usize] as usize,
2792 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
2793 );
2794 let induction_offset = 4 * ALPHABET_SIZE;
2795 let distinct_offset = 2 * ALPHABET_SIZE;
2796 let pos = buckets[induction_offset + v] as usize;
2797 sa[pos] = (n - 1) | SAINT_MIN;
2798 buckets[induction_offset + v] += 1;
2799 d += 1;
2800 buckets[distinct_offset + v] = d;
2801
2802 if threads == 1 || left_suffixes_count < 65_536 {
2803 return partial_sorting_scan_left_to_right_8u(
2804 t,
2805 sa,
2806 buckets,
2807 d,
2808 0,
2809 left_suffixes_count as FastSint,
2810 );
2811 }
2812
2813 let mut block_start = 0usize;
2814 let left_suffixes_count =
2815 usize::try_from(left_suffixes_count).expect("left_suffixes_count must be non-negative");
2816 let threads_usize = usize::try_from(threads)
2817 .expect("threads must be non-negative")
2818 .min(thread_state.len())
2819 .max(1);
2820 while block_start < left_suffixes_count {
2821 if sa[block_start] == 0 {
2822 block_start += 1;
2823 } else {
2824 let mut block_max_end =
2825 block_start + threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
2826 if block_max_end > left_suffixes_count {
2827 block_max_end = left_suffixes_count;
2828 }
2829 let mut block_end = block_start + 1;
2830 while block_end < block_max_end && sa[block_end] != 0 {
2831 block_end += 1;
2832 }
2833 let block_size = block_end - block_start;
2834
2835 if block_size < 32 {
2836 while block_start < block_end {
2837 let p = sa[block_start];
2838 d += SaSint::from(p < 0);
2839 let p = p & SAINT_MAX;
2840 let v = buckets_index2(
2841 t[(p - 1) as usize] as usize,
2842 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
2843 );
2844 let pos = buckets[induction_offset + v] as usize;
2845 sa[pos] = (p - 1)
2846 | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
2847 buckets[induction_offset + v] += 1;
2848 buckets[distinct_offset + v] = d;
2849 block_start += 1;
2850 }
2851 } else {
2852 d = partial_sorting_scan_left_to_right_8u_block_omp(
2853 t,
2854 sa,
2855 k,
2856 buckets,
2857 d,
2858 block_start as FastSint,
2859 block_size as FastSint,
2860 threads,
2861 thread_state,
2862 );
2863 block_start = block_end;
2864 }
2865 }
2866 }
2867
2868 d
2869}
2870
2871#[doc(hidden)]
2873pub fn partial_sorting_scan_left_to_right_32s_6k(
2874 t: &[SaSint],
2875 sa: &mut [SaSint],
2876 buckets: &mut [SaSint],
2877 mut d: SaSint,
2878 omp_block_start: FastSint,
2879 omp_block_size: FastSint,
2880) -> SaSint {
2881 let prefetch_distance: FastSint = 64;
2882
2883 let mut i = omp_block_start;
2884 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
2885 while i < j {
2886 let mut p0 = sa[i as usize];
2887 d += SaSint::from(p0 < 0);
2888 p0 &= SAINT_MAX;
2889 let p0u = p0 as usize;
2890 let v0 = buckets_index4(t[p0u - 1] as usize, usize::from(t[p0u - 2] >= t[p0u - 1]));
2891 let pos0 = buckets[v0] as usize;
2892 sa[pos0] = (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
2893 buckets[v0] += 1;
2894 buckets[2 + v0] = d;
2895
2896 let mut p1 = sa[(i + 1) as usize];
2897 d += SaSint::from(p1 < 0);
2898 p1 &= SAINT_MAX;
2899 let p1u = p1 as usize;
2900 let v1 = buckets_index4(t[p1u - 1] as usize, usize::from(t[p1u - 2] >= t[p1u - 1]));
2901 let pos1 = buckets[v1] as usize;
2902 sa[pos1] = (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
2903 buckets[v1] += 1;
2904 buckets[2 + v1] = d;
2905
2906 i += 2;
2907 }
2908
2909 j += 2 * prefetch_distance + 1;
2910 while i < j {
2911 let mut p = sa[i as usize];
2912 d += SaSint::from(p < 0);
2913 p &= SAINT_MAX;
2914 let pu = p as usize;
2915 let v = buckets_index4(t[pu - 1] as usize, usize::from(t[pu - 2] >= t[pu - 1]));
2916 let pos = buckets[v] as usize;
2917 sa[pos] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
2918 buckets[v] += 1;
2919 buckets[2 + v] = d;
2920 i += 1;
2921 }
2922
2923 d
2924}
2925
2926#[doc(hidden)]
2928pub fn partial_sorting_scan_left_to_right_32s_4k(
2929 t: &[SaSint],
2930 sa: &mut [SaSint],
2931 k: SaSint,
2932 buckets: &mut [SaSint],
2933 mut d: SaSint,
2934 omp_block_start: FastSint,
2935 omp_block_size: FastSint,
2936) -> SaSint {
2937 let k_usize = usize::try_from(k).expect("k must be non-negative");
2938 let prefetch_distance: FastSint = 64;
2939 let induction_offset = 2 * k_usize;
2940 let mut i = omp_block_start;
2941 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
2942
2943 while i < j {
2944 let i0 = i as usize;
2945 let mut p0 = sa[i0];
2946 sa[i0] = p0 & SAINT_MAX;
2947 if p0 > 0 {
2948 sa[i0] = 0;
2949 d += p0 >> (SUFFIX_GROUP_BIT - 1);
2950 p0 &= !SUFFIX_GROUP_MARKER;
2951 let p0u = p0 as usize;
2952 let c0 = t[p0u - 1];
2953 let f0 = usize::from(t[p0u - 2] < c0);
2954 let v0 = buckets_index2(c0 as usize, f0);
2955 let c0u = c0 as usize;
2956 let pos0 = buckets[induction_offset + c0u] as usize;
2957 sa[pos0] = (p0 - 1)
2958 | ((f0 as SaSint) << (SAINT_BIT - 1))
2959 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2960 buckets[induction_offset + c0u] += 1;
2961 buckets[v0] = d;
2962 }
2963
2964 let i1 = (i + 1) as usize;
2965 let mut p1 = sa[i1];
2966 sa[i1] = p1 & SAINT_MAX;
2967 if p1 > 0 {
2968 sa[i1] = 0;
2969 d += p1 >> (SUFFIX_GROUP_BIT - 1);
2970 p1 &= !SUFFIX_GROUP_MARKER;
2971 let p1u = p1 as usize;
2972 let c1 = t[p1u - 1];
2973 let f1 = usize::from(t[p1u - 2] < c1);
2974 let v1 = buckets_index2(c1 as usize, f1);
2975 let c1u = c1 as usize;
2976 let pos1 = buckets[induction_offset + c1u] as usize;
2977 sa[pos1] = (p1 - 1)
2978 | ((f1 as SaSint) << (SAINT_BIT - 1))
2979 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
2980 buckets[induction_offset + c1u] += 1;
2981 buckets[v1] = d;
2982 }
2983
2984 i += 2;
2985 }
2986
2987 j += 2 * prefetch_distance + 1;
2988 while i < j {
2989 let iu = i as usize;
2990 let mut p = sa[iu];
2991 sa[iu] = p & SAINT_MAX;
2992 if p > 0 {
2993 sa[iu] = 0;
2994 d += p >> (SUFFIX_GROUP_BIT - 1);
2995 p &= !SUFFIX_GROUP_MARKER;
2996 let pu = p as usize;
2997 let c = t[pu - 1];
2998 let f = usize::from(t[pu - 2] < c);
2999 let v = buckets_index2(c as usize, f);
3000 let cu = c as usize;
3001 let pos = buckets[induction_offset + cu] as usize;
3002 sa[pos] = (p - 1)
3003 | ((f as SaSint) << (SAINT_BIT - 1))
3004 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3005 buckets[induction_offset + cu] += 1;
3006 buckets[v] = d;
3007 }
3008 i += 1;
3009 }
3010
3011 d
3012}
3013
3014#[doc(hidden)]
3016pub fn partial_sorting_scan_left_to_right_32s_1k(
3017 t: &[SaSint],
3018 sa: &mut [SaSint],
3019 induction_bucket: &mut [SaSint],
3020 omp_block_start: FastSint,
3021 omp_block_size: FastSint,
3022) {
3023 let prefetch_distance = 64 as FastSint;
3024 let mut i = omp_block_start;
3025 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
3026
3027 while i < j {
3028 let p0 = sa[i as usize];
3029 sa[i as usize] = p0 & SAINT_MAX;
3030 if p0 > 0 {
3031 sa[i as usize] = 0;
3032 let c0 = t[(p0 - 1) as usize] as usize;
3033 let pos0 = induction_bucket[c0] as usize;
3034 induction_bucket[c0] += 1;
3035 sa[pos0] = (p0 - 1)
3036 | ((usize::from(t[(p0 - 2) as usize] < t[(p0 - 1) as usize]) as SaSint)
3037 << (SAINT_BIT - 1));
3038 }
3039
3040 let p1 = sa[(i + 1) as usize];
3041 sa[(i + 1) as usize] = p1 & SAINT_MAX;
3042 if p1 > 0 {
3043 sa[(i + 1) as usize] = 0;
3044 let c1 = t[(p1 - 1) as usize] as usize;
3045 let pos1 = induction_bucket[c1] as usize;
3046 induction_bucket[c1] += 1;
3047 sa[pos1] = (p1 - 1)
3048 | ((usize::from(t[(p1 - 2) as usize] < t[(p1 - 1) as usize]) as SaSint)
3049 << (SAINT_BIT - 1));
3050 }
3051
3052 i += 2;
3053 }
3054
3055 j += 2 * prefetch_distance + 1;
3056 while i < j {
3057 let p = sa[i as usize];
3058 sa[i as usize] = p & SAINT_MAX;
3059 if p > 0 {
3060 sa[i as usize] = 0;
3061 let c = t[(p - 1) as usize] as usize;
3062 let pos = induction_bucket[c] as usize;
3063 induction_bucket[c] += 1;
3064 sa[pos] = (p - 1)
3065 | ((usize::from(t[(p - 2) as usize] < t[(p - 1) as usize]) as SaSint)
3066 << (SAINT_BIT - 1));
3067 }
3068 i += 1;
3069 }
3070}
3071
3072#[doc(hidden)]
3074pub fn partial_sorting_scan_left_to_right_32s_6k_omp(
3075 t: &[SaSint],
3076 sa: &mut [SaSint],
3077 n: SaSint,
3078 buckets: &mut [SaSint],
3079 left_suffixes_count: SaSint,
3080 mut d: SaSint,
3081 threads: SaSint,
3082 thread_state: &mut [ThreadState],
3083) -> SaSint {
3084 let v = buckets_index4(
3085 t[(n - 1) as usize] as usize,
3086 usize::from(t[(n - 2) as usize] >= t[(n - 1) as usize]),
3087 );
3088 let pos = buckets[v] as usize;
3089 sa[pos] = (n - 1) | SAINT_MIN;
3090 buckets[v] += 1;
3091 d += 1;
3092 buckets[2 + v] = d;
3093 if threads == 1 || left_suffixes_count < 65_536 {
3094 return partial_sorting_scan_left_to_right_32s_6k(
3095 t,
3096 sa,
3097 buckets,
3098 d,
3099 0,
3100 left_suffixes_count as FastSint,
3101 );
3102 }
3103 if thread_state.is_empty() {
3104 return partial_sorting_scan_left_to_right_32s_6k(
3105 t,
3106 sa,
3107 buckets,
3108 d,
3109 0,
3110 left_suffixes_count as FastSint,
3111 );
3112 }
3113
3114 let left_suffixes_count =
3115 usize::try_from(left_suffixes_count).expect("left_suffixes_count must be non-negative");
3116 let threads_usize = usize::try_from(threads)
3117 .expect("threads must be non-negative")
3118 .max(1);
3119 let mut block_start = 0usize;
3120 let block_span = threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE;
3121 let mut cache = vec![ThreadCache::default(); block_span];
3122 while block_start < left_suffixes_count {
3123 let mut block_end = block_start + block_span;
3124 if block_end > left_suffixes_count {
3125 block_end = left_suffixes_count;
3126 }
3127
3128 d = partial_sorting_scan_left_to_right_32s_6k_block_omp(
3129 t,
3130 sa,
3131 buckets,
3132 d,
3133 &mut cache,
3134 block_start as FastSint,
3135 (block_end - block_start) as FastSint,
3136 threads,
3137 );
3138
3139 block_start = block_end;
3140 }
3141
3142 d
3143}
3144
3145#[doc(hidden)]
3147pub fn partial_sorting_scan_left_to_right_32s_4k_omp(
3148 t: &[SaSint],
3149 sa: &mut [SaSint],
3150 n: SaSint,
3151 k: SaSint,
3152 buckets: &mut [SaSint],
3153 mut d: SaSint,
3154 threads: SaSint,
3155 thread_state: &mut [ThreadState],
3156) -> SaSint {
3157 let k_usize = usize::try_from(k).expect("k must be non-negative");
3158 let induction_offset = 2 * k_usize;
3159 let distinct_offset = 0usize;
3160 let symbol = t[(n - 1) as usize] as usize;
3161 let is_s = usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]);
3162 let pos = buckets[induction_offset + symbol] as usize;
3163 sa[pos] = (n - 1) | ((is_s as SaSint) << (SAINT_BIT - 1)) | SUFFIX_GROUP_MARKER;
3164 buckets[induction_offset + symbol] += 1;
3165 d += 1;
3166 buckets[distinct_offset + buckets_index2(symbol, is_s)] = d;
3167
3168 if threads == 1 || n < 65_536 {
3169 d = partial_sorting_scan_left_to_right_32s_4k(t, sa, k, buckets, d, 0, n as FastSint);
3170 } else {
3171 if thread_state.is_empty() {
3172 return partial_sorting_scan_left_to_right_32s_4k(
3173 t,
3174 sa,
3175 k,
3176 buckets,
3177 d,
3178 0,
3179 n as FastSint,
3180 );
3181 }
3182 let mut block_start = 0usize;
3183 let n_usize = usize::try_from(n).expect("n must be non-negative");
3184 let threads_usize = usize::try_from(threads)
3185 .expect("threads must be non-negative")
3186 .max(1);
3187 let chunk_capacity = threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE;
3188 let mut cache = vec![ThreadCache::default(); chunk_capacity];
3189
3190 while block_start < n_usize {
3191 let mut block_end = block_start + chunk_capacity;
3192 if block_end > n_usize {
3193 block_end = n_usize;
3194 }
3195
3196 d = partial_sorting_scan_left_to_right_32s_4k_block_omp(
3197 t,
3198 sa,
3199 k,
3200 buckets,
3201 d,
3202 &mut cache,
3203 block_start as FastSint,
3204 (block_end - block_start) as FastSint,
3205 threads,
3206 );
3207
3208 block_start = block_end;
3209 }
3210 }
3211
3212 d
3213}
3214
3215#[doc(hidden)]
3217pub fn partial_sorting_scan_left_to_right_32s_1k_omp(
3218 t: &[SaSint],
3219 sa: &mut [SaSint],
3220 n: SaSint,
3221 buckets: &mut [SaSint],
3222 threads: SaSint,
3223 thread_state: &mut [ThreadState],
3224) {
3225 let symbol = t[(n - 1) as usize] as usize;
3226 let pos = buckets[symbol] as usize;
3227 sa[pos] = (n - 1)
3228 | ((usize::from(t[(n - 2) as usize] < t[(n - 1) as usize]) as SaSint) << (SAINT_BIT - 1));
3229 buckets[symbol] += 1;
3230 if threads == 1 || n < 65_536 {
3231 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n as FastSint);
3232 } else {
3233 if thread_state.is_empty() {
3234 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, 0, n as FastSint);
3235 return;
3236 }
3237 let n_usize = usize::try_from(n).expect("n must be non-negative");
3238 let threads_usize = usize::try_from(threads)
3239 .expect("threads must be non-negative")
3240 .max(1);
3241 let mut block_start = 0usize;
3242 let block_span = threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE;
3243 let mut cache = vec![ThreadCache::default(); block_span];
3244
3245 while block_start < n_usize {
3246 let mut block_end = block_start + block_span;
3247 if block_end > n_usize {
3248 block_end = n_usize;
3249 }
3250
3251 partial_sorting_scan_left_to_right_32s_1k_block_omp(
3252 t,
3253 sa,
3254 buckets,
3255 &mut cache,
3256 block_start as FastSint,
3257 (block_end - block_start) as FastSint,
3258 threads,
3259 );
3260
3261 block_start = block_end;
3262 }
3263 }
3264}
3265
3266#[doc(hidden)]
3268pub fn partial_sorting_scan_left_to_right_8u_block_prepare(
3269 t: &[u8],
3270 sa: &[SaSint],
3271 k: SaSint,
3272 buckets: &mut [SaSint],
3273 cache: &mut [ThreadCache],
3274 omp_block_start: FastSint,
3275 omp_block_size: FastSint,
3276) -> (FastSint, FastSint) {
3277 let k_usize = usize::try_from(k).expect("k must be non-negative");
3278 buckets[..2 * k_usize].fill(0);
3279 buckets[2 * k_usize..4 * k_usize].fill(0);
3280
3281 let mut i = omp_block_start;
3282 let mut j = omp_block_start + omp_block_size - 65;
3283 let mut count = 0usize;
3284 let mut d: SaSint = 1;
3285
3286 while i < j {
3287 let mut p0 = sa[i as usize];
3288 cache[count].index = p0;
3289 d += SaSint::from(p0 < 0);
3290 p0 &= SAINT_MAX;
3291 let v0 = buckets_index2(
3292 t[(p0 - 1) as usize] as usize,
3293 usize::from(t[(p0 - 2) as usize] >= t[(p0 - 1) as usize]),
3294 );
3295 cache[count].symbol = v0 as SaSint;
3296 count += 1;
3297 buckets[v0] += 1;
3298 buckets[2 * k_usize + v0] = d;
3299
3300 let mut p1 = sa[(i + 1) as usize];
3301 cache[count].index = p1;
3302 d += SaSint::from(p1 < 0);
3303 p1 &= SAINT_MAX;
3304 let v1 = buckets_index2(
3305 t[(p1 - 1) as usize] as usize,
3306 usize::from(t[(p1 - 2) as usize] >= t[(p1 - 1) as usize]),
3307 );
3308 cache[count].symbol = v1 as SaSint;
3309 count += 1;
3310 buckets[v1] += 1;
3311 buckets[2 * k_usize + v1] = d;
3312
3313 i += 2;
3314 }
3315
3316 j += 65;
3317 while i < j {
3318 let mut p = sa[i as usize];
3319 cache[count].index = p;
3320 d += SaSint::from(p < 0);
3321 p &= SAINT_MAX;
3322 let v = buckets_index2(
3323 t[(p - 1) as usize] as usize,
3324 usize::from(t[(p - 2) as usize] >= t[(p - 1) as usize]),
3325 );
3326 cache[count].symbol = v as SaSint;
3327 count += 1;
3328 buckets[v] += 1;
3329 buckets[2 * k_usize + v] = d;
3330 i += 1;
3331 }
3332
3333 (d as FastSint - 1, count as FastSint)
3334}
3335
3336#[doc(hidden)]
3338pub fn partial_sorting_scan_left_to_right_8u_block_place(
3339 sa: &mut [SaSint],
3340 buckets: &mut [SaSint],
3341 k: SaSint,
3342 cache: &[ThreadCache],
3343 count: FastSint,
3344 mut d: SaSint,
3345) {
3346 let split = 2 * usize::try_from(k).expect("k must be non-negative");
3347 let (induction_bucket, distinct_names) = buckets.split_at_mut(split);
3348
3349 let mut i = 0usize;
3350 let mut j = usize::try_from(count)
3351 .expect("count must be non-negative")
3352 .saturating_sub(1);
3353 while i < j {
3354 let p0 = cache[i].index;
3355 d += SaSint::from(p0 < 0);
3356 let v0 = cache[i].symbol as usize;
3357 let pos0 = induction_bucket[v0] as usize;
3358 sa[pos0] = (p0 - 1) | (((distinct_names[v0] != d) as SaSint) << (SAINT_BIT - 1));
3359 induction_bucket[v0] += 1;
3360 distinct_names[v0] = d;
3361
3362 let p1 = cache[i + 1].index;
3363 d += SaSint::from(p1 < 0);
3364 let v1 = cache[i + 1].symbol as usize;
3365 let pos1 = induction_bucket[v1] as usize;
3366 sa[pos1] = (p1 - 1) | (((distinct_names[v1] != d) as SaSint) << (SAINT_BIT - 1));
3367 induction_bucket[v1] += 1;
3368 distinct_names[v1] = d;
3369
3370 i += 2;
3371 }
3372
3373 j += 1;
3374 while i < j {
3375 let p = cache[i].index;
3376 d += SaSint::from(p < 0);
3377 let v = cache[i].symbol as usize;
3378 let pos = induction_bucket[v] as usize;
3379 sa[pos] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3380 induction_bucket[v] += 1;
3381 distinct_names[v] = d;
3382 i += 1;
3383 }
3384}
3385
3386#[doc(hidden)]
3388pub fn partial_sorting_scan_left_to_right_8u_block_omp(
3389 t: &[u8],
3390 sa: &mut [SaSint],
3391 k: SaSint,
3392 buckets: &mut [SaSint],
3393 d: SaSint,
3394 block_start: FastSint,
3395 block_size: FastSint,
3396 threads: SaSint,
3397 thread_state: &mut [ThreadState],
3398) -> SaSint {
3399 let mut d = d;
3400 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
3401 let k_usize = usize::try_from(k).expect("k must be non-negative");
3402 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
3403 usize::try_from(threads)
3404 .expect("threads must be non-negative")
3405 .min(thread_state.len())
3406 .max(1)
3407 } else {
3408 1
3409 };
3410 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
3411
3412 if omp_num_threads == 1 {
3413 return partial_sorting_scan_left_to_right_8u(t, sa, buckets, d, block_start, block_size);
3414 }
3415
3416 for omp_thread_num in 0..omp_num_threads {
3417 let mut omp_block_start = omp_thread_num * omp_block_stride;
3418 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
3419 omp_block_stride
3420 } else {
3421 block_size_usize - omp_block_start
3422 };
3423 omp_block_start += usize::try_from(block_start).expect("block_start must be non-negative");
3424
3425 let state = &mut thread_state[omp_thread_num];
3426 let (position, count) = partial_sorting_scan_left_to_right_8u_block_prepare(
3427 t,
3428 sa,
3429 k,
3430 &mut state.buckets,
3431 &mut state.cache,
3432 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
3433 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
3434 );
3435 state.position = position;
3436 state.count = count;
3437 }
3438
3439 let induction_offset = 4 * ALPHABET_SIZE;
3440 let distinct_offset = 2 * ALPHABET_SIZE;
3441 let (prefix, induction_tail) = buckets.split_at_mut(induction_offset);
3442 let induction_bucket = &mut induction_tail[..2 * k_usize];
3443 let distinct_names = &mut prefix[distinct_offset..distinct_offset + 2 * k_usize];
3444
3445 for tnum in 0..omp_num_threads {
3446 let state = &mut thread_state[tnum];
3447 let (temp_induction_bucket, temp_tail) = state.buckets.split_at_mut(2 * k_usize);
3448 let temp_distinct_names = &mut temp_tail[..2 * k_usize];
3449
3450 for c in 0..2 * k_usize {
3451 let a = induction_bucket[c];
3452 let b = temp_induction_bucket[c];
3453 induction_bucket[c] = a + b;
3454 temp_induction_bucket[c] = a;
3455 }
3456
3457 d -= 1;
3458 for c in 0..2 * k_usize {
3459 let a = distinct_names[c];
3460 let b = temp_distinct_names[c];
3461 let next_d = b + d;
3462 distinct_names[c] = if b > 0 { next_d } else { a };
3463 temp_distinct_names[c] = a;
3464 }
3465 d += 1 + SaSint::try_from(state.position).expect("position must fit SaSint");
3466 state.position = FastSint::try_from(d).expect("d must fit FastSint") - state.position;
3467 }
3468
3469 for tnum in 0..omp_num_threads {
3470 let state = &mut thread_state[tnum];
3471 partial_sorting_scan_left_to_right_8u_block_place(
3472 sa,
3473 &mut state.buckets,
3474 k,
3475 &state.cache,
3476 state.count,
3477 state.position as SaSint,
3478 );
3479 }
3480
3481 d
3482}
3483
3484#[doc(hidden)]
3486pub fn partial_sorting_shift_markers_8u_omp(
3487 sa: &mut [SaSint],
3488 n: SaSint,
3489 buckets: &[SaSint],
3490 threads: SaSint,
3491) {
3492 let temp_bucket = &buckets[4 * ALPHABET_SIZE..];
3493 let thread_count = if threads > 1 && n >= 65536 {
3494 usize::try_from(threads).expect("threads must be positive")
3495 } else {
3496 1
3497 };
3498 let c_step = buckets_index2(1, 0) as isize;
3499 let c_min = buckets_index2(1, 0) as isize;
3500 let c_max = buckets_index2(ALPHABET_SIZE - 1, 0) as isize;
3501 for t in 0..thread_count {
3502 let mut c = c_max - (t as isize * c_step);
3503 while c >= c_min {
3504 let c_usize = c as usize;
3505 let mut i = temp_bucket[c_usize] as isize - 1;
3506 let mut j = buckets[c_usize - buckets_index2(1, 0)] as isize + 3;
3507 let mut s = SAINT_MIN;
3508
3509 while i >= j {
3510 let p0 = sa[i as usize];
3511 let q0 = (p0 & SAINT_MIN) ^ s;
3512 s ^= q0;
3513 sa[i as usize] = p0 ^ q0;
3514
3515 let p1 = sa[(i - 1) as usize];
3516 let q1 = (p1 & SAINT_MIN) ^ s;
3517 s ^= q1;
3518 sa[(i - 1) as usize] = p1 ^ q1;
3519
3520 let p2 = sa[(i - 2) as usize];
3521 let q2 = (p2 & SAINT_MIN) ^ s;
3522 s ^= q2;
3523 sa[(i - 2) as usize] = p2 ^ q2;
3524
3525 let p3 = sa[(i - 3) as usize];
3526 let q3 = (p3 & SAINT_MIN) ^ s;
3527 s ^= q3;
3528 sa[(i - 3) as usize] = p3 ^ q3;
3529
3530 i -= 4;
3531 }
3532
3533 j -= 3;
3534 while i >= j {
3535 let p = sa[i as usize];
3536 let q = (p & SAINT_MIN) ^ s;
3537 s ^= q;
3538 sa[i as usize] = p ^ q;
3539 i -= 1;
3540 }
3541
3542 c -= c_step * thread_count as isize;
3543 }
3544 }
3545}
3546
3547#[doc(hidden)]
3549pub fn partial_sorting_shift_markers_32s_6k_omp(
3550 sa: &mut [SaSint],
3551 k: SaSint,
3552 buckets: &[SaSint],
3553 threads: SaSint,
3554) {
3555 let k_usize = usize::try_from(k).expect("k must be non-negative");
3556 let temp_bucket = &buckets[4 * k_usize..];
3557 let thread_count = if threads > 1 && k >= 65536 {
3558 usize::try_from(threads).expect("threads must be positive")
3559 } else {
3560 1
3561 };
3562 for t in 0..thread_count {
3563 let mut c = k_usize as isize - 1 - t as isize;
3564 while c >= 1 {
3565 let c_usize = c as usize;
3566 let mut i = buckets[buckets_index4(c_usize, 0)] as isize - 1;
3567 let mut j = temp_bucket[buckets_index2(c_usize - 1, 0)] as isize + 3;
3568 let mut s = SAINT_MIN;
3569
3570 while i >= j {
3571 let p0 = sa[i as usize];
3572 let q0 = (p0 & SAINT_MIN) ^ s;
3573 s ^= q0;
3574 sa[i as usize] = p0 ^ q0;
3575
3576 let p1 = sa[(i - 1) as usize];
3577 let q1 = (p1 & SAINT_MIN) ^ s;
3578 s ^= q1;
3579 sa[(i - 1) as usize] = p1 ^ q1;
3580
3581 let p2 = sa[(i - 2) as usize];
3582 let q2 = (p2 & SAINT_MIN) ^ s;
3583 s ^= q2;
3584 sa[(i - 2) as usize] = p2 ^ q2;
3585
3586 let p3 = sa[(i - 3) as usize];
3587 let q3 = (p3 & SAINT_MIN) ^ s;
3588 s ^= q3;
3589 sa[(i - 3) as usize] = p3 ^ q3;
3590
3591 i -= 4;
3592 }
3593
3594 j -= 3;
3595 while i >= j {
3596 let p = sa[i as usize];
3597 let q = (p & SAINT_MIN) ^ s;
3598 s ^= q;
3599 sa[i as usize] = p ^ q;
3600 i -= 1;
3601 }
3602
3603 c -= thread_count as isize;
3604 }
3605 }
3606}
3607
3608#[doc(hidden)]
3610pub fn partial_sorting_shift_markers_32s_4k(sa: &mut [SaSint], n: SaSint) {
3611 let mut i = n as isize - 1;
3612 let mut s = SUFFIX_GROUP_MARKER;
3613 while i >= 3 {
3614 let p0 = sa[i as usize];
3615 let q0 =
3616 ((p0 & SUFFIX_GROUP_MARKER) ^ s) & (((p0 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3617 s ^= q0;
3618 sa[i as usize] = p0 ^ q0;
3619
3620 let p1 = sa[(i - 1) as usize];
3621 let q1 =
3622 ((p1 & SUFFIX_GROUP_MARKER) ^ s) & (((p1 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3623 s ^= q1;
3624 sa[(i - 1) as usize] = p1 ^ q1;
3625
3626 let p2 = sa[(i - 2) as usize];
3627 let q2 =
3628 ((p2 & SUFFIX_GROUP_MARKER) ^ s) & (((p2 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3629 s ^= q2;
3630 sa[(i - 2) as usize] = p2 ^ q2;
3631
3632 let p3 = sa[(i - 3) as usize];
3633 let q3 =
3634 ((p3 & SUFFIX_GROUP_MARKER) ^ s) & (((p3 > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3635 s ^= q3;
3636 sa[(i - 3) as usize] = p3 ^ q3;
3637
3638 i -= 4;
3639 }
3640
3641 while i >= 0 {
3642 let p = sa[i as usize];
3643 let q = ((p & SUFFIX_GROUP_MARKER) ^ s) & (((p > 0) as SaSint) << (SUFFIX_GROUP_BIT - 1));
3644 s ^= q;
3645 sa[i as usize] = p ^ q;
3646 i -= 1;
3647 }
3648}
3649
3650#[doc(hidden)]
3652pub fn partial_sorting_shift_buckets_32s_6k(k: SaSint, buckets: &mut [SaSint]) {
3653 let k_usize = usize::try_from(k).expect("k must be non-negative");
3654 let temp_offset = 4 * k_usize;
3655 for i in 0..k_usize {
3656 let src = buckets_index2(i, 0);
3657 let dst = 2 * src;
3658 buckets[dst] = buckets[temp_offset + src];
3659 buckets[dst + 1] = buckets[temp_offset + src + 1];
3660 }
3661}
3662
3663#[doc(hidden)]
3665pub fn partial_sorting_scan_right_to_left_8u(
3666 t: &[u8],
3667 sa: &mut [SaSint],
3668 buckets: &mut [SaSint],
3669 mut d: SaSint,
3670 omp_block_start: FastSint,
3671 omp_block_size: FastSint,
3672) -> SaSint {
3673 if omp_block_size <= 0 {
3674 return d;
3675 }
3676
3677 let prefetch_distance = 64usize;
3678 let (induction_bucket, distinct_names_all) = buckets.split_at_mut(2 * ALPHABET_SIZE);
3679 let distinct_names = &mut distinct_names_all[..2 * ALPHABET_SIZE];
3680
3681 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
3682 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
3683 let mut i = start + size - 1;
3684 let mut j = start + prefetch_distance + 1;
3685
3686 while i >= j {
3687 let mut p0 = sa[i];
3688 d += SaSint::from(p0 < 0);
3689 p0 &= SAINT_MAX;
3690
3691 let p0_usize = p0 as usize;
3692 let v0 = buckets_index2(
3693 t[p0_usize - 1] as usize,
3694 usize::from(t[p0_usize - 2] > t[p0_usize - 1]),
3695 );
3696
3697 induction_bucket[v0] -= 1;
3698 let slot0 = induction_bucket[v0] as usize;
3699 sa[slot0] = (p0 - 1) | (((distinct_names[v0] != d) as SaSint) << (SAINT_BIT - 1));
3700 distinct_names[v0] = d;
3701
3702 let mut p1 = sa[i - 1];
3703 d += SaSint::from(p1 < 0);
3704 p1 &= SAINT_MAX;
3705
3706 let p1_usize = p1 as usize;
3707 let v1 = buckets_index2(
3708 t[p1_usize - 1] as usize,
3709 usize::from(t[p1_usize - 2] > t[p1_usize - 1]),
3710 );
3711
3712 induction_bucket[v1] -= 1;
3713 let slot1 = induction_bucket[v1] as usize;
3714 sa[slot1] = (p1 - 1) | (((distinct_names[v1] != d) as SaSint) << (SAINT_BIT - 1));
3715 distinct_names[v1] = d;
3716
3717 i -= 2;
3718 }
3719
3720 j = if start + prefetch_distance < start + size {
3721 start
3722 } else {
3723 start
3724 };
3725 while i >= j {
3726 let mut p = sa[i];
3727 d += SaSint::from(p < 0);
3728 p &= SAINT_MAX;
3729
3730 let p_usize = p as usize;
3731 let v = buckets_index2(
3732 t[p_usize - 1] as usize,
3733 usize::from(t[p_usize - 2] > t[p_usize - 1]),
3734 );
3735
3736 induction_bucket[v] -= 1;
3737 let slot = induction_bucket[v] as usize;
3738 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3739 distinct_names[v] = d;
3740
3741 if i == 0 {
3742 break;
3743 }
3744 i -= 1;
3745 }
3746
3747 d
3748}
3749
3750#[doc(hidden)]
3752pub fn partial_gsa_scan_right_to_left_8u(
3753 t: &[u8],
3754 sa: &mut [SaSint],
3755 buckets: &mut [SaSint],
3756 mut d: SaSint,
3757 omp_block_start: FastSint,
3758 omp_block_size: FastSint,
3759) -> SaSint {
3760 if omp_block_size <= 0 {
3761 return d;
3762 }
3763
3764 let prefetch_distance = 64usize;
3765 let (induction_bucket, distinct_names_all) = buckets.split_at_mut(2 * ALPHABET_SIZE);
3766 let distinct_names = &mut distinct_names_all[..2 * ALPHABET_SIZE];
3767
3768 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
3769 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
3770 let mut i = start + size - 1;
3771 let mut j = start + prefetch_distance + 1;
3772
3773 while i >= j {
3774 let mut p0 = sa[i];
3775 d += SaSint::from(p0 < 0);
3776 p0 &= SAINT_MAX;
3777
3778 let p0_usize = p0 as usize;
3779 let v0 = buckets_index2(
3780 t[p0_usize - 1] as usize,
3781 usize::from(t[p0_usize - 2] > t[p0_usize - 1]),
3782 );
3783
3784 if v0 != 1 {
3785 induction_bucket[v0] -= 1;
3786 let slot0 = induction_bucket[v0] as usize;
3787 sa[slot0] = (p0 - 1) | (((distinct_names[v0] != d) as SaSint) << (SAINT_BIT - 1));
3788 distinct_names[v0] = d;
3789 }
3790
3791 let mut p1 = sa[i - 1];
3792 d += SaSint::from(p1 < 0);
3793 p1 &= SAINT_MAX;
3794
3795 let p1_usize = p1 as usize;
3796 let v1 = buckets_index2(
3797 t[p1_usize - 1] as usize,
3798 usize::from(t[p1_usize - 2] > t[p1_usize - 1]),
3799 );
3800
3801 if v1 != 1 {
3802 induction_bucket[v1] -= 1;
3803 let slot1 = induction_bucket[v1] as usize;
3804 sa[slot1] = (p1 - 1) | (((distinct_names[v1] != d) as SaSint) << (SAINT_BIT - 1));
3805 distinct_names[v1] = d;
3806 }
3807
3808 i -= 2;
3809 }
3810
3811 j = start;
3812 while i >= j {
3813 let mut p = sa[i];
3814 d += SaSint::from(p < 0);
3815 p &= SAINT_MAX;
3816
3817 let p_usize = p as usize;
3818 let v = buckets_index2(
3819 t[p_usize - 1] as usize,
3820 usize::from(t[p_usize - 2] > t[p_usize - 1]),
3821 );
3822
3823 if v != 1 {
3824 induction_bucket[v] -= 1;
3825 let slot = induction_bucket[v] as usize;
3826 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3827 distinct_names[v] = d;
3828 }
3829
3830 if i == 0 {
3831 break;
3832 }
3833 i -= 1;
3834 }
3835
3836 d
3837}
3838
3839#[doc(hidden)]
3841pub fn partial_sorting_scan_right_to_left_8u_block_prepare(
3842 t: &[u8],
3843 sa: &[SaSint],
3844 k: SaSint,
3845 buckets: &mut [SaSint],
3846 cache: &mut [ThreadCache],
3847 omp_block_start: FastSint,
3848 omp_block_size: FastSint,
3849) -> (FastSint, FastSint) {
3850 let k_usize = usize::try_from(k).expect("k must be non-negative");
3851 let (induction_bucket, distinct_names_all) = buckets.split_at_mut(2 * k_usize);
3852 let distinct_names = &mut distinct_names_all[..2 * k_usize];
3853 induction_bucket.fill(0);
3854 distinct_names.fill(0);
3855
3856 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
3857 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
3858 let mut count = 0usize;
3859 let mut d = 1;
3860
3861 let mut i = start + size;
3862 while i > start {
3863 i -= 1;
3864
3865 let mut p = sa[i];
3866 cache[count].index = p;
3867 d += SaSint::from(p < 0);
3868 p &= SAINT_MAX;
3869
3870 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
3871 let v = buckets_index2(
3872 t[p_usize - 1] as usize,
3873 usize::from(t[p_usize - 2] > t[p_usize - 1]),
3874 );
3875
3876 cache[count].symbol = v as SaSint;
3877 induction_bucket[v] += 1;
3878 distinct_names[v] = d;
3879 count += 1;
3880 }
3881
3882 ((d - 1) as FastSint, count as FastSint)
3883}
3884
3885#[doc(hidden)]
3887pub fn partial_sorting_scan_right_to_left_8u_block_place(
3888 sa: &mut [SaSint],
3889 buckets: &mut [SaSint],
3890 k: SaSint,
3891 cache: &[ThreadCache],
3892 count: FastSint,
3893 mut d: SaSint,
3894) {
3895 let split = 2 * usize::try_from(k).expect("k must be non-negative");
3896 let (induction_bucket, distinct_names) = buckets.split_at_mut(split);
3897
3898 let count = usize::try_from(count).expect("count must be non-negative");
3899 for entry in &cache[..count] {
3900 let p = entry.index;
3901 d += SaSint::from(p < 0);
3902 let v = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
3903 induction_bucket[v] -= 1;
3904 let slot = usize::try_from(induction_bucket[v]).expect("bucket slot must be non-negative");
3905 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3906 distinct_names[v] = d;
3907 }
3908}
3909
3910#[doc(hidden)]
3912pub fn partial_gsa_scan_right_to_left_8u_block_place(
3913 sa: &mut [SaSint],
3914 buckets: &mut [SaSint],
3915 k: SaSint,
3916 cache: &[ThreadCache],
3917 count: FastSint,
3918 mut d: SaSint,
3919) {
3920 let split = 2 * usize::try_from(k).expect("k must be non-negative");
3921 let (induction_bucket, distinct_names) = buckets.split_at_mut(split);
3922
3923 let count = usize::try_from(count).expect("count must be non-negative");
3924 for entry in &cache[..count] {
3925 let p = entry.index;
3926 d += SaSint::from(p < 0);
3927 let v = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
3928 if v != 1 {
3929 induction_bucket[v] -= 1;
3930 let slot =
3931 usize::try_from(induction_bucket[v]).expect("bucket slot must be non-negative");
3932 sa[slot] = (p - 1) | (((distinct_names[v] != d) as SaSint) << (SAINT_BIT - 1));
3933 distinct_names[v] = d;
3934 }
3935 }
3936}
3937
3938#[doc(hidden)]
3940pub fn partial_sorting_scan_right_to_left_8u_block_omp(
3941 t: &[u8],
3942 sa: &mut [SaSint],
3943 k: SaSint,
3944 buckets: &mut [SaSint],
3945 d: SaSint,
3946 block_start: FastSint,
3947 block_size: FastSint,
3948 threads: SaSint,
3949 thread_state: &mut [ThreadState],
3950) -> SaSint {
3951 let mut d = d;
3952 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
3953 let k_usize = usize::try_from(k).expect("k must be non-negative");
3954 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
3955 usize::try_from(threads)
3956 .expect("threads must be non-negative")
3957 .min(thread_state.len())
3958 .max(1)
3959 } else {
3960 1
3961 };
3962 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
3963
3964 if omp_num_threads == 1 {
3965 return partial_sorting_scan_right_to_left_8u(t, sa, buckets, d, block_start, block_size);
3966 }
3967
3968 for omp_thread_num in 0..omp_num_threads {
3969 let mut omp_block_start = omp_thread_num * omp_block_stride;
3970 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
3971 omp_block_stride
3972 } else {
3973 block_size_usize - omp_block_start
3974 };
3975 omp_block_start += usize::try_from(block_start).expect("block_start must be non-negative");
3976
3977 let state = &mut thread_state[omp_thread_num];
3978 let (position, count) = partial_sorting_scan_right_to_left_8u_block_prepare(
3979 t,
3980 sa,
3981 k,
3982 &mut state.buckets,
3983 &mut state.cache,
3984 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
3985 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
3986 );
3987 state.position = position;
3988 state.count = count;
3989 }
3990
3991 let distinct_offset = 2 * ALPHABET_SIZE;
3992 let (induction_bucket, distinct_tail) = buckets.split_at_mut(distinct_offset);
3993 let distinct_names = &mut distinct_tail[..2 * k_usize];
3994
3995 for tnum in (0..omp_num_threads).rev() {
3996 let state = &mut thread_state[tnum];
3997 let (temp_induction_bucket, temp_tail) = state.buckets.split_at_mut(2 * k_usize);
3998 let temp_distinct_names = &mut temp_tail[..2 * k_usize];
3999
4000 for c in 0..2 * k_usize {
4001 let a = induction_bucket[c];
4002 let b = temp_induction_bucket[c];
4003 induction_bucket[c] = a - b;
4004 temp_induction_bucket[c] = a;
4005 }
4006
4007 d -= 1;
4008 for c in 0..2 * k_usize {
4009 let a = distinct_names[c];
4010 let b = temp_distinct_names[c];
4011 let next_d = b + d;
4012 distinct_names[c] = if b > 0 { next_d } else { a };
4013 temp_distinct_names[c] = a;
4014 }
4015 d += 1 + SaSint::try_from(state.position).expect("position must fit SaSint");
4016 state.position = FastSint::try_from(d).expect("d must fit FastSint") - state.position;
4017 }
4018
4019 for tnum in 0..omp_num_threads {
4020 let state = &mut thread_state[tnum];
4021 partial_sorting_scan_right_to_left_8u_block_place(
4022 sa,
4023 &mut state.buckets,
4024 k,
4025 &state.cache,
4026 state.count,
4027 state.position as SaSint,
4028 );
4029 }
4030
4031 d
4032}
4033
4034#[doc(hidden)]
4036pub fn partial_gsa_scan_right_to_left_8u_block_omp(
4037 t: &[u8],
4038 sa: &mut [SaSint],
4039 k: SaSint,
4040 buckets: &mut [SaSint],
4041 d: SaSint,
4042 block_start: FastSint,
4043 block_size: FastSint,
4044 threads: SaSint,
4045 thread_state: &mut [ThreadState],
4046) -> SaSint {
4047 let mut d = d;
4048 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4049 let k_usize = usize::try_from(k).expect("k must be non-negative");
4050 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
4051 usize::try_from(threads)
4052 .expect("threads must be non-negative")
4053 .min(thread_state.len())
4054 .max(1)
4055 } else {
4056 1
4057 };
4058 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4059
4060 if omp_num_threads == 1 {
4061 return partial_gsa_scan_right_to_left_8u(t, sa, buckets, d, block_start, block_size);
4062 }
4063
4064 for omp_thread_num in 0..omp_num_threads {
4065 let mut omp_block_start = omp_thread_num * omp_block_stride;
4066 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4067 omp_block_stride
4068 } else {
4069 block_size_usize - omp_block_start
4070 };
4071 omp_block_start += usize::try_from(block_start).expect("block_start must be non-negative");
4072
4073 let state = &mut thread_state[omp_thread_num];
4074 let (position, count) = partial_sorting_scan_right_to_left_8u_block_prepare(
4075 t,
4076 sa,
4077 k,
4078 &mut state.buckets,
4079 &mut state.cache,
4080 FastSint::try_from(omp_block_start).expect("block start must fit FastSint"),
4081 FastSint::try_from(omp_block_size).expect("block size must fit FastSint"),
4082 );
4083 state.position = position;
4084 state.count = count;
4085 }
4086
4087 let distinct_offset = 2 * ALPHABET_SIZE;
4088 let (induction_bucket, distinct_tail) = buckets.split_at_mut(distinct_offset);
4089 let distinct_names = &mut distinct_tail[..2 * k_usize];
4090
4091 for tnum in (0..omp_num_threads).rev() {
4092 let state = &mut thread_state[tnum];
4093 let (temp_induction_bucket, temp_tail) = state.buckets.split_at_mut(2 * k_usize);
4094 let temp_distinct_names = &mut temp_tail[..2 * k_usize];
4095
4096 for c in 0..2 * k_usize {
4097 let a = induction_bucket[c];
4098 let b = temp_induction_bucket[c];
4099 induction_bucket[c] = a - b;
4100 temp_induction_bucket[c] = a;
4101 }
4102
4103 d -= 1;
4104 for c in 0..2 * k_usize {
4105 let a = distinct_names[c];
4106 let b = temp_distinct_names[c];
4107 let next_d = b + d;
4108 distinct_names[c] = if b > 0 { next_d } else { a };
4109 temp_distinct_names[c] = a;
4110 }
4111 d += 1 + SaSint::try_from(state.position).expect("position must fit SaSint");
4112 state.position = FastSint::try_from(d).expect("d must fit FastSint") - state.position;
4113 }
4114
4115 for tnum in 0..omp_num_threads {
4116 let state = &mut thread_state[tnum];
4117 partial_gsa_scan_right_to_left_8u_block_place(
4118 sa,
4119 &mut state.buckets,
4120 k,
4121 &state.cache,
4122 state.count,
4123 state.position as SaSint,
4124 );
4125 }
4126
4127 d
4128}
4129
4130#[doc(hidden)]
4132pub fn partial_sorting_scan_right_to_left_8u_omp(
4133 t: &[u8],
4134 sa: &mut [SaSint],
4135 n: SaSint,
4136 k: SaSint,
4137 buckets: &mut [SaSint],
4138 first_lms_suffix: SaSint,
4139 left_suffixes_count: SaSint,
4140 mut d: SaSint,
4141 threads: SaSint,
4142 thread_state: &mut [ThreadState],
4143) {
4144 let scan_start = left_suffixes_count as FastSint + 1;
4145 let scan_end = n as FastSint - first_lms_suffix as FastSint;
4146
4147 if threads == 1 || (scan_end - scan_start) < 65_536 {
4148 let _ = partial_sorting_scan_right_to_left_8u(
4149 t,
4150 sa,
4151 buckets,
4152 d,
4153 scan_start,
4154 scan_end - scan_start,
4155 );
4156 return;
4157 }
4158
4159 let distinct_offset = 2 * ALPHABET_SIZE;
4160
4161 let mut block_start = usize::try_from(scan_end - 1).expect("scan end must be positive");
4162 let scan_start_usize = usize::try_from(scan_start).expect("scan_start must be non-negative");
4163 let threads_usize = usize::try_from(threads)
4164 .expect("threads must be non-negative")
4165 .min(thread_state.len())
4166 .max(1);
4167
4168 while block_start >= scan_start_usize {
4169 if sa[block_start] == 0 {
4170 if block_start == 0 {
4171 break;
4172 }
4173 block_start -= 1;
4174 } else {
4175 let mut block_max_end = block_start.saturating_sub(
4176 threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize),
4177 );
4178 if block_max_end + 1 < scan_start_usize {
4179 block_max_end = scan_start_usize.saturating_sub(1);
4180 }
4181 let mut block_end = block_start - 1;
4182 while block_end > block_max_end && sa[block_end] != 0 {
4183 block_end -= 1;
4184 }
4185 let block_size = block_start - block_end;
4186
4187 if block_size < 32 {
4188 while block_start > block_end {
4189 let p = sa[block_start];
4190 d += SaSint::from(p < 0);
4191 let p = p & SAINT_MAX;
4192 let v = buckets_index2(
4193 t[(p - 1) as usize] as usize,
4194 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4195 );
4196 buckets[v] -= 1;
4197 let slot =
4198 usize::try_from(buckets[v]).expect("bucket slot must be non-negative");
4199 sa[slot] = (p - 1)
4200 | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
4201 buckets[distinct_offset + v] = d;
4202
4203 if block_start == 0 {
4204 break;
4205 }
4206 block_start -= 1;
4207 }
4208 } else {
4209 d = partial_sorting_scan_right_to_left_8u_block_omp(
4210 t,
4211 sa,
4212 k,
4213 buckets,
4214 d,
4215 FastSint::try_from(block_end + 1).expect("block start must fit FastSint"),
4216 FastSint::try_from(block_size).expect("block size must fit FastSint"),
4217 threads,
4218 thread_state,
4219 );
4220 block_start = block_end;
4221 }
4222 }
4223 }
4224}
4225
4226#[doc(hidden)]
4228pub fn partial_gsa_scan_right_to_left_8u_omp(
4229 t: &[u8],
4230 sa: &mut [SaSint],
4231 n: SaSint,
4232 k: SaSint,
4233 buckets: &mut [SaSint],
4234 first_lms_suffix: SaSint,
4235 left_suffixes_count: SaSint,
4236 mut d: SaSint,
4237 threads: SaSint,
4238 thread_state: &mut [ThreadState],
4239) {
4240 let scan_start = left_suffixes_count as FastSint + 1;
4241 let scan_end = n as FastSint - first_lms_suffix as FastSint;
4242
4243 if threads == 1 || (scan_end - scan_start) < 65_536 {
4244 let _ =
4245 partial_gsa_scan_right_to_left_8u(t, sa, buckets, d, scan_start, scan_end - scan_start);
4246 return;
4247 }
4248
4249 let distinct_offset = 2 * ALPHABET_SIZE;
4250 let mut block_start = usize::try_from(scan_end - 1).expect("scan end must be positive");
4251 let scan_start_usize = usize::try_from(scan_start).expect("scan_start must be non-negative");
4252 let threads_usize = usize::try_from(threads)
4253 .expect("threads must be non-negative")
4254 .min(thread_state.len())
4255 .max(1);
4256
4257 while block_start >= scan_start_usize {
4258 if sa[block_start] == 0 {
4259 if block_start == 0 {
4260 break;
4261 }
4262 block_start -= 1;
4263 } else {
4264 let mut block_max_end = block_start.saturating_sub(
4265 threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize),
4266 );
4267 if block_max_end + 1 < scan_start_usize {
4268 block_max_end = scan_start_usize.saturating_sub(1);
4269 }
4270 let mut block_end = block_start - 1;
4271 while block_end > block_max_end && sa[block_end] != 0 {
4272 block_end -= 1;
4273 }
4274 let block_size = block_start - block_end;
4275
4276 if block_size < 32 {
4277 while block_start > block_end {
4278 let p = sa[block_start];
4279 d += SaSint::from(p < 0);
4280 let p = p & SAINT_MAX;
4281 let v = buckets_index2(
4282 t[(p - 1) as usize] as usize,
4283 usize::from(t[(p - 2) as usize] > t[(p - 1) as usize]),
4284 );
4285 if v != 1 {
4286 buckets[v] -= 1;
4287 let slot =
4288 usize::try_from(buckets[v]).expect("bucket slot must be non-negative");
4289 sa[slot] = (p - 1)
4290 | (((buckets[distinct_offset + v] != d) as SaSint) << (SAINT_BIT - 1));
4291 buckets[distinct_offset + v] = d;
4292 }
4293
4294 if block_start == 0 {
4295 break;
4296 }
4297 block_start -= 1;
4298 }
4299 } else {
4300 d = partial_gsa_scan_right_to_left_8u_block_omp(
4301 t,
4302 sa,
4303 k,
4304 buckets,
4305 d,
4306 FastSint::try_from(block_end + 1).expect("block start must fit FastSint"),
4307 FastSint::try_from(block_size).expect("block size must fit FastSint"),
4308 threads,
4309 thread_state,
4310 );
4311 block_start = block_end;
4312 }
4313 }
4314 }
4315}
4316
4317#[doc(hidden)]
4319pub fn partial_sorting_scan_right_to_left_32s_6k(
4320 t: &[SaSint],
4321 sa: &mut [SaSint],
4322 buckets: &mut [SaSint],
4323 mut d: SaSint,
4324 omp_block_start: FastSint,
4325 omp_block_size: FastSint,
4326) -> SaSint {
4327 if omp_block_size <= 0 {
4328 return d;
4329 }
4330
4331 let prefetch_distance: FastSint = 64;
4332 let mut i = omp_block_start + omp_block_size - 1;
4333 let mut j = omp_block_start + 2 * prefetch_distance + 1;
4334
4335 while i >= j {
4336 let mut p0 = sa[i as usize];
4337 d += SaSint::from(p0 < 0);
4338 p0 &= SAINT_MAX;
4339 let p0u = p0 as usize;
4340 let v0 = buckets_index4(t[p0u - 1] as usize, usize::from(t[p0u - 2] > t[p0u - 1]));
4341 buckets[v0] -= 1;
4342 let slot0 = buckets[v0] as usize;
4343 sa[slot0] = (p0 - 1) | (((buckets[2 + v0] != d) as SaSint) << (SAINT_BIT - 1));
4344 buckets[2 + v0] = d;
4345
4346 let mut p1 = sa[(i - 1) as usize];
4347 d += SaSint::from(p1 < 0);
4348 p1 &= SAINT_MAX;
4349 let p1u = p1 as usize;
4350 let v1 = buckets_index4(t[p1u - 1] as usize, usize::from(t[p1u - 2] > t[p1u - 1]));
4351 buckets[v1] -= 1;
4352 let slot1 = buckets[v1] as usize;
4353 sa[slot1] = (p1 - 1) | (((buckets[2 + v1] != d) as SaSint) << (SAINT_BIT - 1));
4354 buckets[2 + v1] = d;
4355
4356 i -= 2;
4357 }
4358
4359 j -= 2 * prefetch_distance + 1;
4360 while i >= j {
4361 let mut p = sa[i as usize];
4362 d += SaSint::from(p < 0);
4363 p &= SAINT_MAX;
4364 let pu = p as usize;
4365 let v = buckets_index4(t[pu - 1] as usize, usize::from(t[pu - 2] > t[pu - 1]));
4366
4367 buckets[v] -= 1;
4368 let slot = buckets[v] as usize;
4369 sa[slot] = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4370 buckets[2 + v] = d;
4371 i -= 1;
4372 }
4373
4374 d
4375}
4376
4377#[doc(hidden)]
4379pub fn partial_sorting_scan_right_to_left_32s_4k(
4380 t: &[SaSint],
4381 sa: &mut [SaSint],
4382 k: SaSint,
4383 buckets: &mut [SaSint],
4384 mut d: SaSint,
4385 omp_block_start: FastSint,
4386 omp_block_size: FastSint,
4387) -> SaSint {
4388 if omp_block_size <= 0 {
4389 return d;
4390 }
4391
4392 let k_usize = usize::try_from(k).expect("k must be non-negative");
4393 let prefetch_distance: FastSint = 64;
4394 let induction_offset = 3 * k_usize;
4395
4396 let mut i = omp_block_start + omp_block_size - 1;
4397 let mut j = omp_block_start + 2 * prefetch_distance + 1;
4398
4399 while i >= j {
4400 let i0 = i as usize;
4401 let mut p0 = sa[i0];
4402 if p0 > 0 {
4403 sa[i0] = 0;
4404 d += p0 >> (SUFFIX_GROUP_BIT - 1);
4405 p0 &= !SUFFIX_GROUP_MARKER;
4406
4407 let p0u = p0 as usize;
4408 let c0 = t[p0u - 1];
4409 let f0 = usize::from(t[p0u - 2] > c0);
4410 let v0 = buckets_index2(c0 as usize, f0);
4411 let c0u = c0 as usize;
4412 buckets[induction_offset + c0u] -= 1;
4413 let slot0 = buckets[induction_offset + c0u] as usize;
4414 sa[slot0] = (p0 - 1)
4415 | ((f0 as SaSint) << (SAINT_BIT - 1))
4416 | (((buckets[v0] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4417 buckets[v0] = d;
4418 }
4419
4420 let i1 = (i - 1) as usize;
4421 let mut p1 = sa[i1];
4422 if p1 > 0 {
4423 sa[i1] = 0;
4424 d += p1 >> (SUFFIX_GROUP_BIT - 1);
4425 p1 &= !SUFFIX_GROUP_MARKER;
4426
4427 let p1u = p1 as usize;
4428 let c1 = t[p1u - 1];
4429 let f1 = usize::from(t[p1u - 2] > c1);
4430 let v1 = buckets_index2(c1 as usize, f1);
4431 let c1u = c1 as usize;
4432 buckets[induction_offset + c1u] -= 1;
4433 let slot1 = buckets[induction_offset + c1u] as usize;
4434 sa[slot1] = (p1 - 1)
4435 | ((f1 as SaSint) << (SAINT_BIT - 1))
4436 | (((buckets[v1] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4437 buckets[v1] = d;
4438 }
4439
4440 i -= 2;
4441 }
4442
4443 j -= 2 * prefetch_distance + 1;
4444 while i >= j {
4445 let iu = i as usize;
4446 let mut p = sa[iu];
4447 if p > 0 {
4448 sa[iu] = 0;
4449 d += p >> (SUFFIX_GROUP_BIT - 1);
4450 p &= !SUFFIX_GROUP_MARKER;
4451
4452 let pu = p as usize;
4453 let c = t[pu - 1];
4454 let f = usize::from(t[pu - 2] > c);
4455 let v = buckets_index2(c as usize, f);
4456 let cu = c as usize;
4457 buckets[induction_offset + cu] -= 1;
4458 let slot = buckets[induction_offset + cu] as usize;
4459 sa[slot] = (p - 1)
4460 | ((f as SaSint) << (SAINT_BIT - 1))
4461 | (((buckets[v] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
4462 buckets[v] = d;
4463 }
4464 i -= 1;
4465 }
4466
4467 d
4468}
4469
4470#[doc(hidden)]
4472pub fn partial_sorting_scan_right_to_left_32s_1k(
4473 t: &[SaSint],
4474 sa: &mut [SaSint],
4475 induction_bucket: &mut [SaSint],
4476 omp_block_start: FastSint,
4477 omp_block_size: FastSint,
4478) {
4479 if omp_block_size <= 0 {
4480 return;
4481 }
4482
4483 let prefetch_distance = 64usize;
4484 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4485 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4486 let mut i = (start + size - 1) as isize;
4487 let mut j = (start + 2 * prefetch_distance + 1) as isize;
4488
4489 while i >= j {
4490 let p0 = sa[i as usize];
4491 if p0 > 0 {
4492 sa[i as usize] = 0;
4493 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
4494 let bucket_index0 =
4495 usize::try_from(t[p0_usize - 1]).expect("bucket symbol must be non-negative");
4496 induction_bucket[bucket_index0] -= 1;
4497 let slot0 = usize::try_from(induction_bucket[bucket_index0])
4498 .expect("bucket slot must be non-negative");
4499 sa[slot0] = (p0 - 1)
4500 | ((usize::from(t[p0_usize - 2] > t[p0_usize - 1]) as SaSint) << (SAINT_BIT - 1));
4501 }
4502 let p1 = sa[(i - 1) as usize];
4503 if p1 > 0 {
4504 sa[(i - 1) as usize] = 0;
4505 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
4506 let bucket_index1 =
4507 usize::try_from(t[p1_usize - 1]).expect("bucket symbol must be non-negative");
4508 induction_bucket[bucket_index1] -= 1;
4509 let slot1 = usize::try_from(induction_bucket[bucket_index1])
4510 .expect("bucket slot must be non-negative");
4511 sa[slot1] = (p1 - 1)
4512 | ((usize::from(t[p1_usize - 2] > t[p1_usize - 1]) as SaSint) << (SAINT_BIT - 1));
4513 }
4514
4515 i -= 2;
4516 }
4517
4518 j -= (2 * prefetch_distance + 1) as isize;
4519 while i >= j {
4520 let p = sa[i as usize];
4521 if p > 0 {
4522 sa[i as usize] = 0;
4523 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
4524 let bucket_index =
4525 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative");
4526 induction_bucket[bucket_index] -= 1;
4527 let slot = usize::try_from(induction_bucket[bucket_index])
4528 .expect("bucket slot must be non-negative");
4529 sa[slot] = (p - 1)
4530 | ((usize::from(t[p_usize - 2] > t[p_usize - 1]) as SaSint) << (SAINT_BIT - 1));
4531 }
4532 if i == 0 {
4533 break;
4534 }
4535 i -= 1;
4536 }
4537}
4538
4539#[doc(hidden)]
4541pub fn partial_sorting_scan_right_to_left_32s_6k_block_gather(
4542 t: &[SaSint],
4543 sa: &[SaSint],
4544 cache: &mut [ThreadCache],
4545 omp_block_start: FastSint,
4546 omp_block_size: FastSint,
4547) {
4548 if omp_block_size <= 0 {
4549 return;
4550 }
4551
4552 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4553 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4554 for offset in 0..size {
4555 let i = start + offset;
4556 let mut p = sa[i];
4557 let mut symbol = 0usize;
4558 p &= SAINT_MAX;
4559 if p != 0 {
4560 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
4561 symbol = buckets_index4(
4562 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative"),
4563 usize::from(t[p_usize - 2] > t[p_usize - 1]),
4564 );
4565 }
4566 cache[offset].index = sa[i];
4567 cache[offset].symbol = symbol as SaSint;
4568 }
4569}
4570
4571#[doc(hidden)]
4573pub fn partial_sorting_scan_right_to_left_32s_4k_block_gather(
4574 t: &[SaSint],
4575 sa: &mut [SaSint],
4576 cache: &mut [ThreadCache],
4577 omp_block_start: FastSint,
4578 omp_block_size: FastSint,
4579) {
4580 if omp_block_size <= 0 {
4581 return;
4582 }
4583
4584 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4585 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4586 for offset in 0..size {
4587 let i = start + offset;
4588 let mut symbol = SAINT_MIN;
4589 let mut p = sa[i];
4590 if p > 0 {
4591 sa[i] = 0;
4592 cache[offset].index = p;
4593 p &= !SUFFIX_GROUP_MARKER;
4594 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
4595 symbol = buckets_index2(
4596 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative"),
4597 usize::from(t[p_usize - 2] > t[p_usize - 1]),
4598 ) as SaSint;
4599 }
4600 cache[offset].symbol = symbol;
4601 }
4602}
4603
4604#[doc(hidden)]
4606pub fn partial_sorting_scan_right_to_left_32s_1k_block_gather(
4607 t: &[SaSint],
4608 sa: &mut [SaSint],
4609 cache: &mut [ThreadCache],
4610 omp_block_start: FastSint,
4611 omp_block_size: FastSint,
4612) {
4613 if omp_block_size <= 0 {
4614 return;
4615 }
4616 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
4617 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4618 for offset in 0..size {
4619 let i = start + offset;
4620 let mut symbol = SAINT_MIN;
4621 let p = sa[i];
4622 if p > 0 {
4623 sa[i] = 0;
4624 cache[offset].index = (p - 1)
4625 | ((usize::from(t[p as usize - 2] > t[p as usize - 1]) as SaSint)
4626 << (SAINT_BIT - 1));
4627 symbol = t[p as usize - 1];
4628 }
4629 cache[offset].symbol = symbol;
4630 }
4631}
4632
4633#[doc(hidden)]
4635pub fn partial_sorting_scan_right_to_left_32s_6k_block_sort(
4636 t: &[SaSint],
4637 buckets: &mut [SaSint],
4638 mut d: SaSint,
4639 cache: &mut [ThreadCache],
4640 omp_block_start: FastSint,
4641 omp_block_size: FastSint,
4642) -> SaSint {
4643 if omp_block_size <= 0 {
4644 return d;
4645 }
4646
4647 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4648 let mut i = size;
4649 while i > 0 {
4650 i -= 1;
4651
4652 let v = usize::try_from(cache[i].symbol).expect("cache symbol must be non-negative");
4653 let p = cache[i].index;
4654 d += SaSint::from(p < 0);
4655 buckets[v] -= 1;
4656 let target = buckets[v];
4657 cache[i].symbol = target;
4658 cache[i].index = (p - 1) | (((buckets[2 + v] != d) as SaSint) << (SAINT_BIT - 1));
4659 buckets[2 + v] = d;
4660
4661 let block_end = omp_block_start as SaSint + omp_block_size as SaSint;
4662 if target >= omp_block_start as SaSint && target < block_end {
4663 let s = usize::try_from(target - omp_block_start as SaSint)
4664 .expect("cache slot must be non-negative");
4665 let q = cache[i].index & SAINT_MAX;
4666 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
4667 cache[s].index = cache[i].index;
4668 cache[s].symbol = buckets_index4(
4669 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
4670 usize::from(t[q_usize - 2] > t[q_usize - 1]),
4671 ) as SaSint;
4672 }
4673 }
4674
4675 d
4676}
4677
4678#[doc(hidden)]
4680pub fn partial_sorting_scan_right_to_left_32s_4k_block_sort(
4681 t: &[SaSint],
4682 k: SaSint,
4683 buckets: &mut [SaSint],
4684 mut d: SaSint,
4685 cache: &mut [ThreadCache],
4686 omp_block_start: FastSint,
4687 omp_block_size: FastSint,
4688) -> SaSint {
4689 if omp_block_size <= 0 {
4690 return d;
4691 }
4692
4693 let k_usize = usize::try_from(k).expect("k must be non-negative");
4694 let (distinct_names, tail) = buckets.split_at_mut(2 * k_usize);
4695 let induction_bucket = &mut tail[k_usize..2 * k_usize];
4696
4697 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4698 let mut i = size;
4699 while i > 0 {
4700 i -= 1;
4701
4702 let v = cache[i].symbol;
4703 if v >= 0 {
4704 let p = cache[i].index;
4705 d += p >> (SUFFIX_GROUP_BIT - 1);
4706 let bucket_index = usize::try_from(v >> 1).expect("bucket symbol must be non-negative");
4707 induction_bucket[bucket_index] -= 1;
4708 let target = induction_bucket[bucket_index];
4709 cache[i].symbol = target;
4710 cache[i].index = (p - 1)
4711 | ((v & 1) << (SAINT_BIT - 1))
4712 | (((distinct_names
4713 [usize::try_from(v).expect("bucket symbol must be non-negative")]
4714 != d) as SaSint)
4715 << (SUFFIX_GROUP_BIT - 1));
4716 distinct_names[usize::try_from(v).expect("bucket symbol must be non-negative")] = d;
4717
4718 let block_end = omp_block_start as SaSint + omp_block_size as SaSint;
4719 if target >= omp_block_start as SaSint && target < block_end {
4720 let ni = usize::try_from(target - omp_block_start as SaSint)
4721 .expect("cache slot must be non-negative");
4722 let mut np = cache[i].index;
4723 if np > 0 {
4724 cache[i].index = 0;
4725 cache[ni].index = np;
4726 np &= !SUFFIX_GROUP_MARKER;
4727 let np_usize = usize::try_from(np).expect("suffix index must be non-negative");
4728 cache[ni].symbol = buckets_index2(
4729 usize::try_from(t[np_usize - 1])
4730 .expect("bucket symbol must be non-negative"),
4731 usize::from(t[np_usize - 2] > t[np_usize - 1]),
4732 ) as SaSint;
4733 }
4734 }
4735 }
4736 }
4737
4738 d
4739}
4740
4741#[doc(hidden)]
4743pub fn partial_sorting_scan_right_to_left_32s_1k_block_sort(
4744 t: &[SaSint],
4745 induction_bucket: &mut [SaSint],
4746 cache: &mut [ThreadCache],
4747 omp_block_start: FastSint,
4748 omp_block_size: FastSint,
4749) {
4750 if omp_block_size <= 0 {
4751 return;
4752 }
4753 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
4754 let mut offset = size;
4755
4756 while offset > 0 {
4757 offset -= 1;
4758 let v = cache[offset].symbol;
4759 if v >= 0 {
4760 let bucket_index = v as usize;
4761 induction_bucket[bucket_index] -= 1;
4762 let target = induction_bucket[bucket_index];
4763 cache[offset].symbol = target;
4764 let block_end = omp_block_start as SaSint + omp_block_size as SaSint;
4765 if target >= omp_block_start as SaSint && target < block_end {
4766 let ni = usize::try_from(target - omp_block_start as SaSint)
4767 .expect("cache slot must be non-negative");
4768 let np = cache[offset].index;
4769 if np > 0 {
4770 cache[offset].index = 0;
4771 cache[ni].index = (np - 1)
4772 | ((usize::from(t[np as usize - 2] > t[np as usize - 1]) as SaSint)
4773 << (SAINT_BIT - 1));
4774 cache[ni].symbol = t[np as usize - 1];
4775 }
4776 }
4777 }
4778 }
4779}
4780
4781#[doc(hidden)]
4783pub fn partial_sorting_scan_right_to_left_32s_6k_block_omp(
4784 t: &[SaSint],
4785 sa: &mut [SaSint],
4786 buckets: &mut [SaSint],
4787 mut d: SaSint,
4788 cache: &mut [ThreadCache],
4789 block_start: FastSint,
4790 block_size: FastSint,
4791 threads: SaSint,
4792) -> SaSint {
4793 if block_size <= 0 {
4794 return d;
4795 }
4796 if threads == 1 || block_size < 16_384 {
4797 return partial_sorting_scan_right_to_left_32s_6k(
4798 t,
4799 sa,
4800 buckets,
4801 d,
4802 block_start,
4803 block_size,
4804 );
4805 }
4806
4807 let threads_usize = usize::try_from(threads)
4808 .expect("threads must be non-negative")
4809 .max(1);
4810 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4811 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4812 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4813
4814 for omp_thread_num in 0..omp_num_threads {
4815 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4816 omp_block_stride
4817 } else {
4818 block_size_usize - omp_thread_num * omp_block_stride
4819 };
4820 let omp_block_start = usize::try_from(block_start)
4821 .expect("block_start must be non-negative")
4822 + omp_thread_num * omp_block_stride;
4823 if omp_block_size > 0 {
4824 partial_sorting_scan_right_to_left_32s_6k_block_gather(
4825 t,
4826 sa,
4827 &mut cache[omp_thread_num * omp_block_stride
4828 ..omp_thread_num * omp_block_stride + omp_block_size],
4829 omp_block_start as FastSint,
4830 omp_block_size as FastSint,
4831 );
4832 }
4833 }
4834
4835 d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
4836 t,
4837 buckets,
4838 d,
4839 &mut cache[..block_size_usize],
4840 block_start,
4841 block_size,
4842 );
4843
4844 for omp_thread_num in 0..omp_num_threads {
4845 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4846 omp_block_stride
4847 } else {
4848 block_size_usize - omp_thread_num * omp_block_stride
4849 };
4850 let cache_start = omp_thread_num * omp_block_stride;
4851 if omp_block_size > 0 {
4852 place_cached_suffixes(sa, &cache[cache_start..], 0, omp_block_size as FastSint);
4853 }
4854 }
4855
4856 d
4857}
4858
4859#[doc(hidden)]
4861pub fn partial_sorting_scan_right_to_left_32s_4k_block_omp(
4862 t: &[SaSint],
4863 sa: &mut [SaSint],
4864 k: SaSint,
4865 buckets: &mut [SaSint],
4866 mut d: SaSint,
4867 cache: &mut [ThreadCache],
4868 block_start: FastSint,
4869 block_size: FastSint,
4870 threads: SaSint,
4871) -> SaSint {
4872 if block_size <= 0 {
4873 return d;
4874 }
4875 if threads == 1 || block_size < 16_384 {
4876 return partial_sorting_scan_right_to_left_32s_4k(
4877 t,
4878 sa,
4879 k,
4880 buckets,
4881 d,
4882 block_start,
4883 block_size,
4884 );
4885 }
4886
4887 let threads_usize = usize::try_from(threads)
4888 .expect("threads must be non-negative")
4889 .max(1);
4890 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4891 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4892 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4893
4894 for omp_thread_num in 0..omp_num_threads {
4895 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4896 omp_block_stride
4897 } else {
4898 block_size_usize - omp_thread_num * omp_block_stride
4899 };
4900 let omp_block_start = usize::try_from(block_start)
4901 .expect("block_start must be non-negative")
4902 + omp_thread_num * omp_block_stride;
4903 if omp_block_size > 0 {
4904 partial_sorting_scan_right_to_left_32s_4k_block_gather(
4905 t,
4906 sa,
4907 &mut cache[omp_thread_num * omp_block_stride
4908 ..omp_thread_num * omp_block_stride + omp_block_size],
4909 omp_block_start as FastSint,
4910 omp_block_size as FastSint,
4911 );
4912 }
4913 }
4914
4915 d = partial_sorting_scan_right_to_left_32s_4k_block_sort(
4916 t,
4917 k,
4918 buckets,
4919 d,
4920 &mut cache[..block_size_usize],
4921 block_start,
4922 block_size,
4923 );
4924
4925 for omp_thread_num in 0..omp_num_threads {
4926 let omp_block_start = omp_thread_num * omp_block_stride;
4927 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4928 omp_block_stride
4929 } else {
4930 block_size_usize - omp_block_start
4931 };
4932 if omp_block_size > 0 {
4933 compact_and_place_cached_suffixes(
4934 sa,
4935 &mut cache[omp_block_start..],
4936 0,
4937 omp_block_size as FastSint,
4938 );
4939 }
4940 }
4941
4942 d
4943}
4944
4945#[doc(hidden)]
4947pub fn partial_sorting_scan_right_to_left_32s_1k_block_omp(
4948 t: &[SaSint],
4949 sa: &mut [SaSint],
4950 buckets: &mut [SaSint],
4951 cache: &mut [ThreadCache],
4952 block_start: FastSint,
4953 block_size: FastSint,
4954 threads: SaSint,
4955) {
4956 if block_size <= 0 {
4957 return;
4958 }
4959 if threads == 1 || block_size < 16_384 {
4960 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, block_start, block_size);
4961 return;
4962 }
4963
4964 let threads_usize = usize::try_from(threads)
4965 .expect("threads must be non-negative")
4966 .max(1);
4967 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
4968 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
4969 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
4970 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
4971
4972 for omp_thread_num in 0..omp_num_threads {
4973 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
4974 omp_block_stride
4975 } else {
4976 block_size_usize - omp_thread_num * omp_block_stride
4977 };
4978 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
4979 if omp_block_size > 0 {
4980 partial_sorting_scan_right_to_left_32s_1k_block_gather(
4981 t,
4982 sa,
4983 &mut cache[omp_thread_num * omp_block_stride
4984 ..omp_thread_num * omp_block_stride + omp_block_size],
4985 omp_block_start as FastSint,
4986 omp_block_size as FastSint,
4987 );
4988 }
4989 }
4990
4991 let cache = &mut cache[..block_size_usize];
4992 partial_sorting_scan_right_to_left_32s_1k_block_sort(
4993 t,
4994 buckets,
4995 cache,
4996 block_start,
4997 block_size,
4998 );
4999 for omp_thread_num in 0..omp_num_threads {
5000 let omp_block_start = omp_thread_num * omp_block_stride;
5001 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5002 omp_block_stride
5003 } else {
5004 block_size_usize - omp_block_start
5005 };
5006 if omp_block_size > 0 {
5007 compact_and_place_cached_suffixes(
5008 sa,
5009 &mut cache[omp_block_start..],
5010 0,
5011 omp_block_size as FastSint,
5012 );
5013 }
5014 }
5015}
5016
5017#[doc(hidden)]
5019pub fn partial_sorting_scan_left_to_right_32s_6k_block_gather(
5020 t: &[SaSint],
5021 sa: &mut [SaSint],
5022 cache: &mut [ThreadCache],
5023 omp_block_start: FastSint,
5024 omp_block_size: FastSint,
5025) {
5026 if omp_block_size <= 0 {
5027 return;
5028 }
5029
5030 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5031 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5032 for offset in 0..size {
5033 let i = start + offset;
5034 let p = sa[i];
5035 cache[offset].index = p;
5036 let q = p & SAINT_MAX;
5037 cache[offset].symbol = if q != 0 {
5038 buckets_index4(
5039 usize::try_from(t[q as usize - 1]).expect("bucket symbol must be non-negative"),
5040 usize::from(t[q as usize - 2] >= t[q as usize - 1]),
5041 ) as SaSint
5042 } else {
5043 0
5044 };
5045 }
5046}
5047
5048#[doc(hidden)]
5050pub fn partial_sorting_scan_left_to_right_32s_4k_block_gather(
5051 t: &[SaSint],
5052 sa: &mut [SaSint],
5053 cache: &mut [ThreadCache],
5054 omp_block_start: FastSint,
5055 omp_block_size: FastSint,
5056) {
5057 if omp_block_size <= 0 {
5058 return;
5059 }
5060
5061 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5062 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5063 for offset in 0..size {
5064 let i = start + offset;
5065 let mut symbol = SAINT_MIN;
5066 let mut p = sa[i];
5067 if p > 0 {
5068 cache[offset].index = p;
5069 p &= !SUFFIX_GROUP_MARKER;
5070 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
5071 symbol = buckets_index2(
5072 usize::try_from(t[p_usize - 1]).expect("bucket symbol must be non-negative"),
5073 usize::from(t[p_usize - 2] < t[p_usize - 1]),
5074 ) as SaSint;
5075 p = 0;
5076 }
5077 cache[offset].symbol = symbol;
5078 sa[i] = p & SAINT_MAX;
5079 }
5080}
5081
5082#[doc(hidden)]
5084pub fn partial_sorting_scan_left_to_right_32s_1k_block_gather(
5085 t: &[SaSint],
5086 sa: &mut [SaSint],
5087 cache: &mut [ThreadCache],
5088 omp_block_start: FastSint,
5089 omp_block_size: FastSint,
5090) {
5091 if omp_block_size <= 0 {
5092 return;
5093 }
5094 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5095 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5096 for offset in 0..size {
5097 let i = start + offset;
5098 let mut symbol = SAINT_MIN;
5099 let mut p = sa[i];
5100 if p > 0 {
5101 cache[offset].index = (p - 1)
5102 | ((usize::from(t[p as usize - 2] < t[p as usize - 1]) as SaSint)
5103 << (SAINT_BIT - 1));
5104 symbol = t[p as usize - 1];
5105 p = 0;
5106 }
5107 cache[offset].symbol = symbol;
5108 sa[i] = p & SAINT_MAX;
5109 }
5110}
5111
5112#[doc(hidden)]
5114pub fn partial_sorting_scan_left_to_right_32s_6k_block_sort(
5115 t: &[SaSint],
5116 buckets: &mut [SaSint],
5117 mut d: SaSint,
5118 cache: &mut [ThreadCache],
5119 omp_block_start: FastSint,
5120 omp_block_size: FastSint,
5121) -> SaSint {
5122 if omp_block_size <= 0 {
5123 return d;
5124 }
5125
5126 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5127 let block_end =
5128 start + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5129
5130 let mut i = start;
5131 let mut j = block_end.saturating_sub(65);
5132 while i < j {
5133 let cache_i0 = i - start;
5134 let cache_i1 = cache_i0 + 1;
5135
5136 let v0 =
5137 usize::try_from(cache[cache_i0].symbol).expect("cache symbol must be non-negative");
5138 let p0 = cache[cache_i0].index;
5139 d += SaSint::from(p0 < 0);
5140 cache[cache_i0].symbol = buckets[v0];
5141 buckets[v0] += 1;
5142 cache[cache_i0].index =
5143 (p0 - 1) | ((SaSint::from(buckets[2 + v0] != d)) << (SAINT_BIT - 1));
5144 buckets[2 + v0] = d;
5145 if cache[cache_i0].symbol >= omp_block_start as SaSint
5146 && cache[cache_i0].symbol < block_end as SaSint
5147 {
5148 let s = usize::try_from(cache[cache_i0].symbol - omp_block_start as SaSint)
5149 .expect("cache slot must be non-negative");
5150 let q = cache[cache_i0].index & SAINT_MAX;
5151 cache[s].index = cache[cache_i0].index;
5152 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
5153 cache[s].symbol = buckets_index4(
5154 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
5155 usize::from(t[q_usize - 2] >= t[q_usize - 1]),
5156 ) as SaSint;
5157 }
5158
5159 let v1 =
5160 usize::try_from(cache[cache_i1].symbol).expect("cache symbol must be non-negative");
5161 let p1 = cache[cache_i1].index;
5162 d += SaSint::from(p1 < 0);
5163 cache[cache_i1].symbol = buckets[v1];
5164 buckets[v1] += 1;
5165 cache[cache_i1].index =
5166 (p1 - 1) | ((SaSint::from(buckets[2 + v1] != d)) << (SAINT_BIT - 1));
5167 buckets[2 + v1] = d;
5168 if cache[cache_i1].symbol >= omp_block_start as SaSint
5169 && cache[cache_i1].symbol < block_end as SaSint
5170 {
5171 let s = usize::try_from(cache[cache_i1].symbol - omp_block_start as SaSint)
5172 .expect("cache slot must be non-negative");
5173 let q = cache[cache_i1].index & SAINT_MAX;
5174 cache[s].index = cache[cache_i1].index;
5175 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
5176 cache[s].symbol = buckets_index4(
5177 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
5178 usize::from(t[q_usize - 2] >= t[q_usize - 1]),
5179 ) as SaSint;
5180 }
5181
5182 i += 2;
5183 }
5184
5185 j += 65;
5186 while i < j {
5187 let cache_i = i - start;
5188 let v = usize::try_from(cache[cache_i].symbol).expect("cache symbol must be non-negative");
5189 let p = cache[cache_i].index;
5190 d += SaSint::from(p < 0);
5191 cache[cache_i].symbol = buckets[v];
5192 buckets[v] += 1;
5193 cache[cache_i].index = (p - 1) | ((SaSint::from(buckets[2 + v] != d)) << (SAINT_BIT - 1));
5194 buckets[2 + v] = d;
5195 if cache[cache_i].symbol >= omp_block_start as SaSint
5196 && cache[cache_i].symbol < block_end as SaSint
5197 {
5198 let s = usize::try_from(cache[cache_i].symbol - omp_block_start as SaSint)
5199 .expect("cache slot must be non-negative");
5200 let q = cache[cache_i].index & SAINT_MAX;
5201 cache[s].index = cache[cache_i].index;
5202 let q_usize = usize::try_from(q).expect("suffix index must be non-negative");
5203 cache[s].symbol = buckets_index4(
5204 usize::try_from(t[q_usize - 1]).expect("bucket symbol must be non-negative"),
5205 usize::from(t[q_usize - 2] >= t[q_usize - 1]),
5206 ) as SaSint;
5207 }
5208 i += 1;
5209 }
5210
5211 d
5212}
5213
5214#[doc(hidden)]
5216pub fn partial_sorting_scan_left_to_right_32s_4k_block_sort(
5217 t: &[SaSint],
5218 k: SaSint,
5219 buckets: &mut [SaSint],
5220 mut d: SaSint,
5221 cache: &mut [ThreadCache],
5222 omp_block_start: FastSint,
5223 omp_block_size: FastSint,
5224) -> SaSint {
5225 if omp_block_size <= 0 {
5226 return d;
5227 }
5228
5229 let k_usize = usize::try_from(k).expect("k must be non-negative");
5230 let (distinct_names, tail) = buckets.split_at_mut(2 * k_usize);
5231 let induction_bucket = &mut tail[..k_usize];
5232
5233 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5234 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5235 let block_end = start + size;
5236
5237 for offset in 0..size {
5238 let v = cache[offset].symbol;
5239 if v >= 0 {
5240 let p = cache[offset].index;
5241 d += p >> (SUFFIX_GROUP_BIT - 1);
5242
5243 let bucket_index = usize::try_from(v >> 1).expect("bucket index must be non-negative");
5244 let v_usize = usize::try_from(v).expect("cache symbol must be non-negative");
5245 let target = induction_bucket[bucket_index];
5246 induction_bucket[bucket_index] += 1;
5247
5248 cache[offset].symbol = target;
5249 cache[offset].index = (p - 1)
5250 | ((v & 1) << (SAINT_BIT - 1))
5251 | (((distinct_names[v_usize] != d) as SaSint) << (SUFFIX_GROUP_BIT - 1));
5252 distinct_names[v_usize] = d;
5253
5254 if target >= omp_block_start as SaSint && target < block_end as SaSint {
5255 let ni = usize::try_from(target - omp_block_start as SaSint)
5256 .expect("cache slot must be non-negative");
5257 let mut np = cache[offset].index;
5258 if np > 0 {
5259 cache[ni].index = np;
5260 np &= !SUFFIX_GROUP_MARKER;
5261 let np_usize = usize::try_from(np).expect("suffix index must be non-negative");
5262 cache[ni].symbol = buckets_index2(
5263 usize::try_from(t[np_usize - 1])
5264 .expect("bucket symbol must be non-negative"),
5265 usize::from(t[np_usize - 2] < t[np_usize - 1]),
5266 ) as SaSint;
5267 np = 0;
5268 }
5269 cache[offset].index = np & SAINT_MAX;
5270 }
5271 }
5272 }
5273
5274 d
5275}
5276
5277#[doc(hidden)]
5279pub fn partial_sorting_scan_left_to_right_32s_1k_block_sort(
5280 t: &[SaSint],
5281 induction_bucket: &mut [SaSint],
5282 cache: &mut [ThreadCache],
5283 omp_block_start: FastSint,
5284 omp_block_size: FastSint,
5285) {
5286 if omp_block_size <= 0 {
5287 return;
5288 }
5289 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5290 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5291 let block_end = start + size;
5292
5293 for offset in 0..size {
5294 let v = cache[offset].symbol;
5295 if v >= 0 {
5296 let v_usize = v as usize;
5297 let target = induction_bucket[v_usize];
5298 cache[offset].symbol = target;
5299 induction_bucket[v_usize] += 1;
5300 if target >= omp_block_start as SaSint && target < block_end as SaSint {
5301 let ni = usize::try_from(target - omp_block_start as SaSint)
5302 .expect("cache slot must be non-negative");
5303 let mut np = cache[offset].index;
5304 if np > 0 {
5305 cache[ni].index = (np - 1)
5306 | ((usize::from(t[np as usize - 2] < t[np as usize - 1]) as SaSint)
5307 << (SAINT_BIT - 1));
5308 cache[ni].symbol = t[np as usize - 1];
5309 np = 0;
5310 }
5311 cache[offset].index = np & SAINT_MAX;
5312 }
5313 }
5314 }
5315}
5316
5317#[doc(hidden)]
5319pub fn partial_sorting_scan_left_to_right_32s_6k_block_omp(
5320 t: &[SaSint],
5321 sa: &mut [SaSint],
5322 buckets: &mut [SaSint],
5323 d: SaSint,
5324 cache: &mut [ThreadCache],
5325 block_start: FastSint,
5326 block_size: FastSint,
5327 threads: SaSint,
5328) -> SaSint {
5329 if block_size <= 0 {
5330 return d;
5331 }
5332 if threads == 1 || block_size < 16_384 {
5333 return partial_sorting_scan_left_to_right_32s_6k(
5334 t,
5335 sa,
5336 buckets,
5337 d,
5338 block_start,
5339 block_size,
5340 );
5341 }
5342
5343 let threads_usize = usize::try_from(threads)
5344 .expect("threads must be non-negative")
5345 .max(1);
5346 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5347 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
5348 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5349 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5350
5351 for omp_thread_num in 0..omp_num_threads {
5352 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5353 omp_block_stride
5354 } else {
5355 block_size_usize - omp_thread_num * omp_block_stride
5356 };
5357 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5358 if omp_block_size > 0 {
5359 partial_sorting_scan_left_to_right_32s_6k_block_gather(
5360 t,
5361 sa,
5362 &mut cache[omp_thread_num * omp_block_stride
5363 ..omp_thread_num * omp_block_stride + omp_block_size],
5364 omp_block_start as FastSint,
5365 omp_block_size as FastSint,
5366 );
5367 }
5368 }
5369
5370 let d = partial_sorting_scan_left_to_right_32s_6k_block_sort(
5371 t,
5372 buckets,
5373 d,
5374 &mut cache[..block_size_usize],
5375 block_start,
5376 block_size,
5377 );
5378
5379 for omp_thread_num in 0..omp_num_threads {
5380 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5381 omp_block_stride
5382 } else {
5383 block_size_usize - omp_thread_num * omp_block_stride
5384 };
5385 if omp_block_size > 0 {
5386 place_cached_suffixes(
5387 sa,
5388 &cache[omp_thread_num * omp_block_stride..],
5389 0,
5390 omp_block_size as FastSint,
5391 );
5392 }
5393 }
5394 d
5395}
5396
5397#[doc(hidden)]
5399pub fn partial_sorting_scan_left_to_right_32s_4k_block_omp(
5400 t: &[SaSint],
5401 sa: &mut [SaSint],
5402 k: SaSint,
5403 buckets: &mut [SaSint],
5404 d: SaSint,
5405 cache: &mut [ThreadCache],
5406 block_start: FastSint,
5407 block_size: FastSint,
5408 threads: SaSint,
5409) -> SaSint {
5410 if block_size <= 0 {
5411 return d;
5412 }
5413 if threads == 1 || block_size < 16_384 {
5414 return partial_sorting_scan_left_to_right_32s_4k(
5415 t,
5416 sa,
5417 k,
5418 buckets,
5419 d,
5420 block_start,
5421 block_size,
5422 );
5423 }
5424
5425 let threads_usize = usize::try_from(threads)
5426 .expect("threads must be non-negative")
5427 .max(1);
5428 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5429 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
5430 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5431 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5432
5433 for omp_thread_num in 0..omp_num_threads {
5434 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5435 omp_block_stride
5436 } else {
5437 block_size_usize - omp_thread_num * omp_block_stride
5438 };
5439 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5440 if omp_block_size > 0 {
5441 partial_sorting_scan_left_to_right_32s_4k_block_gather(
5442 t,
5443 sa,
5444 &mut cache[omp_thread_num * omp_block_stride
5445 ..omp_thread_num * omp_block_stride + omp_block_size],
5446 omp_block_start as FastSint,
5447 omp_block_size as FastSint,
5448 );
5449 }
5450 }
5451
5452 let cache = &mut cache[..block_size_usize];
5453 let d = partial_sorting_scan_left_to_right_32s_4k_block_sort(
5454 t,
5455 k,
5456 buckets,
5457 d,
5458 cache,
5459 block_start,
5460 block_size,
5461 );
5462
5463 for omp_thread_num in 0..omp_num_threads {
5464 let omp_block_start = omp_thread_num * omp_block_stride;
5465 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5466 omp_block_stride
5467 } else {
5468 block_size_usize - omp_block_start
5469 };
5470 if omp_block_size > 0 {
5471 compact_and_place_cached_suffixes(
5472 sa,
5473 &mut cache[omp_block_start..],
5474 0,
5475 omp_block_size as FastSint,
5476 );
5477 }
5478 }
5479
5480 d
5481}
5482
5483#[doc(hidden)]
5485pub fn partial_sorting_scan_left_to_right_32s_1k_block_omp(
5486 t: &[SaSint],
5487 sa: &mut [SaSint],
5488 buckets: &mut [SaSint],
5489 cache: &mut [ThreadCache],
5490 block_start: FastSint,
5491 block_size: FastSint,
5492 threads: SaSint,
5493) {
5494 if block_size <= 0 {
5495 return;
5496 }
5497 if threads == 1 || block_size < 16_384 {
5498 partial_sorting_scan_left_to_right_32s_1k(t, sa, buckets, block_start, block_size);
5499 return;
5500 }
5501
5502 let threads_usize = usize::try_from(threads)
5503 .expect("threads must be non-negative")
5504 .max(1);
5505 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
5506 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
5507 let omp_num_threads = threads_usize.min(block_size_usize.max(1));
5508 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
5509
5510 for omp_thread_num in 0..omp_num_threads {
5511 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5512 omp_block_stride
5513 } else {
5514 block_size_usize - omp_thread_num * omp_block_stride
5515 };
5516 let omp_block_start = block_start_usize + omp_thread_num * omp_block_stride;
5517 if omp_block_size > 0 {
5518 partial_sorting_scan_left_to_right_32s_1k_block_gather(
5519 t,
5520 sa,
5521 &mut cache[omp_thread_num * omp_block_stride
5522 ..omp_thread_num * omp_block_stride + omp_block_size],
5523 omp_block_start as FastSint,
5524 omp_block_size as FastSint,
5525 );
5526 }
5527 }
5528
5529 let cache = &mut cache[..block_size_usize];
5530 partial_sorting_scan_left_to_right_32s_1k_block_sort(
5531 t,
5532 buckets,
5533 cache,
5534 block_start,
5535 block_size,
5536 );
5537 for omp_thread_num in 0..omp_num_threads {
5538 let omp_block_start = omp_thread_num * omp_block_stride;
5539 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
5540 omp_block_stride
5541 } else {
5542 block_size_usize - omp_block_start
5543 };
5544 if omp_block_size > 0 {
5545 compact_and_place_cached_suffixes(
5546 sa,
5547 &mut cache[omp_block_start..],
5548 0,
5549 omp_block_size as FastSint,
5550 );
5551 }
5552 }
5553}
5554
5555#[doc(hidden)]
5557pub fn partial_sorting_scan_right_to_left_32s_6k_omp(
5558 t: &[SaSint],
5559 sa: &mut [SaSint],
5560 n: SaSint,
5561 buckets: &mut [SaSint],
5562 first_lms_suffix: SaSint,
5563 left_suffixes_count: SaSint,
5564 mut d: SaSint,
5565 threads: SaSint,
5566 thread_state: &mut [ThreadState],
5567) -> SaSint {
5568 let scan_start = left_suffixes_count as FastSint + 1;
5569 let scan_end = n as FastSint - first_lms_suffix as FastSint;
5570 if threads == 1 || (scan_end - scan_start) < 65_536 {
5571 return partial_sorting_scan_right_to_left_32s_6k(
5572 t,
5573 sa,
5574 buckets,
5575 d,
5576 scan_start,
5577 scan_end - scan_start,
5578 );
5579 }
5580 if thread_state.is_empty() {
5581 return partial_sorting_scan_right_to_left_32s_6k(
5582 t,
5583 sa,
5584 buckets,
5585 d,
5586 scan_start,
5587 scan_end - scan_start,
5588 );
5589 }
5590
5591 let threads_usize = usize::try_from(threads)
5592 .expect("threads must be non-negative")
5593 .max(1);
5594 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
5595 let mut block_start = scan_end - 1;
5596 let block_span = FastSint::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
5597 .expect("block span must fit FastSint");
5598 while block_start >= scan_start {
5599 let mut block_end = block_start - block_span;
5600 if block_end < scan_start {
5601 block_end = scan_start - 1;
5602 }
5603
5604 d = partial_sorting_scan_right_to_left_32s_6k_block_omp(
5605 t,
5606 sa,
5607 buckets,
5608 d,
5609 &mut cache,
5610 block_end + 1,
5611 block_start - block_end,
5612 threads,
5613 );
5614
5615 if block_end < scan_start {
5616 break;
5617 }
5618 block_start = block_end;
5619 }
5620
5621 d
5622}
5623
5624#[doc(hidden)]
5626pub fn partial_sorting_scan_right_to_left_32s_4k_omp(
5627 t: &[SaSint],
5628 sa: &mut [SaSint],
5629 n: SaSint,
5630 k: SaSint,
5631 buckets: &mut [SaSint],
5632 mut d: SaSint,
5633 threads: SaSint,
5634 thread_state: &mut [ThreadState],
5635) -> SaSint {
5636 if threads == 1 || n < 65_536 {
5637 return partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n as FastSint);
5638 }
5639 if thread_state.is_empty() {
5640 return partial_sorting_scan_right_to_left_32s_4k(t, sa, k, buckets, d, 0, n as FastSint);
5641 }
5642 let threads_usize = usize::try_from(threads)
5643 .expect("threads must be non-negative")
5644 .max(1);
5645 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
5646 let mut block_start = FastSint::try_from(n).expect("n must fit FastSint") - 1;
5647 let block_span = FastSint::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
5648 .expect("block span must fit FastSint");
5649 while block_start >= 0 {
5650 let mut block_end = block_start - block_span;
5651 if block_end < 0 {
5652 block_end = -1;
5653 }
5654
5655 d = partial_sorting_scan_right_to_left_32s_4k_block_omp(
5656 t,
5657 sa,
5658 k,
5659 buckets,
5660 d,
5661 &mut cache,
5662 block_end + 1,
5663 block_start - block_end,
5664 threads,
5665 );
5666
5667 if block_end < 0 {
5668 break;
5669 }
5670 block_start = block_end;
5671 }
5672
5673 d
5674}
5675
5676#[doc(hidden)]
5678pub fn partial_sorting_scan_right_to_left_32s_1k_omp(
5679 t: &[SaSint],
5680 sa: &mut [SaSint],
5681 n: SaSint,
5682 buckets: &mut [SaSint],
5683 threads: SaSint,
5684 thread_state: &mut [ThreadState],
5685) {
5686 if threads == 1 || n < 65_536 {
5687 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n as FastSint);
5688 return;
5689 }
5690 if thread_state.is_empty() {
5691 partial_sorting_scan_right_to_left_32s_1k(t, sa, buckets, 0, n as FastSint);
5692 return;
5693 }
5694
5695 let threads_usize = usize::try_from(threads)
5696 .expect("threads must be non-negative")
5697 .max(1);
5698 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
5699 let mut block_start = FastSint::try_from(n).expect("n must fit FastSint") - 1;
5700 let block_span = FastSint::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
5701 .expect("block span must fit FastSint");
5702 while block_start >= 0 {
5703 let mut block_end = block_start - block_span;
5704 if block_end < 0 {
5705 block_end = -1;
5706 }
5707
5708 partial_sorting_scan_right_to_left_32s_1k_block_omp(
5709 t,
5710 sa,
5711 buckets,
5712 &mut cache,
5713 block_end + 1,
5714 block_start - block_end,
5715 threads,
5716 );
5717
5718 if block_end < 0 {
5719 break;
5720 }
5721 block_start = block_end;
5722 }
5723}
5724
5725#[doc(hidden)]
5727pub fn partial_sorting_gather_lms_suffixes_32s_4k(
5728 sa: &mut [SaSint],
5729 omp_block_start: FastSint,
5730 omp_block_size: FastSint,
5731) -> FastSint {
5732 if omp_block_size <= 0 {
5733 return omp_block_start;
5734 }
5735
5736 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5737 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5738 let mut l = start;
5739
5740 for i in start..start + size {
5741 let s = sa[i] as SaUint;
5742 sa[l] = ((s.wrapping_sub(SUFFIX_GROUP_MARKER as SaUint)) & !(SUFFIX_GROUP_MARKER as SaUint))
5743 as SaSint;
5744 l += usize::from((s as SaSint) < 0);
5745 }
5746
5747 l as FastSint
5748}
5749
5750#[doc(hidden)]
5752pub fn partial_sorting_gather_lms_suffixes_32s_1k(
5753 sa: &mut [SaSint],
5754 omp_block_start: FastSint,
5755 omp_block_size: FastSint,
5756) -> FastSint {
5757 if omp_block_size <= 0 {
5758 return omp_block_start;
5759 }
5760
5761 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
5762 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
5763 let mut l = start;
5764
5765 for i in start..start + size {
5766 let s = sa[i];
5767 sa[l] = s & SAINT_MAX;
5768 l += usize::from(s < 0);
5769 }
5770
5771 l as FastSint
5772}
5773
5774#[doc(hidden)]
5776pub fn partial_sorting_gather_lms_suffixes_32s_4k_omp(
5777 sa: &mut [SaSint],
5778 n: SaSint,
5779 threads: SaSint,
5780 thread_state: &mut [ThreadState],
5781) {
5782 let n_usize = usize::try_from(n).expect("n must be non-negative");
5783 let omp_num_threads = if threads > 1 && n >= 65_536 {
5784 usize::try_from(threads)
5785 .expect("threads must be non-negative")
5786 .min(thread_state.len())
5787 .max(1)
5788 } else {
5789 1
5790 };
5791
5792 if omp_num_threads == 1 {
5793 let _ = partial_sorting_gather_lms_suffixes_32s_4k(sa, 0, n as FastSint);
5794 return;
5795 }
5796
5797 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
5798 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
5799 let block_start = thread_num * omp_block_stride;
5800 let block_size = if thread_num + 1 < omp_num_threads {
5801 omp_block_stride
5802 } else {
5803 n_usize - block_start
5804 };
5805 state.position = block_start as FastSint;
5806 state.count = partial_sorting_gather_lms_suffixes_32s_4k(
5807 sa,
5808 block_start as FastSint,
5809 block_size as FastSint,
5810 ) - block_start as FastSint;
5811 }
5812
5813 let mut position = 0usize;
5814 for (thread_num, state) in thread_state.iter().take(omp_num_threads).enumerate() {
5815 let count = usize::try_from(state.count).expect("count must be non-negative");
5816 let src = usize::try_from(state.position).expect("position must be non-negative");
5817 if thread_num > 0 && count > 0 {
5818 sa.copy_within(src..src + count, position);
5819 }
5820 position += count;
5821 }
5822}
5823
5824#[doc(hidden)]
5826pub fn partial_sorting_gather_lms_suffixes_32s_1k_omp(
5827 sa: &mut [SaSint],
5828 n: SaSint,
5829 threads: SaSint,
5830 thread_state: &mut [ThreadState],
5831) {
5832 let n_usize = usize::try_from(n).expect("n must be non-negative");
5833 let omp_num_threads = if threads > 1 && n >= 65_536 {
5834 usize::try_from(threads)
5835 .expect("threads must be non-negative")
5836 .min(thread_state.len())
5837 .max(1)
5838 } else {
5839 1
5840 };
5841
5842 if omp_num_threads == 1 {
5843 let _ = partial_sorting_gather_lms_suffixes_32s_1k(sa, 0, n as FastSint);
5844 return;
5845 }
5846
5847 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
5848 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
5849 let block_start = thread_num * omp_block_stride;
5850 let block_size = if thread_num + 1 < omp_num_threads {
5851 omp_block_stride
5852 } else {
5853 n_usize - block_start
5854 };
5855 state.position = block_start as FastSint;
5856 state.count = partial_sorting_gather_lms_suffixes_32s_1k(
5857 sa,
5858 block_start as FastSint,
5859 block_size as FastSint,
5860 ) - block_start as FastSint;
5861 }
5862
5863 let mut position = 0usize;
5864 for (thread_num, state) in thread_state.iter().take(omp_num_threads).enumerate() {
5865 let count = usize::try_from(state.count).expect("count must be non-negative");
5866 let src = usize::try_from(state.position).expect("position must be non-negative");
5867 if thread_num > 0 && count > 0 {
5868 sa.copy_within(src..src + count, position);
5869 }
5870 position += count;
5871 }
5872}
5873
5874#[doc(hidden)]
5876pub fn induce_partial_order_8u_omp(
5877 t: &[u8],
5878 sa: &mut [SaSint],
5879 n: SaSint,
5880 k: SaSint,
5881 flags: SaSint,
5882 buckets: &mut [SaSint],
5883 first_lms_suffix: SaSint,
5884 left_suffixes_count: SaSint,
5885 threads: SaSint,
5886 thread_state: &mut [ThreadState],
5887) {
5888 buckets[2 * ALPHABET_SIZE..4 * ALPHABET_SIZE].fill(0);
5889
5890 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5891 let left = 4 * ALPHABET_SIZE + buckets_index2(0, 1);
5892 let right = 4 * ALPHABET_SIZE + buckets_index2(1, 1);
5893 buckets[left] = buckets[right] - 1;
5894 flip_suffix_markers_omp(sa, buckets[left], threads);
5895 }
5896
5897 let d = partial_sorting_scan_left_to_right_8u_omp(
5898 t,
5899 sa,
5900 n,
5901 k,
5902 buckets,
5903 left_suffixes_count,
5904 0,
5905 threads,
5906 thread_state,
5907 );
5908 partial_sorting_shift_markers_8u_omp(sa, n, buckets, threads);
5909
5910 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
5911 partial_gsa_scan_right_to_left_8u_omp(
5912 t,
5913 sa,
5914 n,
5915 k,
5916 buckets,
5917 first_lms_suffix,
5918 left_suffixes_count,
5919 d,
5920 threads,
5921 thread_state,
5922 );
5923
5924 if t[usize::try_from(first_lms_suffix).expect("first_lms_suffix must be non-negative")] == 0
5925 {
5926 let count = usize::try_from(buckets[buckets_index2(1, 1)] - 1)
5927 .expect("count must be non-negative");
5928 sa.copy_within(0..count, 1);
5929 sa[0] = first_lms_suffix | SAINT_MIN;
5930 }
5931
5932 buckets[buckets_index2(0, 1)] = 0;
5933 } else {
5934 partial_sorting_scan_right_to_left_8u_omp(
5935 t,
5936 sa,
5937 n,
5938 k,
5939 buckets,
5940 first_lms_suffix,
5941 left_suffixes_count,
5942 d,
5943 threads,
5944 thread_state,
5945 );
5946 }
5947}
5948
5949#[doc(hidden)]
5951pub fn induce_partial_order_32s_6k_omp(
5952 t: &[SaSint],
5953 sa: &mut [SaSint],
5954 n: SaSint,
5955 k: SaSint,
5956 buckets: &mut [SaSint],
5957 first_lms_suffix: SaSint,
5958 left_suffixes_count: SaSint,
5959 threads: SaSint,
5960 thread_state: &mut [ThreadState],
5961) {
5962 let d = partial_sorting_scan_left_to_right_32s_6k_omp(
5963 t,
5964 sa,
5965 n,
5966 buckets,
5967 left_suffixes_count,
5968 0,
5969 threads,
5970 thread_state,
5971 );
5972 partial_sorting_shift_markers_32s_6k_omp(sa, k, buckets, threads);
5973 partial_sorting_shift_buckets_32s_6k(k, buckets);
5974 let _ = partial_sorting_scan_right_to_left_32s_6k_omp(
5975 t,
5976 sa,
5977 n,
5978 buckets,
5979 first_lms_suffix,
5980 left_suffixes_count,
5981 d,
5982 threads,
5983 thread_state,
5984 );
5985}
5986
5987#[doc(hidden)]
5989pub fn induce_partial_order_32s_4k_omp(
5990 t: &[SaSint],
5991 sa: &mut [SaSint],
5992 n: SaSint,
5993 k: SaSint,
5994 buckets: &mut [SaSint],
5995 threads: SaSint,
5996 thread_state: &mut [ThreadState],
5997) {
5998 let zero_len = 2 * usize::try_from(k).expect("k must be non-negative");
5999 buckets[..zero_len].fill(0);
6000
6001 let d = partial_sorting_scan_left_to_right_32s_4k_omp(
6002 t,
6003 sa,
6004 n,
6005 k,
6006 buckets,
6007 0,
6008 threads,
6009 thread_state,
6010 );
6011 partial_sorting_shift_markers_32s_4k(sa, n);
6012 let _ = partial_sorting_scan_right_to_left_32s_4k_omp(
6013 t,
6014 sa,
6015 n,
6016 k,
6017 buckets,
6018 d,
6019 threads,
6020 thread_state,
6021 );
6022 partial_sorting_gather_lms_suffixes_32s_4k_omp(sa, n, threads, thread_state);
6023}
6024
6025#[doc(hidden)]
6027pub fn induce_partial_order_32s_2k_omp(
6028 t: &[SaSint],
6029 sa: &mut [SaSint],
6030 n: SaSint,
6031 k: SaSint,
6032 buckets: &mut [SaSint],
6033 threads: SaSint,
6034 thread_state: &mut [ThreadState],
6035) {
6036 let k_usize = usize::try_from(k).expect("k must be non-negative");
6037 let (left, right) = buckets.split_at_mut(k_usize);
6038 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, right, threads, thread_state);
6039 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, left, threads, thread_state);
6040 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
6041}
6042
6043#[doc(hidden)]
6045pub fn induce_partial_order_32s_1k_omp(
6046 t: &[SaSint],
6047 sa: &mut [SaSint],
6048 n: SaSint,
6049 k: SaSint,
6050 buckets: &mut [SaSint],
6051 threads: SaSint,
6052 thread_state: &mut [ThreadState],
6053) {
6054 count_suffixes_32s(t, n, k, buckets);
6055 initialize_buckets_start_32s_1k(k, buckets);
6056 partial_sorting_scan_left_to_right_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
6057
6058 count_suffixes_32s(t, n, k, buckets);
6059 initialize_buckets_end_32s_1k(k, buckets);
6060 partial_sorting_scan_right_to_left_32s_1k_omp(t, sa, n, buckets, threads, thread_state);
6061
6062 partial_sorting_gather_lms_suffixes_32s_1k_omp(sa, n, threads, thread_state);
6063}
6064
6065#[doc(hidden)]
6067pub fn renumber_lms_suffixes_8u(
6068 sa: &mut [SaSint],
6069 m: SaSint,
6070 mut name: SaSint,
6071 omp_block_start: FastSint,
6072 omp_block_size: FastSint,
6073) -> SaSint {
6074 if omp_block_size <= 0 {
6075 return name;
6076 }
6077
6078 let m_usize = usize::try_from(m).expect("m must be non-negative");
6079 let (sa_head, sam) = sa.split_at_mut(m_usize);
6080 let mut i = omp_block_start;
6081 let mut j = omp_block_start + omp_block_size - 64 - 3;
6082
6083 while i < j {
6084 let i0 = i as usize;
6085 let p0 = sa_head[i0];
6086 let d0 = ((p0 & SAINT_MAX) >> 1) as usize;
6087 sam[d0] = name | SAINT_MIN;
6088 name += SaSint::from(p0 < 0);
6089
6090 let p1 = sa_head[i0 + 1];
6091 let d1 = ((p1 & SAINT_MAX) >> 1) as usize;
6092 sam[d1] = name | SAINT_MIN;
6093 name += SaSint::from(p1 < 0);
6094
6095 let p2 = sa_head[i0 + 2];
6096 let d2 = ((p2 & SAINT_MAX) >> 1) as usize;
6097 sam[d2] = name | SAINT_MIN;
6098 name += SaSint::from(p2 < 0);
6099
6100 let p3 = sa_head[i0 + 3];
6101 let d3 = ((p3 & SAINT_MAX) >> 1) as usize;
6102 sam[d3] = name | SAINT_MIN;
6103 name += SaSint::from(p3 < 0);
6104
6105 i += 4;
6106 }
6107
6108 j += 64 + 3;
6109 while i < j {
6110 let p = sa_head[i as usize];
6111 let d = ((p & SAINT_MAX) >> 1) as usize;
6112 sam[d] = name | SAINT_MIN;
6113 name += SaSint::from(p < 0);
6114 i += 1;
6115 }
6116
6117 name
6118}
6119
6120#[doc(hidden)]
6122pub fn gather_marked_lms_suffixes(
6123 sa: &mut [SaSint],
6124 m: SaSint,
6125 l: FastSint,
6126 omp_block_start: FastSint,
6127 omp_block_size: FastSint,
6128) -> FastSint {
6129 if omp_block_size <= 0 {
6130 return l;
6131 }
6132
6133 let mut l = l - 1;
6134 let mut i = m as FastSint + omp_block_start + omp_block_size - 1;
6135 let mut j = m as FastSint + omp_block_start + 3;
6136
6137 while i >= j {
6138 let i0 = i as usize;
6139 let s0 = sa[i0];
6140 sa[l as usize] = s0 & SAINT_MAX;
6141 l -= FastSint::from(s0 < 0);
6142
6143 let s1 = sa[i0 - 1];
6144 sa[l as usize] = s1 & SAINT_MAX;
6145 l -= FastSint::from(s1 < 0);
6146
6147 let s2 = sa[i0 - 2];
6148 sa[l as usize] = s2 & SAINT_MAX;
6149 l -= FastSint::from(s2 < 0);
6150
6151 let s3 = sa[i0 - 3];
6152 sa[l as usize] = s3 & SAINT_MAX;
6153 l -= FastSint::from(s3 < 0);
6154
6155 i -= 4;
6156 }
6157
6158 j -= 3;
6159 while i >= j {
6160 let s = sa[i as usize];
6161 sa[l as usize] = s & SAINT_MAX;
6162 l -= FastSint::from(s < 0);
6163 i -= 1;
6164 }
6165
6166 l + 1
6167}
6168
6169#[doc(hidden)]
6171pub fn renumber_lms_suffixes_8u_omp(
6172 sa: &mut [SaSint],
6173 m: SaSint,
6174 threads: SaSint,
6175 thread_state: &mut [ThreadState],
6176) -> SaSint {
6177 let mut name = 0;
6178 let omp_num_threads = if threads > 1 && m >= 65_536 {
6179 usize::try_from(threads)
6180 .expect("threads must be non-negative")
6181 .min(thread_state.len())
6182 .max(1)
6183 } else {
6184 1
6185 };
6186 let omp_block_stride = (m as FastSint / omp_num_threads as FastSint) & !15;
6187
6188 if omp_num_threads == 1 {
6189 name = renumber_lms_suffixes_8u(sa, m, 0, 0, m as FastSint);
6190 } else {
6191 for omp_thread_num in 0..omp_num_threads {
6192 let omp_block_start = omp_thread_num as FastSint * omp_block_stride;
6193 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6194 omp_block_stride
6195 } else {
6196 m as FastSint - omp_block_start
6197 };
6198 thread_state[omp_thread_num].count =
6199 count_negative_marked_suffixes(sa, omp_block_start, omp_block_size) as FastSint;
6200 }
6201
6202 for omp_thread_num in 0..omp_num_threads {
6203 let omp_block_start = omp_thread_num as FastSint * omp_block_stride;
6204 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6205 omp_block_stride
6206 } else {
6207 m as FastSint - omp_block_start
6208 };
6209
6210 let mut count: FastSint = 0;
6211 for t in 0..omp_thread_num {
6212 count += thread_state[t].count;
6213 }
6214
6215 if omp_thread_num + 1 == omp_num_threads {
6216 name = (count + thread_state[omp_thread_num].count) as SaSint;
6217 }
6218
6219 let _ =
6220 renumber_lms_suffixes_8u(sa, m, count as SaSint, omp_block_start, omp_block_size);
6221 }
6222 }
6223
6224 name
6225}
6226
6227#[doc(hidden)]
6229pub fn gather_marked_lms_suffixes_omp(
6230 sa: &mut [SaSint],
6231 n: SaSint,
6232 m: SaSint,
6233 fs: SaSint,
6234 threads: SaSint,
6235 thread_state: &mut [ThreadState],
6236) {
6237 let n_fast = n as FastSint;
6238 let m_fast = m as FastSint;
6239 let omp_num_threads = if threads > 1 && n >= 131_072 {
6240 usize::try_from(threads)
6241 .expect("threads must be non-negative")
6242 .min(thread_state.len())
6243 .max(1)
6244 } else {
6245 1
6246 };
6247 let omp_block_stride = ((n_fast >> 1) / omp_num_threads as FastSint) & !15;
6248
6249 if omp_num_threads == 1 {
6250 let _ = gather_marked_lms_suffixes(sa, m, n_fast + fs as FastSint, 0, n_fast >> 1);
6251 } else {
6252 for omp_thread_num in 0..omp_num_threads {
6253 let omp_block_start = omp_thread_num as FastSint * omp_block_stride;
6254 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6255 omp_block_stride
6256 } else {
6257 (n_fast >> 1) - omp_block_start
6258 };
6259
6260 if omp_thread_num < omp_num_threads - 1 {
6261 thread_state[omp_thread_num].position = gather_marked_lms_suffixes(
6262 sa,
6263 m,
6264 m_fast + omp_block_start + omp_block_size,
6265 omp_block_start,
6266 omp_block_size,
6267 );
6268 thread_state[omp_thread_num].count = m_fast + omp_block_start + omp_block_size
6269 - thread_state[omp_thread_num].position;
6270 } else {
6271 thread_state[omp_thread_num].position = gather_marked_lms_suffixes(
6272 sa,
6273 m,
6274 n_fast + fs as FastSint,
6275 omp_block_start,
6276 omp_block_size,
6277 );
6278 thread_state[omp_thread_num].count =
6279 n_fast + fs as FastSint - thread_state[omp_thread_num].position;
6280 }
6281 }
6282
6283 let mut position = n_fast + fs as FastSint;
6284 for t in (0..omp_num_threads).rev() {
6285 position -= thread_state[t].count;
6286 if t + 1 != omp_num_threads && thread_state[t].count > 0 {
6287 let src = usize::try_from(thread_state[t].position)
6288 .expect("position must be non-negative");
6289 let len =
6290 usize::try_from(thread_state[t].count).expect("count must be non-negative");
6291 let dst = usize::try_from(position).expect("position must be non-negative");
6292 sa.copy_within(src..src + len, dst);
6293 }
6294 }
6295 }
6296}
6297
6298#[doc(hidden)]
6300pub fn renumber_and_gather_lms_suffixes_omp(
6301 sa: &mut [SaSint],
6302 n: SaSint,
6303 m: SaSint,
6304 fs: SaSint,
6305 threads: SaSint,
6306 thread_state: &mut [ThreadState],
6307) -> SaSint {
6308 let m_usize = usize::try_from(m).expect("m must be non-negative");
6309 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6310 sa[m_usize..m_usize + half_n].fill(0);
6311
6312 let name = renumber_lms_suffixes_8u_omp(sa, m, threads, thread_state);
6313 if name < m {
6314 gather_marked_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
6315 } else {
6316 let mut i = 0;
6317 while i < m_usize {
6318 sa[i] &= SAINT_MAX;
6319 i += 1;
6320 }
6321 }
6322
6323 name
6324}
6325
6326#[doc(hidden)]
6328pub fn renumber_distinct_lms_suffixes_32s_4k(
6329 sa: &mut [SaSint],
6330 m: SaSint,
6331 mut name: SaSint,
6332 omp_block_start: FastSint,
6333 omp_block_size: FastSint,
6334) -> SaSint {
6335 if omp_block_size <= 0 {
6336 return name;
6337 }
6338
6339 let prefetch_distance = 64usize;
6340 let m_usize = usize::try_from(m).expect("m must be non-negative");
6341 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
6342 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6343 let (sa_head, sam) = sa.split_at_mut(m_usize);
6344 let mut i = start;
6345 let mut j = start
6346 .saturating_add(size)
6347 .saturating_sub(prefetch_distance + 3);
6348 let mut p0;
6349 let mut p1;
6350 let mut p2;
6351 let mut p3 = 0;
6352
6353 while i < j {
6354 p0 = sa_head[i];
6355 sa_head[i] = p0 & SAINT_MAX;
6356 sam[(sa_head[i] >> 1) as usize] = name | (p0 & p3 & SAINT_MIN);
6357 name += SaSint::from(p0 < 0);
6358
6359 p1 = sa_head[i + 1];
6360 sa_head[i + 1] = p1 & SAINT_MAX;
6361 sam[(sa_head[i + 1] >> 1) as usize] = name | (p1 & p0 & SAINT_MIN);
6362 name += SaSint::from(p1 < 0);
6363
6364 p2 = sa_head[i + 2];
6365 sa_head[i + 2] = p2 & SAINT_MAX;
6366 sam[(sa_head[i + 2] >> 1) as usize] = name | (p2 & p1 & SAINT_MIN);
6367 name += SaSint::from(p2 < 0);
6368
6369 p3 = sa_head[i + 3];
6370 sa_head[i + 3] = p3 & SAINT_MAX;
6371 sam[(sa_head[i + 3] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6372 name += SaSint::from(p3 < 0);
6373
6374 i += 4;
6375 }
6376
6377 j = start + size;
6378 while i < j {
6379 p2 = p3;
6380 p3 = sa_head[i];
6381 sa_head[i] = p3 & SAINT_MAX;
6382 sam[(sa_head[i] >> 1) as usize] = name | (p3 & p2 & SAINT_MIN);
6383 name += SaSint::from(p3 < 0);
6384 i += 1;
6385 }
6386
6387 name
6388}
6389
6390#[doc(hidden)]
6392pub fn mark_distinct_lms_suffixes_32s(
6393 sa: &mut [SaSint],
6394 m: SaSint,
6395 omp_block_start: FastSint,
6396 omp_block_size: FastSint,
6397) {
6398 if omp_block_size <= 0 {
6399 return;
6400 }
6401
6402 let m_usize = usize::try_from(m).expect("m must be non-negative");
6403 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
6404 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6405 let mut i = m_usize + start;
6406 let mut j = m_usize + start + size.saturating_sub(3);
6407 let mut p3 = 0;
6408
6409 while i < j {
6410 let mut p0 = sa[i];
6411 sa[i] = p0 & (p3 | SAINT_MAX);
6412 p0 = if p0 == 0 { p3 } else { p0 };
6413
6414 let mut p1 = sa[i + 1];
6415 sa[i + 1] = p1 & (p0 | SAINT_MAX);
6416 p1 = if p1 == 0 { p0 } else { p1 };
6417
6418 let mut p2 = sa[i + 2];
6419 sa[i + 2] = p2 & (p1 | SAINT_MAX);
6420 p2 = if p2 == 0 { p1 } else { p2 };
6421
6422 p3 = sa[i + 3];
6423 sa[i + 3] = p3 & (p2 | SAINT_MAX);
6424 p3 = if p3 == 0 { p2 } else { p3 };
6425
6426 i += 4;
6427 }
6428
6429 j = m_usize + start + size;
6430 while i < j {
6431 let p2 = p3;
6432 p3 = sa[i];
6433 sa[i] = p3 & (p2 | SAINT_MAX);
6434 p3 = if p3 == 0 { p2 } else { p3 };
6435 i += 1;
6436 }
6437}
6438
6439#[doc(hidden)]
6441pub fn clamp_lms_suffixes_length_32s(
6442 sa: &mut [SaSint],
6443 m: SaSint,
6444 omp_block_start: FastSint,
6445 omp_block_size: FastSint,
6446) {
6447 if omp_block_size <= 0 {
6448 return;
6449 }
6450
6451 let m_usize = usize::try_from(m).expect("m must be non-negative");
6452 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
6453 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
6454 let mut i = m_usize + start;
6455 let mut j = m_usize + start + size.saturating_sub(3);
6456
6457 while i < j {
6458 let s0 = sa[i];
6459 sa[i] = if s0 < 0 { s0 } else { 0 } & SAINT_MAX;
6460
6461 let s1 = sa[i + 1];
6462 sa[i + 1] = if s1 < 0 { s1 } else { 0 } & SAINT_MAX;
6463
6464 let s2 = sa[i + 2];
6465 sa[i + 2] = if s2 < 0 { s2 } else { 0 } & SAINT_MAX;
6466
6467 let s3 = sa[i + 3];
6468 sa[i + 3] = if s3 < 0 { s3 } else { 0 } & SAINT_MAX;
6469
6470 i += 4;
6471 }
6472
6473 j = m_usize + start + size;
6474 while i < j {
6475 let s = sa[i];
6476 sa[i] = if s < 0 { s } else { 0 } & SAINT_MAX;
6477 i += 1;
6478 }
6479}
6480
6481#[doc(hidden)]
6483pub fn renumber_distinct_lms_suffixes_32s_4k_omp(
6484 sa: &mut [SaSint],
6485 m: SaSint,
6486 threads: SaSint,
6487 thread_state: &mut [ThreadState],
6488) -> SaSint {
6489 let mut name = 0;
6490 let m_usize = usize::try_from(m).expect("m must be non-negative");
6491 let omp_num_threads = if threads > 1 && m >= 65_536 {
6492 usize::try_from(threads)
6493 .expect("threads must be non-negative")
6494 .min(thread_state.len())
6495 .max(1)
6496 } else {
6497 1
6498 };
6499 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
6500
6501 if omp_num_threads == 1 {
6502 let omp_block_start = 0usize;
6503 let omp_block_size = m_usize - omp_block_start;
6504 name = renumber_distinct_lms_suffixes_32s_4k(
6505 sa,
6506 m,
6507 1,
6508 omp_block_start as FastSint,
6509 omp_block_size as FastSint,
6510 );
6511 } else {
6512 for omp_thread_num in 0..omp_num_threads {
6513 let omp_block_start = omp_thread_num * omp_block_stride;
6514 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6515 omp_block_stride
6516 } else {
6517 m_usize - omp_block_start
6518 };
6519 thread_state[omp_thread_num].count = count_negative_marked_suffixes(
6520 sa,
6521 omp_block_start as FastSint,
6522 omp_block_size as FastSint,
6523 ) as FastSint;
6524 }
6525
6526 for omp_thread_num in 0..omp_num_threads {
6527 let omp_block_start = omp_thread_num * omp_block_stride;
6528 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6529 omp_block_stride
6530 } else {
6531 m_usize - omp_block_start
6532 };
6533
6534 let mut count: FastSint = 1;
6535 for t in 0..omp_thread_num {
6536 count += thread_state[t].count;
6537 }
6538
6539 if omp_thread_num + 1 == omp_num_threads {
6540 name = (count + thread_state[omp_thread_num].count) as SaSint;
6541 }
6542
6543 let _ = renumber_distinct_lms_suffixes_32s_4k(
6544 sa,
6545 m,
6546 count as SaSint,
6547 omp_block_start as FastSint,
6548 omp_block_size as FastSint,
6549 );
6550 }
6551 }
6552
6553 name - 1
6554}
6555
6556#[doc(hidden)]
6558pub fn mark_distinct_lms_suffixes_32s_omp(
6559 sa: &mut [SaSint],
6560 n: SaSint,
6561 m: SaSint,
6562 threads: SaSint,
6563) {
6564 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6565 let omp_num_threads = if threads > 1 && n >= 131_072 {
6566 usize::try_from(threads)
6567 .expect("threads must be non-negative")
6568 .max(1)
6569 } else {
6570 1
6571 };
6572 let omp_block_stride = (half_n / omp_num_threads) & !15usize;
6573
6574 for omp_thread_num in 0..omp_num_threads {
6575 let omp_block_start = omp_thread_num * omp_block_stride;
6576 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6577 omp_block_stride
6578 } else {
6579 half_n - omp_block_start
6580 };
6581 mark_distinct_lms_suffixes_32s(
6582 sa,
6583 m,
6584 omp_block_start as FastSint,
6585 omp_block_size as FastSint,
6586 );
6587 }
6588}
6589
6590#[doc(hidden)]
6592pub fn clamp_lms_suffixes_length_32s_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6593 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6594 let omp_num_threads = if threads > 1 && n >= 131_072 {
6595 usize::try_from(threads)
6596 .expect("threads must be non-negative")
6597 .max(1)
6598 } else {
6599 1
6600 };
6601 let omp_block_stride = (half_n / omp_num_threads) & !15usize;
6602
6603 for omp_thread_num in 0..omp_num_threads {
6604 let omp_block_start = omp_thread_num * omp_block_stride;
6605 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6606 omp_block_stride
6607 } else {
6608 half_n - omp_block_start
6609 };
6610 clamp_lms_suffixes_length_32s(
6611 sa,
6612 m,
6613 omp_block_start as FastSint,
6614 omp_block_size as FastSint,
6615 );
6616 }
6617}
6618
6619#[doc(hidden)]
6621pub fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
6622 sa: &mut [SaSint],
6623 n: SaSint,
6624 m: SaSint,
6625 threads: SaSint,
6626 thread_state: &mut [ThreadState],
6627) -> SaSint {
6628 let m_usize = usize::try_from(m).expect("m must be non-negative");
6629 let half_n = usize::try_from(n >> 1).expect("n must be non-negative");
6630 sa[m_usize..m_usize + half_n].fill(0);
6631
6632 let name = renumber_distinct_lms_suffixes_32s_4k_omp(sa, m, threads, thread_state);
6633 if name < m {
6634 mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6635 }
6636
6637 name
6638}
6639
6640#[doc(hidden)]
6642pub fn renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
6643 t: &[SaSint],
6644 sa: &mut [SaSint],
6645 n: SaSint,
6646 m: SaSint,
6647 threads: SaSint,
6648) -> SaSint {
6649 let m_usize = usize::try_from(m).expect("m must be non-negative");
6650 let n_usize = usize::try_from(n).expect("n must be non-negative");
6651
6652 let _ = gather_lms_suffixes_32s(t, sa, n);
6653
6654 let zero_len = n_usize
6655 .checked_sub(m_usize)
6656 .and_then(|v| v.checked_sub(m_usize))
6657 .expect("n must be at least 2*m");
6658 sa[m_usize..m_usize + zero_len].fill(0);
6659
6660 {
6661 let prefetch_distance: FastSint = 64;
6662 let mut i = n as FastSint - m as FastSint;
6663 let mut j = n as FastSint - 1 - prefetch_distance - 3;
6664
6665 while i < j {
6666 let iu = i as usize;
6667 let s0 = (sa[iu] as SaUint >> 1) as usize;
6668 let s1 = (sa[iu + 1] as SaUint >> 1) as usize;
6669 let s2 = (sa[iu + 2] as SaUint >> 1) as usize;
6670 let s3 = (sa[iu + 3] as SaUint >> 1) as usize;
6671
6672 sa[m_usize + s0] = sa[iu + 1] - sa[iu] + 1 + SAINT_MIN;
6673 sa[m_usize + s1] = sa[iu + 2] - sa[iu + 1] + 1 + SAINT_MIN;
6674 sa[m_usize + s2] = sa[iu + 3] - sa[iu + 2] + 1 + SAINT_MIN;
6675 sa[m_usize + s3] = sa[iu + 4] - sa[iu + 3] + 1 + SAINT_MIN;
6676 i += 4;
6677 }
6678
6679 j += prefetch_distance + 3;
6680 while i < j {
6681 let iu = i as usize;
6682 let s = (sa[iu] as SaUint >> 1) as usize;
6683 sa[m_usize + s] = sa[iu + 1] - sa[iu] + 1 + SAINT_MIN;
6684 i += 1;
6685 }
6686
6687 let tail = (sa[n_usize - 1] as SaUint >> 1) as usize;
6688 sa[m_usize + tail] = 1 + SAINT_MIN;
6689 }
6690
6691 clamp_lms_suffixes_length_32s_omp(sa, n, m, threads);
6692
6693 let mut name = 1;
6694 if m_usize > 0 {
6695 let (sa_head, sam) = sa.split_at_mut(m_usize);
6696 let mut i = 1usize;
6697 let prefetch_distance = 64usize;
6698 let mut j = m_usize.saturating_sub(prefetch_distance + 1);
6699 let mut p = usize::try_from(sa_head[0]).expect("suffix index must be non-negative");
6700 let mut plen = sam[p >> 1];
6701 let mut pdiff = SAINT_MIN;
6702
6703 while i < j {
6704 let q = usize::try_from(sa_head[i]).expect("suffix index must be non-negative");
6705 let qlen = sam[q >> 1];
6706 let mut qdiff = SAINT_MIN;
6707 if plen == qlen {
6708 let mut l = 0usize;
6709 while l < qlen as usize {
6710 if t[p + l] != t[q + l] {
6711 break;
6712 }
6713 l += 1;
6714 }
6715 qdiff = ((l as SaSint) - qlen) & SAINT_MIN;
6716 }
6717 sam[p >> 1] = name | (pdiff & qdiff);
6718 name += SaSint::from(qdiff < 0);
6719
6720 p = usize::try_from(sa_head[i + 1]).expect("suffix index must be non-negative");
6721 plen = sam[p >> 1];
6722 pdiff = SAINT_MIN;
6723 if qlen == plen {
6724 let mut l = 0usize;
6725 while l < plen as usize {
6726 if t[q + l] != t[p + l] {
6727 break;
6728 }
6729 l += 1;
6730 }
6731 pdiff = ((l as SaSint) - plen) & SAINT_MIN;
6732 }
6733 sam[q >> 1] = name | (qdiff & pdiff);
6734 name += SaSint::from(pdiff < 0);
6735 i += 2;
6736 }
6737
6738 j = m_usize;
6739 while i < j {
6740 let q = usize::try_from(sa_head[i]).expect("suffix index must be non-negative");
6741 let qlen = sam[q >> 1];
6742 let mut qdiff = SAINT_MIN;
6743 if plen == qlen {
6744 let mut l = 0usize;
6745 while l < plen as usize {
6746 if t[p + l] != t[q + l] {
6747 break;
6748 }
6749 l += 1;
6750 }
6751 qdiff = ((l as SaSint) - plen) & SAINT_MIN;
6752 }
6753 sam[p >> 1] = name | (pdiff & qdiff);
6754 name += SaSint::from(qdiff < 0);
6755
6756 p = q;
6757 plen = qlen;
6758 pdiff = qdiff;
6759 i += 1;
6760 }
6761
6762 sam[p >> 1] = name | pdiff;
6763 name += 1;
6764 }
6765
6766 if name <= m {
6767 mark_distinct_lms_suffixes_32s_omp(sa, n, m, threads);
6768 }
6769
6770 name - 1
6771}
6772
6773#[doc(hidden)]
6775pub fn reconstruct_lms_suffixes(
6776 sa: &mut [SaSint],
6777 n: SaSint,
6778 m: SaSint,
6779 omp_block_start: FastSint,
6780 omp_block_size: FastSint,
6781) {
6782 if omp_block_size <= 0 {
6783 return;
6784 }
6785
6786 let prefetch_distance: FastSint = 64;
6787 let base = (n - m) as usize;
6788 let mut i = omp_block_start;
6789 let mut j = omp_block_start + omp_block_size - prefetch_distance - 3;
6790
6791 while i < j {
6792 let iu = i as usize;
6793 let s0 = sa[iu] as usize;
6794 let s1 = sa[iu + 1] as usize;
6795 let s2 = sa[iu + 2] as usize;
6796 let s3 = sa[iu + 3] as usize;
6797 sa[iu] = sa[base + s0];
6798 sa[iu + 1] = sa[base + s1];
6799 sa[iu + 2] = sa[base + s2];
6800 sa[iu + 3] = sa[base + s3];
6801 i += 4;
6802 }
6803
6804 j += prefetch_distance + 3;
6805 while i < j {
6806 let iu = i as usize;
6807 let s = sa[iu] as usize;
6808 sa[iu] = sa[base + s];
6809 i += 1;
6810 }
6811}
6812
6813#[doc(hidden)]
6815pub fn reconstruct_lms_suffixes_omp(sa: &mut [SaSint], n: SaSint, m: SaSint, threads: SaSint) {
6816 let m_usize = usize::try_from(m).expect("m must be non-negative");
6817 let omp_num_threads = if threads > 1 && m >= 65_536 {
6818 usize::try_from(threads)
6819 .expect("threads must be non-negative")
6820 .max(1)
6821 } else {
6822 1
6823 };
6824 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
6825
6826 for omp_thread_num in 0..omp_num_threads {
6827 let omp_block_start = omp_thread_num * omp_block_stride;
6828 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
6829 omp_block_stride
6830 } else {
6831 m_usize - omp_block_start
6832 };
6833 reconstruct_lms_suffixes(
6834 sa,
6835 n,
6836 m,
6837 omp_block_start as FastSint,
6838 omp_block_size as FastSint,
6839 );
6840 }
6841}
6842
6843#[doc(hidden)]
6845pub fn place_lms_suffixes_interval_8u(
6846 sa: &mut [SaSint],
6847 n: SaSint,
6848 mut m: SaSint,
6849 flags: SaSint,
6850 buckets: &mut [SaSint],
6851) {
6852 let bucket_end_base = 7 * ALPHABET_SIZE;
6853 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
6854 buckets[bucket_end_base] -= 1;
6855 }
6856
6857 let mut j = usize::try_from(n).expect("n must be non-negative");
6858 for c in (0..ALPHABET_SIZE - 1).rev() {
6859 let l = usize::try_from(
6860 buckets[buckets_index2(c, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(c, 1)],
6861 )
6862 .expect("interval length must be non-negative");
6863 if l > 0 {
6864 let i = usize::try_from(buckets[bucket_end_base + c])
6865 .expect("bucket end must be non-negative");
6866 if j > i {
6867 sa[i..j].fill(0);
6868 }
6869
6870 let new_j = i - l;
6871 let src_end = usize::try_from(m).expect("m must be non-negative");
6872 let src_start = src_end - l;
6873 sa.copy_within(src_start..src_end, new_j);
6874 m -= l as SaSint;
6875 j = new_j;
6876 }
6877 }
6878
6879 sa[..j].fill(0);
6880
6881 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
6882 buckets[bucket_end_base] += 1;
6883 }
6884}
6885
6886#[doc(hidden)]
6888pub fn place_lms_suffixes_interval_32s_4k(
6889 sa: &mut [SaSint],
6890 n: SaSint,
6891 k: SaSint,
6892 mut m: SaSint,
6893 buckets: &[SaSint],
6894) {
6895 let k_usize = usize::try_from(k).expect("k must be non-negative");
6896 let bucket_end = &buckets[3 * k_usize..4 * k_usize];
6897
6898 let mut j = usize::try_from(n).expect("n must be non-negative");
6899 for c in (0..k_usize - 1).rev() {
6900 let l = usize::try_from(
6901 buckets[buckets_index2(c, 1) + buckets_index2(1, 0)] - buckets[buckets_index2(c, 1)],
6902 )
6903 .expect("interval length must be non-negative");
6904 if l > 0 {
6905 let i = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
6906 if j > i {
6907 sa[i..j].fill(0);
6908 }
6909
6910 let new_j = i - l;
6911 let src_end = usize::try_from(m).expect("m must be non-negative");
6912 let src_start = src_end - l;
6913 sa.copy_within(src_start..src_end, new_j);
6914 m -= l as SaSint;
6915 j = new_j;
6916 }
6917 }
6918
6919 sa[..j].fill(0);
6920}
6921
6922#[doc(hidden)]
6924pub fn place_lms_suffixes_interval_32s_2k(
6925 sa: &mut [SaSint],
6926 n: SaSint,
6927 k: SaSint,
6928 mut m: SaSint,
6929 buckets: &[SaSint],
6930) {
6931 let k_usize = usize::try_from(k).expect("k must be non-negative");
6932 let mut j = usize::try_from(n).expect("n must be non-negative");
6933
6934 if k_usize > 1 {
6935 let mut c = buckets_index2(k_usize - 2, 0) as isize;
6936 while c >= buckets_index2(0, 0) as isize {
6937 let c_usize = c as usize;
6938 let l = usize::try_from(
6939 buckets[c_usize + buckets_index2(1, 1)] - buckets[c_usize + buckets_index2(0, 1)],
6940 )
6941 .expect("interval length must be non-negative");
6942 if l > 0 {
6943 let i =
6944 usize::try_from(buckets[c_usize]).expect("bucket start must be non-negative");
6945 if j > i {
6946 sa[i..j].fill(0);
6947 }
6948
6949 let new_j = i - l;
6950 let src_end = usize::try_from(m).expect("m must be non-negative");
6951 let src_start = src_end - l;
6952 sa.copy_within(src_start..src_end, new_j);
6953 m -= l as SaSint;
6954 j = new_j;
6955 }
6956 c -= buckets_index2(1, 0) as isize;
6957 }
6958 }
6959
6960 sa[..j].fill(0);
6961}
6962
6963#[doc(hidden)]
6965pub fn place_lms_suffixes_interval_32s_1k(
6966 t: &[SaSint],
6967 sa: &mut [SaSint],
6968 k: SaSint,
6969 m: SaSint,
6970 buckets: &[SaSint],
6971) {
6972 let mut c = k - 1;
6973 let c_usize = usize::try_from(c).expect("k must be positive");
6974 let mut l = usize::try_from(buckets[c_usize]).expect("bucket end must be non-negative");
6975
6976 let m_usize = usize::try_from(m).expect("m must be non-negative");
6977 for i in (0..m_usize).rev() {
6978 let p = usize::try_from(sa[i]).expect("suffix index must be non-negative");
6979 let tp = t[p];
6980 if tp != c {
6981 c = tp;
6982 let bucket = usize::try_from(c).expect("bucket index must be non-negative");
6983 let bucket_pos =
6984 usize::try_from(buckets[bucket]).expect("bucket end must be non-negative");
6985 if l > bucket_pos {
6986 sa[bucket_pos..l].fill(0);
6987 }
6988 l = bucket_pos;
6989 }
6990 l -= 1;
6991 sa[l] = p as SaSint;
6992 }
6993
6994 sa[..l].fill(0);
6995}
6996
6997#[doc(hidden)]
6999pub fn place_lms_suffixes_histogram_32s_6k(
7000 sa: &mut [SaSint],
7001 n: SaSint,
7002 k: SaSint,
7003 mut m: SaSint,
7004 buckets: &[SaSint],
7005) {
7006 let k_usize = usize::try_from(k).expect("k must be non-negative");
7007 let bucket_end = &buckets[5 * k_usize..6 * k_usize];
7008
7009 let mut j = usize::try_from(n).expect("n must be non-negative");
7010 for c in (0..k_usize - 1).rev() {
7011 let l = usize::try_from(buckets[buckets_index4(c, 1)])
7012 .expect("histogram length must be non-negative");
7013 if l > 0 {
7014 let i = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
7015 if j > i {
7016 sa[i..j].fill(0);
7017 }
7018
7019 let new_j = i - l;
7020 let src_end = usize::try_from(m).expect("m must be non-negative");
7021 let src_start = src_end - l;
7022 sa.copy_within(src_start..src_end, new_j);
7023 m -= l as SaSint;
7024 j = new_j;
7025 }
7026 }
7027
7028 sa[..j].fill(0);
7029}
7030
7031#[doc(hidden)]
7033pub fn place_lms_suffixes_histogram_32s_4k(
7034 sa: &mut [SaSint],
7035 n: SaSint,
7036 k: SaSint,
7037 mut m: SaSint,
7038 buckets: &[SaSint],
7039) {
7040 let k_usize = usize::try_from(k).expect("k must be non-negative");
7041 let bucket_end = &buckets[3 * k_usize..4 * k_usize];
7042
7043 let mut j = usize::try_from(n).expect("n must be non-negative");
7044 for c in (0..k_usize - 1).rev() {
7045 let l = usize::try_from(buckets[buckets_index2(c, 1)])
7046 .expect("histogram length must be non-negative");
7047 if l > 0 {
7048 let i = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
7049 if j > i {
7050 sa[i..j].fill(0);
7051 }
7052
7053 let new_j = i - l;
7054 let src_end = usize::try_from(m).expect("m must be non-negative");
7055 let src_start = src_end - l;
7056 sa.copy_within(src_start..src_end, new_j);
7057 m -= l as SaSint;
7058 j = new_j;
7059 }
7060 }
7061
7062 sa[..j].fill(0);
7063}
7064
7065#[doc(hidden)]
7067pub fn place_lms_suffixes_histogram_32s_2k(
7068 sa: &mut [SaSint],
7069 n: SaSint,
7070 k: SaSint,
7071 mut m: SaSint,
7072 buckets: &[SaSint],
7073) {
7074 let k_usize = usize::try_from(k).expect("k must be non-negative");
7075 let mut j = usize::try_from(n).expect("n must be non-negative");
7076
7077 if k_usize > 1 {
7078 let mut c = buckets_index2(k_usize - 2, 0) as isize;
7079 while c >= buckets_index2(0, 0) as isize {
7080 let c_usize = c as usize;
7081 let l = usize::try_from(buckets[c_usize + buckets_index2(0, 1)])
7082 .expect("histogram length must be non-negative");
7083 if l > 0 {
7084 let i =
7085 usize::try_from(buckets[c_usize]).expect("bucket start must be non-negative");
7086 if j > i {
7087 sa[i..j].fill(0);
7088 }
7089
7090 let new_j = i - l;
7091 let src_end = usize::try_from(m).expect("m must be non-negative");
7092 let src_start = src_end - l;
7093 sa.copy_within(src_start..src_end, new_j);
7094 m -= l as SaSint;
7095 j = new_j;
7096 }
7097 c -= buckets_index2(1, 0) as isize;
7098 }
7099 }
7100
7101 sa[..j].fill(0);
7102}
7103
7104#[doc(hidden)]
7106pub fn final_bwt_scan_left_to_right_8u(
7107 t: &[u8],
7108 sa: &mut [SaSint],
7109 induction_bucket: &mut [SaSint],
7110 omp_block_start: FastSint,
7111 omp_block_size: FastSint,
7112) {
7113 if omp_block_size <= 0 {
7114 return;
7115 }
7116
7117 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7118 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7119 for i in start..start + size {
7120 let mut p = sa[i];
7121 sa[i] = p & SAINT_MAX;
7122 if p > 0 {
7123 p -= 1;
7124 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7125 sa[i] = t[p_usize] as SaSint | SAINT_MIN;
7126 let bucket = t[p_usize] as usize;
7127 let slot = usize::try_from(induction_bucket[bucket])
7128 .expect("bucket slot must be non-negative");
7129 sa[slot] = p
7130 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7131 << (SAINT_BIT - 1));
7132 induction_bucket[bucket] += 1;
7133 }
7134 }
7135}
7136
7137#[doc(hidden)]
7139pub fn final_bwt_aux_scan_left_to_right_8u(
7140 t: &[u8],
7141 sa: &mut [SaSint],
7142 rm: SaSint,
7143 i_out: &mut [SaSint],
7144 induction_bucket: &mut [SaSint],
7145 omp_block_start: FastSint,
7146 omp_block_size: FastSint,
7147) {
7148 if omp_block_size <= 0 {
7149 return;
7150 }
7151
7152 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7153 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7154 for i in start..start + size {
7155 let mut p = sa[i];
7156 sa[i] = p & SAINT_MAX;
7157 if p > 0 {
7158 p -= 1;
7159 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7160 sa[i] = t[p_usize] as SaSint | SAINT_MIN;
7161 let bucket = t[p_usize] as usize;
7162 let slot = usize::try_from(induction_bucket[bucket])
7163 .expect("bucket slot must be non-negative");
7164 sa[slot] = p
7165 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7166 << (SAINT_BIT - 1));
7167 induction_bucket[bucket] += 1;
7168 if (p & rm) == 0 {
7169 let out_idx =
7170 usize::try_from(p / (rm + 1)).expect("sample index must be non-negative");
7171 i_out[out_idx] = induction_bucket[bucket];
7172 }
7173 }
7174 }
7175}
7176
7177#[doc(hidden)]
7179pub fn final_sorting_scan_left_to_right_8u(
7180 t: &[u8],
7181 sa: &mut [SaSint],
7182 induction_bucket: &mut [SaSint],
7183 omp_block_start: FastSint,
7184 omp_block_size: FastSint,
7185) {
7186 if omp_block_size <= 0 {
7187 return;
7188 }
7189
7190 let prefetch_distance = 64usize;
7191 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7192 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7193
7194 let mut i = start;
7195 let mut j = if size > prefetch_distance + 1 {
7196 start + size - (prefetch_distance + 1)
7197 } else {
7198 start
7199 };
7200 while i < j {
7201 let mut p0 = sa[i];
7202 sa[i] = p0 ^ SAINT_MIN;
7203 if p0 > 0 {
7204 p0 -= 1;
7205 let p0_usize = p0 as usize;
7206 let bucket0 = t[p0_usize] as usize;
7207 let slot0 = induction_bucket[bucket0] as usize;
7208 sa[slot0] = p0
7209 | ((usize::from(t[p0_usize - usize::from(p0 > 0)] < t[p0_usize]) as SaSint)
7210 << (SAINT_BIT - 1));
7211 induction_bucket[bucket0] += 1;
7212 }
7213
7214 let mut p1 = sa[i + 1];
7215 sa[i + 1] = p1 ^ SAINT_MIN;
7216 if p1 > 0 {
7217 p1 -= 1;
7218 let p1_usize = p1 as usize;
7219 let bucket1 = t[p1_usize] as usize;
7220 let slot1 = induction_bucket[bucket1] as usize;
7221 sa[slot1] = p1
7222 | ((usize::from(t[p1_usize - usize::from(p1 > 0)] < t[p1_usize]) as SaSint)
7223 << (SAINT_BIT - 1));
7224 induction_bucket[bucket1] += 1;
7225 }
7226
7227 i += 2;
7228 }
7229
7230 j = start + size;
7231 while i < j {
7232 let mut p = sa[i];
7233 sa[i] = p ^ SAINT_MIN;
7234 if p > 0 {
7235 p -= 1;
7236 let p_usize = p as usize;
7237 let bucket = t[p_usize] as usize;
7238 let slot = induction_bucket[bucket] as usize;
7239 sa[slot] = p
7240 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7241 << (SAINT_BIT - 1));
7242 induction_bucket[bucket] += 1;
7243 }
7244 i += 1;
7245 }
7246}
7247
7248#[doc(hidden)]
7250pub fn final_sorting_scan_left_to_right_32s(
7251 t: &[SaSint],
7252 sa: &mut [SaSint],
7253 induction_bucket: &mut [SaSint],
7254 omp_block_start: FastSint,
7255 omp_block_size: FastSint,
7256) {
7257 if omp_block_size <= 0 {
7258 return;
7259 }
7260
7261 let prefetch_distance: FastSint = 64;
7262 let mut i = omp_block_start;
7263 let mut j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
7264
7265 while i < j {
7266 let i0 = i as usize;
7267 let mut p0 = sa[i0];
7268 sa[i0] = p0 ^ SAINT_MIN;
7269 if p0 > 0 {
7270 p0 -= 1;
7271 let p0u = p0 as usize;
7272 let bucket0 = t[p0u] as usize;
7273 let slot0 = induction_bucket[bucket0] as usize;
7274 sa[slot0] = p0
7275 | ((usize::from(t[p0u - usize::from(p0 > 0)] < t[p0u]) as SaSint)
7276 << (SAINT_BIT - 1));
7277 induction_bucket[bucket0] += 1;
7278 }
7279
7280 let i1 = (i + 1) as usize;
7281 let mut p1 = sa[i1];
7282 sa[i1] = p1 ^ SAINT_MIN;
7283 if p1 > 0 {
7284 p1 -= 1;
7285 let p1u = p1 as usize;
7286 let bucket1 = t[p1u] as usize;
7287 let slot1 = induction_bucket[bucket1] as usize;
7288 sa[slot1] = p1
7289 | ((usize::from(t[p1u - usize::from(p1 > 0)] < t[p1u]) as SaSint)
7290 << (SAINT_BIT - 1));
7291 induction_bucket[bucket1] += 1;
7292 }
7293 i += 2;
7294 }
7295
7296 j += 2 * prefetch_distance + 1;
7297 while i < j {
7298 let iu = i as usize;
7299 let mut p = sa[iu];
7300 sa[iu] = p ^ SAINT_MIN;
7301 if p > 0 {
7302 p -= 1;
7303 let pu = p as usize;
7304 let bucket = t[pu] as usize;
7305 let slot = induction_bucket[bucket] as usize;
7306 sa[slot] = p
7307 | ((usize::from(t[pu - usize::from(p > 0)] < t[pu]) as SaSint) << (SAINT_BIT - 1));
7308 induction_bucket[bucket] += 1;
7309 }
7310 i += 1;
7311 }
7312}
7313
7314#[doc(hidden)]
7316pub fn final_bwt_scan_left_to_right_8u_block_prepare(
7317 t: &[u8],
7318 sa: &mut [SaSint],
7319 k: SaSint,
7320 buckets: &mut [SaSint],
7321 cache: &mut [ThreadCache],
7322 omp_block_start: FastSint,
7323 omp_block_size: FastSint,
7324) -> FastSint {
7325 if omp_block_size <= 0 {
7326 return 0;
7327 }
7328
7329 let k_usize = usize::try_from(k).expect("k must be non-negative");
7330 buckets[..k_usize].fill(0);
7331
7332 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7333 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7334 let mut count = 0usize;
7335 for i in start..start + size {
7336 let mut p = sa[i];
7337 sa[i] = p & SAINT_MAX;
7338 if p > 0 {
7339 p -= 1;
7340 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7341 let symbol = t[p_usize] as usize;
7342 sa[i] = t[p_usize] as SaSint | SAINT_MIN;
7343 buckets[symbol] += 1;
7344 cache[count].symbol = symbol as SaSint;
7345 cache[count].index = p
7346 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7347 << (SAINT_BIT - 1));
7348 count += 1;
7349 }
7350 }
7351
7352 count as FastSint
7353}
7354
7355#[doc(hidden)]
7357pub fn final_sorting_scan_left_to_right_8u_block_prepare(
7358 t: &[u8],
7359 sa: &mut [SaSint],
7360 k: SaSint,
7361 buckets: &mut [SaSint],
7362 cache: &mut [ThreadCache],
7363 omp_block_start: FastSint,
7364 omp_block_size: FastSint,
7365) -> FastSint {
7366 if omp_block_size <= 0 {
7367 return 0;
7368 }
7369
7370 let k_usize = usize::try_from(k).expect("k must be non-negative");
7371 buckets[..k_usize].fill(0);
7372
7373 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7374 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7375 let mut count = 0usize;
7376 for i in start..start + size {
7377 let mut p = sa[i];
7378 sa[i] = p ^ SAINT_MIN;
7379 if p > 0 {
7380 p -= 1;
7381 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
7382 let symbol = t[p_usize] as usize;
7383 buckets[symbol] += 1;
7384 cache[count].symbol = symbol as SaSint;
7385 cache[count].index = p
7386 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7387 << (SAINT_BIT - 1));
7388 count += 1;
7389 }
7390 }
7391
7392 count as FastSint
7393}
7394
7395#[doc(hidden)]
7397pub fn final_order_scan_left_to_right_8u_block_place(
7398 sa: &mut [SaSint],
7399 buckets: &mut [SaSint],
7400 cache: &[ThreadCache],
7401 count: FastSint,
7402) {
7403 if count <= 0 {
7404 return;
7405 }
7406
7407 let count_usize = usize::try_from(count).expect("count must be non-negative");
7408 for entry in &cache[..count_usize] {
7409 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
7410 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
7411 sa[slot] = entry.index;
7412 buckets[symbol] += 1;
7413 }
7414}
7415
7416#[doc(hidden)]
7418pub fn final_bwt_aux_scan_left_to_right_8u_block_place(
7419 sa: &mut [SaSint],
7420 rm: SaSint,
7421 i_out: &mut [SaSint],
7422 buckets: &mut [SaSint],
7423 cache: &[ThreadCache],
7424 count: FastSint,
7425) {
7426 if count <= 0 {
7427 return;
7428 }
7429
7430 let count_usize = usize::try_from(count).expect("count must be non-negative");
7431 for entry in &cache[..count_usize] {
7432 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
7433 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
7434 sa[slot] = entry.index;
7435 buckets[symbol] += 1;
7436 if (entry.index & rm) == 0 {
7437 let sample_index = usize::try_from((entry.index & SAINT_MAX) / (rm + 1))
7438 .expect("sample index must be non-negative");
7439 i_out[sample_index] = buckets[symbol];
7440 }
7441 }
7442}
7443
7444#[doc(hidden)]
7446pub fn final_sorting_scan_left_to_right_32s_block_gather(
7447 t: &[SaSint],
7448 sa: &mut [SaSint],
7449 cache: &mut [ThreadCache],
7450 omp_block_start: FastSint,
7451 omp_block_size: FastSint,
7452) {
7453 if omp_block_size <= 0 {
7454 return;
7455 }
7456 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
7457 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
7458 for offset in 0..size {
7459 let i = start + offset;
7460 let mut symbol = SAINT_MIN;
7461 let mut p = sa[i];
7462 sa[i] = p ^ SAINT_MIN;
7463 if p > 0 {
7464 p -= 1;
7465 let p_usize = p as usize;
7466 cache[offset].index = p
7467 | ((usize::from(t[p_usize - usize::from(p > 0)] < t[p_usize]) as SaSint)
7468 << (SAINT_BIT - 1));
7469 symbol = t[p_usize];
7470 }
7471 cache[offset].symbol = symbol;
7472 }
7473}
7474
7475#[doc(hidden)]
7477pub fn final_sorting_scan_left_to_right_32s_block_sort(
7478 t: &[SaSint],
7479 induction_bucket: &mut [SaSint],
7480 cache: &mut [ThreadCache],
7481 omp_block_start: FastSint,
7482 omp_block_size: FastSint,
7483) {
7484 if omp_block_size <= 0 {
7485 return;
7486 }
7487 let prefetch_distance = 64usize;
7488 let start = omp_block_start as usize;
7489 let block_end = start + omp_block_size as usize;
7490 let mut i = start;
7491 let mut j = start + (omp_block_size as usize).saturating_sub(prefetch_distance + 1);
7492
7493 while i < j {
7494 let ci = i - start;
7495 let v0 = cache[ci].symbol;
7496 if v0 >= 0 {
7497 let bucket_index0 = v0 as usize;
7498 cache[ci].symbol = induction_bucket[bucket_index0];
7499 induction_bucket[bucket_index0] += 1;
7500 if cache[ci].symbol < block_end as SaSint {
7501 let ni = cache[ci].symbol as usize;
7502 let cni = ni - start;
7503 let mut np = cache[ci].index;
7504 cache[ci].index = np ^ SAINT_MIN;
7505 if np > 0 {
7506 np -= 1;
7507 let np_usize = np as usize;
7508 cache[cni].index = np
7509 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
7510 as SaSint)
7511 << (SAINT_BIT - 1));
7512 cache[cni].symbol = t[np_usize];
7513 }
7514 }
7515 }
7516
7517 let i1 = i + 1;
7518 let ci1 = i1 - start;
7519 let v1 = cache[ci1].symbol;
7520 if v1 >= 0 {
7521 let bucket_index1 = v1 as usize;
7522 cache[ci1].symbol = induction_bucket[bucket_index1];
7523 induction_bucket[bucket_index1] += 1;
7524 if cache[ci1].symbol < block_end as SaSint {
7525 let ni = cache[ci1].symbol as usize;
7526 let cni = ni - start;
7527 let mut np = cache[ci1].index;
7528 cache[ci1].index = np ^ SAINT_MIN;
7529 if np > 0 {
7530 np -= 1;
7531 let np_usize = np as usize;
7532 cache[cni].index = np
7533 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
7534 as SaSint)
7535 << (SAINT_BIT - 1));
7536 cache[cni].symbol = t[np_usize];
7537 }
7538 }
7539 }
7540
7541 i += 2;
7542 }
7543
7544 j = block_end;
7545 while i < j {
7546 let ci = i - start;
7547 let v = cache[ci].symbol;
7548 if v >= 0 {
7549 let bucket_index = v as usize;
7550 cache[ci].symbol = induction_bucket[bucket_index];
7551 induction_bucket[bucket_index] += 1;
7552 if cache[ci].symbol < block_end as SaSint {
7553 let ni = cache[ci].symbol as usize;
7554 let cni = ni - start;
7555 let mut np = cache[ci].index;
7556 cache[ci].index = np ^ SAINT_MIN;
7557 if np > 0 {
7558 np -= 1;
7559 let np_usize = np as usize;
7560 cache[cni].index = np
7561 | ((usize::from(t[np_usize - usize::from(np > 0)] < t[np_usize])
7562 as SaSint)
7563 << (SAINT_BIT - 1));
7564 cache[cni].symbol = t[np_usize];
7565 }
7566 }
7567 }
7568 i += 1;
7569 }
7570}
7571
7572#[doc(hidden)]
7574pub fn final_bwt_scan_left_to_right_8u_block_omp(
7575 t: &[u8],
7576 sa: &mut [SaSint],
7577 k: SaSint,
7578 induction_bucket: &mut [SaSint],
7579 block_start: FastSint,
7580 block_size: FastSint,
7581 threads: SaSint,
7582 thread_state: &mut [ThreadState],
7583) {
7584 if block_size <= 0 {
7585 return;
7586 }
7587
7588 let k_usize = usize::try_from(k).expect("k must be non-negative");
7589 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7590 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
7591 usize::try_from(threads)
7592 .expect("threads must be non-negative")
7593 .min(thread_state.len())
7594 .max(1)
7595 } else {
7596 1
7597 };
7598
7599 if omp_num_threads == 1 {
7600 final_bwt_scan_left_to_right_8u(t, sa, induction_bucket, block_start, block_size);
7601 return;
7602 }
7603
7604 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
7605 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7606 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
7607 let relative_start = thread_num * omp_block_stride;
7608 let size = if thread_num + 1 < omp_num_threads {
7609 omp_block_stride
7610 } else {
7611 block_size_usize - relative_start
7612 };
7613 state.count = final_bwt_scan_left_to_right_8u_block_prepare(
7614 t,
7615 sa,
7616 k,
7617 &mut state.buckets,
7618 &mut state.cache,
7619 (block_start_usize + relative_start) as FastSint,
7620 size as FastSint,
7621 );
7622 }
7623
7624 for state in thread_state.iter_mut().take(omp_num_threads) {
7625 for (c, bucket) in induction_bucket.iter_mut().take(k_usize).enumerate() {
7626 let a = *bucket;
7627 let b = state.buckets[c];
7628 *bucket = a + b;
7629 state.buckets[c] = a;
7630 }
7631 }
7632
7633 for state in thread_state.iter_mut().take(omp_num_threads) {
7634 final_order_scan_left_to_right_8u_block_place(
7635 sa,
7636 &mut state.buckets,
7637 &state.cache,
7638 state.count,
7639 );
7640 }
7641}
7642
7643#[doc(hidden)]
7645pub fn final_bwt_aux_scan_left_to_right_8u_block_omp(
7646 t: &[u8],
7647 sa: &mut [SaSint],
7648 k: SaSint,
7649 rm: SaSint,
7650 i_out: &mut [SaSint],
7651 induction_bucket: &mut [SaSint],
7652 block_start: FastSint,
7653 block_size: FastSint,
7654 threads: SaSint,
7655 thread_state: &mut [ThreadState],
7656) {
7657 if block_size <= 0 {
7658 return;
7659 }
7660
7661 let k_usize = usize::try_from(k).expect("k must be non-negative");
7662 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7663 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
7664 usize::try_from(threads)
7665 .expect("threads must be non-negative")
7666 .min(thread_state.len())
7667 .max(1)
7668 } else {
7669 1
7670 };
7671
7672 if omp_num_threads == 1 {
7673 final_bwt_aux_scan_left_to_right_8u(
7674 t,
7675 sa,
7676 rm,
7677 i_out,
7678 induction_bucket,
7679 block_start,
7680 block_size,
7681 );
7682 return;
7683 }
7684
7685 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
7686 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7687 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
7688 let relative_start = thread_num * omp_block_stride;
7689 let size = if thread_num + 1 < omp_num_threads {
7690 omp_block_stride
7691 } else {
7692 block_size_usize - relative_start
7693 };
7694 state.count = final_bwt_scan_left_to_right_8u_block_prepare(
7695 t,
7696 sa,
7697 k,
7698 &mut state.buckets,
7699 &mut state.cache,
7700 (block_start_usize + relative_start) as FastSint,
7701 size as FastSint,
7702 );
7703 }
7704
7705 for state in thread_state.iter_mut().take(omp_num_threads) {
7706 for (c, bucket) in induction_bucket.iter_mut().take(k_usize).enumerate() {
7707 let a = *bucket;
7708 let b = state.buckets[c];
7709 *bucket = a + b;
7710 state.buckets[c] = a;
7711 }
7712 }
7713
7714 for state in thread_state.iter_mut().take(omp_num_threads) {
7715 final_bwt_aux_scan_left_to_right_8u_block_place(
7716 sa,
7717 rm,
7718 i_out,
7719 &mut state.buckets,
7720 &state.cache,
7721 state.count,
7722 );
7723 }
7724}
7725
7726#[doc(hidden)]
7728pub fn final_sorting_scan_left_to_right_8u_block_omp(
7729 t: &[u8],
7730 sa: &mut [SaSint],
7731 k: SaSint,
7732 induction_bucket: &mut [SaSint],
7733 block_start: FastSint,
7734 block_size: FastSint,
7735 threads: SaSint,
7736 thread_state: &mut [ThreadState],
7737) {
7738 if block_size <= 0 {
7739 return;
7740 }
7741
7742 let k_usize = usize::try_from(k).expect("k must be non-negative");
7743 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7744 let omp_num_threads = if threads > 1 && block_size_usize >= 64 * k_usize.max(256) {
7745 usize::try_from(threads)
7746 .expect("threads must be non-negative")
7747 .min(thread_state.len())
7748 .max(1)
7749 } else {
7750 1
7751 };
7752
7753 if omp_num_threads == 1 {
7754 final_sorting_scan_left_to_right_8u(t, sa, induction_bucket, block_start, block_size);
7755 return;
7756 }
7757
7758 let block_start_usize = usize::try_from(block_start).expect("block_start must be non-negative");
7759 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7760 for (thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
7761 let relative_start = thread_num * omp_block_stride;
7762 let size = if thread_num + 1 < omp_num_threads {
7763 omp_block_stride
7764 } else {
7765 block_size_usize - relative_start
7766 };
7767 state.count = final_sorting_scan_left_to_right_8u_block_prepare(
7768 t,
7769 sa,
7770 k,
7771 &mut state.buckets,
7772 &mut state.cache,
7773 (block_start_usize + relative_start) as FastSint,
7774 size as FastSint,
7775 );
7776 }
7777
7778 for state in thread_state.iter_mut().take(omp_num_threads) {
7779 for (c, bucket) in induction_bucket.iter_mut().take(k_usize).enumerate() {
7780 let a = *bucket;
7781 let b = state.buckets[c];
7782 *bucket = a + b;
7783 state.buckets[c] = a;
7784 }
7785 }
7786
7787 for state in thread_state.iter_mut().take(omp_num_threads) {
7788 final_order_scan_left_to_right_8u_block_place(
7789 sa,
7790 &mut state.buckets,
7791 &state.cache,
7792 state.count,
7793 );
7794 }
7795}
7796
7797#[doc(hidden)]
7799pub fn final_sorting_scan_left_to_right_32s_block_omp(
7800 t: &[SaSint],
7801 sa: &mut [SaSint],
7802 buckets: &mut [SaSint],
7803 cache: &mut [ThreadCache],
7804 block_start: FastSint,
7805 block_size: FastSint,
7806 threads: SaSint,
7807) {
7808 if threads <= 1 || block_size < 16_384 {
7809 final_sorting_scan_left_to_right_32s(t, sa, buckets, block_start, block_size);
7810 return;
7811 }
7812
7813 final_sorting_scan_left_to_right_32s_block_gather(t, sa, cache, block_start, block_size);
7814 final_sorting_scan_left_to_right_32s_block_sort(t, buckets, cache, block_start, block_size);
7815 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
7816 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
7817 let omp_num_threads = threads_usize.min(block_size_usize);
7818 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
7819 for omp_thread_num in 0..omp_num_threads {
7820 let omp_block_start = omp_thread_num * omp_block_stride;
7821 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
7822 omp_block_stride
7823 } else {
7824 block_size_usize - omp_block_start
7825 };
7826 compact_and_place_cached_suffixes(
7827 sa,
7828 cache,
7829 omp_block_start as FastSint,
7830 omp_block_size as FastSint,
7831 );
7832 }
7833}
7834
7835#[doc(hidden)]
7837pub fn final_bwt_scan_left_to_right_8u_omp(
7838 t: &[u8],
7839 sa: &mut [SaSint],
7840 n: FastSint,
7841 k: SaSint,
7842 induction_bucket: &mut [SaSint],
7843 threads: SaSint,
7844 thread_state: &mut [ThreadState],
7845) {
7846 let n_usize = usize::try_from(n).expect("n must be non-negative");
7847 let last = n_usize - 1;
7848 let bucket = t[last] as usize;
7849 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
7850 sa[slot] =
7851 (n as SaSint - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
7852 induction_bucket[bucket] += 1;
7853
7854 if threads == 1 || n < 65_536 {
7855 final_bwt_scan_left_to_right_8u(t, sa, induction_bucket, 0, n);
7856 return;
7857 }
7858
7859 let mut block_start = 0usize;
7860 while block_start < n_usize {
7861 if sa[block_start] == 0 {
7862 block_start += 1;
7863 } else {
7864 let threads_usize = usize::try_from(threads)
7865 .expect("threads must be non-negative")
7866 .min(thread_state.len())
7867 .max(1);
7868 let max_span = threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
7869 let block_max_end = (block_start + max_span).min(n_usize);
7870 let mut block_end = block_start + 1;
7871 while block_end < block_max_end && sa[block_end] != 0 {
7872 block_end += 1;
7873 }
7874 let size = block_end - block_start;
7875
7876 if size < 32 {
7877 final_bwt_scan_left_to_right_8u(
7878 t,
7879 sa,
7880 induction_bucket,
7881 block_start as FastSint,
7882 size as FastSint,
7883 );
7884 } else {
7885 final_bwt_scan_left_to_right_8u_block_omp(
7886 t,
7887 sa,
7888 k,
7889 induction_bucket,
7890 block_start as FastSint,
7891 size as FastSint,
7892 threads,
7893 thread_state,
7894 );
7895 }
7896 block_start = block_end;
7897 }
7898 }
7899}
7900
7901#[doc(hidden)]
7903pub fn final_bwt_aux_scan_left_to_right_8u_omp(
7904 t: &[u8],
7905 sa: &mut [SaSint],
7906 n: FastSint,
7907 k: SaSint,
7908 rm: SaSint,
7909 i_out: &mut [SaSint],
7910 induction_bucket: &mut [SaSint],
7911 threads: SaSint,
7912 thread_state: &mut [ThreadState],
7913) {
7914 let n_usize = usize::try_from(n).expect("n must be non-negative");
7915 let last = n_usize - 1;
7916 let bucket = t[last] as usize;
7917 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
7918 sa[slot] =
7919 (n as SaSint - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
7920 induction_bucket[bucket] += 1;
7921 if (((n as SaSint) - 1) & rm) == 0 {
7922 i_out[last / usize::try_from(rm + 1).expect("rm must allow positive step")] =
7923 induction_bucket[bucket];
7924 }
7925
7926 if threads == 1 || n < 65_536 {
7927 final_bwt_aux_scan_left_to_right_8u(t, sa, rm, i_out, induction_bucket, 0, n);
7928 return;
7929 }
7930
7931 let mut block_start = 0usize;
7932 while block_start < n_usize {
7933 if sa[block_start] == 0 {
7934 block_start += 1;
7935 } else {
7936 let threads_usize = usize::try_from(threads)
7937 .expect("threads must be non-negative")
7938 .min(thread_state.len())
7939 .max(1);
7940 let max_span = threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
7941 let block_max_end = (block_start + max_span).min(n_usize);
7942 let mut block_end = block_start + 1;
7943 while block_end < block_max_end && sa[block_end] != 0 {
7944 block_end += 1;
7945 }
7946 let size = block_end - block_start;
7947
7948 if size < 32 {
7949 final_bwt_aux_scan_left_to_right_8u(
7950 t,
7951 sa,
7952 rm,
7953 i_out,
7954 induction_bucket,
7955 block_start as FastSint,
7956 size as FastSint,
7957 );
7958 } else {
7959 final_bwt_aux_scan_left_to_right_8u_block_omp(
7960 t,
7961 sa,
7962 k,
7963 rm,
7964 i_out,
7965 induction_bucket,
7966 block_start as FastSint,
7967 size as FastSint,
7968 threads,
7969 thread_state,
7970 );
7971 }
7972 block_start = block_end;
7973 }
7974 }
7975}
7976
7977#[doc(hidden)]
7979pub fn final_sorting_scan_left_to_right_8u_omp(
7980 t: &[u8],
7981 sa: &mut [SaSint],
7982 n: FastSint,
7983 k: SaSint,
7984 induction_bucket: &mut [SaSint],
7985 threads: SaSint,
7986 thread_state: &mut [ThreadState],
7987) {
7988 let n_usize = usize::try_from(n).expect("n must be non-negative");
7989 let last = n_usize - 1;
7990 let bucket = t[last] as usize;
7991 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
7992 sa[slot] =
7993 (n as SaSint - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
7994 induction_bucket[bucket] += 1;
7995
7996 if threads == 1 || n < 65_536 {
7997 final_sorting_scan_left_to_right_8u(t, sa, induction_bucket, 0, n);
7998 return;
7999 }
8000
8001 let mut block_start = 0usize;
8002 while block_start < n_usize {
8003 if sa[block_start] == 0 {
8004 block_start += 1;
8005 } else {
8006 let threads_usize = usize::try_from(threads)
8007 .expect("threads must be non-negative")
8008 .min(thread_state.len())
8009 .max(1);
8010 let max_span = threads_usize * (LIBSAIS_PER_THREAD_CACHE_SIZE - 16 * threads_usize);
8011 let block_max_end = (block_start + max_span).min(n_usize);
8012 let mut block_end = block_start + 1;
8013 while block_end < block_max_end && sa[block_end] != 0 {
8014 block_end += 1;
8015 }
8016 let size = block_end - block_start;
8017
8018 if size < 32 {
8019 final_sorting_scan_left_to_right_8u(
8020 t,
8021 sa,
8022 induction_bucket,
8023 block_start as FastSint,
8024 size as FastSint,
8025 );
8026 } else {
8027 final_sorting_scan_left_to_right_8u_block_omp(
8028 t,
8029 sa,
8030 k,
8031 induction_bucket,
8032 block_start as FastSint,
8033 size as FastSint,
8034 threads,
8035 thread_state,
8036 );
8037 }
8038 block_start = block_end;
8039 }
8040 }
8041}
8042
8043#[doc(hidden)]
8045pub fn final_sorting_scan_left_to_right_32s_omp(
8046 t: &[SaSint],
8047 sa: &mut [SaSint],
8048 n: SaSint,
8049 induction_bucket: &mut [SaSint],
8050 threads: SaSint,
8051 thread_state: &mut [ThreadState],
8052) {
8053 let n_usize = usize::try_from(n).expect("n must be non-negative");
8054 let last = n_usize - 1;
8055 let bucket = usize::try_from(t[last]).expect("bucket symbol must be non-negative");
8056 let slot = usize::try_from(induction_bucket[bucket]).expect("bucket slot must be non-negative");
8057 sa[slot] = (n - 1) | ((usize::from(t[last - 1] < t[last]) as SaSint) << (SAINT_BIT - 1));
8058 induction_bucket[bucket] += 1;
8059
8060 if threads == 1 || n < 65_536 {
8061 final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n as FastSint);
8062 return;
8063 }
8064
8065 if thread_state.is_empty() {
8066 final_sorting_scan_left_to_right_32s(t, sa, induction_bucket, 0, n as FastSint);
8067 return;
8068 }
8069
8070 let threads_usize = usize::try_from(threads)
8071 .expect("threads must be non-negative")
8072 .max(1);
8073 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
8074 let mut block_start = 0usize;
8075 while block_start < n_usize {
8076 let block_end = (block_start + threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE).min(n_usize);
8077 final_sorting_scan_left_to_right_32s_block_omp(
8078 t,
8079 sa,
8080 induction_bucket,
8081 &mut cache,
8082 block_start as FastSint,
8083 (block_end - block_start) as FastSint,
8084 threads,
8085 );
8086 block_start = block_end;
8087 }
8088}
8089
8090#[doc(hidden)]
8092pub fn final_bwt_scan_right_to_left_8u(
8093 t: &[u8],
8094 sa: &mut [SaSint],
8095 induction_bucket: &mut [SaSint],
8096 omp_block_start: FastSint,
8097 omp_block_size: FastSint,
8098) -> SaSint {
8099 if omp_block_size <= 0 {
8100 return -1;
8101 }
8102
8103 let mut index = -1;
8104
8105 let start =
8106 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") as FastSint;
8107 let mut i = omp_block_start + omp_block_size - 1;
8108 let mut j = start + 1;
8109 while i >= j {
8110 let i0 = usize::try_from(i).expect("loop index must be non-negative");
8111 let i1 = usize::try_from(i - 1).expect("loop index must be non-negative");
8112
8113 let mut p0 = sa[i0];
8114 if p0 == 0 {
8115 index = i0 as SaSint;
8116 }
8117 sa[i0] = p0 & SAINT_MAX;
8118 if p0 > 0 {
8119 p0 -= 1;
8120 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
8121 let c0 = t[p0_usize - usize::from(p0 > 0)] as SaSint;
8122 let c1 = t[p0_usize] as SaSint;
8123 sa[i0] = c1;
8124 induction_bucket[c1 as usize] -= 1;
8125 let slot = usize::try_from(induction_bucket[c1 as usize])
8126 .expect("bucket slot must be non-negative");
8127 let marked = c0 | SAINT_MIN;
8128 sa[slot] = if c0 <= c1 { p0 } else { marked };
8129 }
8130
8131 let mut p1 = sa[i1];
8132 if p1 == 0 {
8133 index = i1 as SaSint;
8134 }
8135 sa[i1] = p1 & SAINT_MAX;
8136 if p1 > 0 {
8137 p1 -= 1;
8138 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
8139 let c0 = t[p1_usize - usize::from(p1 > 0)] as SaSint;
8140 let c1 = t[p1_usize] as SaSint;
8141 sa[i1] = c1;
8142 induction_bucket[c1 as usize] -= 1;
8143 let slot = usize::try_from(induction_bucket[c1 as usize])
8144 .expect("bucket slot must be non-negative");
8145 let marked = c0 | SAINT_MIN;
8146 sa[slot] = if c0 <= c1 { p1 } else { marked };
8147 }
8148
8149 i -= 2;
8150 }
8151
8152 j -= 1;
8153 while i >= j {
8154 let idx = usize::try_from(i).expect("loop index must be non-negative");
8155 let mut p = sa[idx];
8156 if p == 0 {
8157 index = idx as SaSint;
8158 }
8159 sa[idx] = p & SAINT_MAX;
8160 if p > 0 {
8161 p -= 1;
8162 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8163 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8164 let c1 = t[p_usize] as SaSint;
8165 sa[idx] = c1;
8166 induction_bucket[c1 as usize] -= 1;
8167 let slot = usize::try_from(induction_bucket[c1 as usize])
8168 .expect("bucket slot must be non-negative");
8169 let marked = c0 | SAINT_MIN;
8170 sa[slot] = if c0 <= c1 { p } else { marked };
8171 }
8172
8173 i -= 1;
8174 }
8175
8176 index
8177}
8178
8179#[doc(hidden)]
8181pub fn final_bwt_aux_scan_right_to_left_8u(
8182 t: &[u8],
8183 sa: &mut [SaSint],
8184 rm: SaSint,
8185 i_out: &mut [SaSint],
8186 induction_bucket: &mut [SaSint],
8187 omp_block_start: FastSint,
8188 omp_block_size: FastSint,
8189) {
8190 if omp_block_size <= 0 {
8191 return;
8192 }
8193
8194 let start =
8195 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") as FastSint;
8196 let mut i = omp_block_start + omp_block_size - 1;
8197 let mut j = start + 1;
8198 while i >= j {
8199 let i0 = usize::try_from(i).expect("loop index must be non-negative");
8200 let i1 = usize::try_from(i - 1).expect("loop index must be non-negative");
8201
8202 let mut p0 = sa[i0];
8203 sa[i0] = p0 & SAINT_MAX;
8204 if p0 > 0 {
8205 p0 -= 1;
8206 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
8207 let c0 = t[p0_usize - usize::from(p0 > 0)] as SaSint;
8208 let c1 = t[p0_usize] as SaSint;
8209 sa[i0] = c1;
8210 induction_bucket[c1 as usize] -= 1;
8211 let slot = usize::try_from(induction_bucket[c1 as usize])
8212 .expect("bucket slot must be non-negative");
8213 let marked = c0 | SAINT_MIN;
8214 sa[slot] = if c0 <= c1 { p0 } else { marked };
8215 if (p0 & rm) == 0 {
8216 let out_idx =
8217 usize::try_from(p0 / (rm + 1)).expect("sample index must be non-negative");
8218 i_out[out_idx] = induction_bucket[t[p0_usize] as usize] + 1;
8219 }
8220 }
8221
8222 let mut p1 = sa[i1];
8223 sa[i1] = p1 & SAINT_MAX;
8224 if p1 > 0 {
8225 p1 -= 1;
8226 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
8227 let c0 = t[p1_usize - usize::from(p1 > 0)] as SaSint;
8228 let c1 = t[p1_usize] as SaSint;
8229 sa[i1] = c1;
8230 induction_bucket[c1 as usize] -= 1;
8231 let slot = usize::try_from(induction_bucket[c1 as usize])
8232 .expect("bucket slot must be non-negative");
8233 let marked = c0 | SAINT_MIN;
8234 sa[slot] = if c0 <= c1 { p1 } else { marked };
8235 if (p1 & rm) == 0 {
8236 let out_idx =
8237 usize::try_from(p1 / (rm + 1)).expect("sample index must be non-negative");
8238 i_out[out_idx] = induction_bucket[t[p1_usize] as usize] + 1;
8239 }
8240 }
8241
8242 i -= 2;
8243 }
8244
8245 j -= 1;
8246 while i >= j {
8247 let idx = usize::try_from(i).expect("loop index must be non-negative");
8248 let mut p = sa[idx];
8249 sa[idx] = p & SAINT_MAX;
8250 if p > 0 {
8251 p -= 1;
8252 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8253 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8254 let c1 = t[p_usize] as SaSint;
8255 sa[idx] = c1;
8256 induction_bucket[c1 as usize] -= 1;
8257 let slot = usize::try_from(induction_bucket[c1 as usize])
8258 .expect("bucket slot must be non-negative");
8259 let marked = c0 | SAINT_MIN;
8260 sa[slot] = if c0 <= c1 { p } else { marked };
8261 if (p & rm) == 0 {
8262 let out_idx =
8263 usize::try_from(p / (rm + 1)).expect("sample index must be non-negative");
8264 i_out[out_idx] = induction_bucket[t[p_usize] as usize] + 1;
8265 }
8266 }
8267
8268 i -= 1;
8269 }
8270}
8271
8272#[doc(hidden)]
8274pub fn final_sorting_scan_right_to_left_8u(
8275 t: &[u8],
8276 sa: &mut [SaSint],
8277 induction_bucket: &mut [SaSint],
8278 omp_block_start: FastSint,
8279 omp_block_size: FastSint,
8280) {
8281 if omp_block_size <= 0 {
8282 return;
8283 }
8284
8285 let prefetch_distance = 64usize;
8286 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8287 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8288 let mut i = start + size - 1;
8289 let mut j = start + prefetch_distance + 1;
8290
8291 while i >= j {
8292 let mut p0 = sa[i];
8293 sa[i] = p0 & SAINT_MAX;
8294 if p0 > 0 {
8295 p0 -= 1;
8296 let p0_usize = p0 as usize;
8297 let bucket0 = t[p0_usize] as usize;
8298 induction_bucket[bucket0] -= 1;
8299 let slot0 = induction_bucket[bucket0] as usize;
8300 sa[slot0] = p0
8301 | ((usize::from(t[p0_usize - usize::from(p0 > 0)] > t[p0_usize]) as SaSint)
8302 << (SAINT_BIT - 1));
8303 }
8304
8305 let mut p1 = sa[i - 1];
8306 sa[i - 1] = p1 & SAINT_MAX;
8307 if p1 > 0 {
8308 p1 -= 1;
8309 let p1_usize = p1 as usize;
8310 let bucket1 = t[p1_usize] as usize;
8311 induction_bucket[bucket1] -= 1;
8312 let slot1 = induction_bucket[bucket1] as usize;
8313 sa[slot1] = p1
8314 | ((usize::from(t[p1_usize - usize::from(p1 > 0)] > t[p1_usize]) as SaSint)
8315 << (SAINT_BIT - 1));
8316 }
8317
8318 i -= 2;
8319 }
8320
8321 j -= prefetch_distance + 1;
8322 while i >= j {
8323 let mut p = sa[i];
8324 sa[i] = p & SAINT_MAX;
8325 if p > 0 {
8326 p -= 1;
8327 let p_usize = p as usize;
8328 let bucket = t[p_usize] as usize;
8329 induction_bucket[bucket] -= 1;
8330 let slot = induction_bucket[bucket] as usize;
8331 sa[slot] = p
8332 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8333 << (SAINT_BIT - 1));
8334 }
8335
8336 if i == 0 {
8337 break;
8338 }
8339 i -= 1;
8340 }
8341}
8342
8343#[doc(hidden)]
8345pub fn final_gsa_scan_right_to_left_8u(
8346 t: &[u8],
8347 sa: &mut [SaSint],
8348 induction_bucket: &mut [SaSint],
8349 omp_block_start: FastSint,
8350 omp_block_size: FastSint,
8351) {
8352 if omp_block_size <= 0 {
8353 return;
8354 }
8355
8356 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8357 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8358 let mut i = start + size;
8359 while i > start {
8360 i -= 1;
8361 let mut p = sa[i];
8362 sa[i] = p & SAINT_MAX;
8363 if p > 0 {
8364 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8365 if t[p_usize - 1] > 0 {
8366 p -= 1;
8367 let bucket =
8368 t[usize::try_from(p).expect("suffix index must be non-negative")] as usize;
8369 induction_bucket[bucket] -= 1;
8370 let slot = usize::try_from(induction_bucket[bucket])
8371 .expect("bucket slot must be non-negative");
8372 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8373 sa[slot] = p
8374 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8375 << (SAINT_BIT - 1));
8376 }
8377 }
8378 }
8379}
8380
8381#[doc(hidden)]
8383pub fn final_sorting_scan_right_to_left_32s(
8384 t: &[SaSint],
8385 sa: &mut [SaSint],
8386 induction_bucket: &mut [SaSint],
8387 omp_block_start: FastSint,
8388 omp_block_size: FastSint,
8389) {
8390 if omp_block_size <= 0 {
8391 return;
8392 }
8393
8394 let prefetch_distance: FastSint = 64;
8395 let mut i = omp_block_start + omp_block_size - 1;
8396 let mut j = omp_block_start + 2 * prefetch_distance + 1;
8397
8398 while i >= j {
8399 let i0 = i as usize;
8400 let mut p0 = sa[i0];
8401 sa[i0] = p0 & SAINT_MAX;
8402 if p0 > 0 {
8403 p0 -= 1;
8404 let p0u = p0 as usize;
8405 let bucket0 = t[p0u] as usize;
8406 induction_bucket[bucket0] -= 1;
8407 let slot0 = induction_bucket[bucket0] as usize;
8408 sa[slot0] = p0
8409 | ((usize::from(t[p0u - usize::from(p0 > 0)] > t[p0u]) as SaSint)
8410 << (SAINT_BIT - 1));
8411 }
8412
8413 let i1 = (i - 1) as usize;
8414 let mut p1 = sa[i1];
8415 sa[i1] = p1 & SAINT_MAX;
8416 if p1 > 0 {
8417 p1 -= 1;
8418 let p1u = p1 as usize;
8419 let bucket1 = t[p1u] as usize;
8420 induction_bucket[bucket1] -= 1;
8421 let slot1 = induction_bucket[bucket1] as usize;
8422 sa[slot1] = p1
8423 | ((usize::from(t[p1u - usize::from(p1 > 0)] > t[p1u]) as SaSint)
8424 << (SAINT_BIT - 1));
8425 }
8426 i -= 2;
8427 }
8428
8429 j -= 2 * prefetch_distance + 1;
8430 while i >= j {
8431 let iu = i as usize;
8432 let mut p = sa[iu];
8433 sa[iu] = p & SAINT_MAX;
8434 if p > 0 {
8435 p -= 1;
8436 let pu = p as usize;
8437 let bucket = t[pu] as usize;
8438 induction_bucket[bucket] -= 1;
8439 let slot = induction_bucket[bucket] as usize;
8440 sa[slot] = p
8441 | ((usize::from(t[pu - usize::from(p > 0)] > t[pu]) as SaSint) << (SAINT_BIT - 1));
8442 }
8443 i -= 1;
8444 }
8445}
8446
8447#[doc(hidden)]
8449pub fn final_bwt_scan_right_to_left_8u_block_prepare(
8450 t: &[u8],
8451 sa: &mut [SaSint],
8452 k: SaSint,
8453 buckets: &mut [SaSint],
8454 cache: &mut [ThreadCache],
8455 omp_block_start: FastSint,
8456 omp_block_size: FastSint,
8457) -> FastSint {
8458 if omp_block_size <= 0 {
8459 return 0;
8460 }
8461 let k_usize = usize::try_from(k).expect("k must be non-negative");
8462 buckets[..k_usize].fill(0);
8463 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8464 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8465 let mut count = 0usize;
8466 let mut i = start + size;
8467 while i > start {
8468 i -= 1;
8469 let mut p = sa[i];
8470 sa[i] = p & SAINT_MAX;
8471 if p > 0 {
8472 p -= 1;
8473 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8474 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8475 let c1 = t[p_usize] as SaSint;
8476 sa[i] = c1;
8477 buckets[c1 as usize] += 1;
8478 cache[count].symbol = c1;
8479 cache[count].index = if c0 <= c1 { p } else { c0 | SAINT_MIN };
8480 count += 1;
8481 }
8482 }
8483 count as FastSint
8484}
8485
8486#[doc(hidden)]
8488pub fn final_bwt_aux_scan_right_to_left_8u_block_prepare(
8489 t: &[u8],
8490 sa: &mut [SaSint],
8491 k: SaSint,
8492 buckets: &mut [SaSint],
8493 cache: &mut [ThreadCache],
8494 omp_block_start: FastSint,
8495 omp_block_size: FastSint,
8496) -> FastSint {
8497 if omp_block_size <= 0 {
8498 return 0;
8499 }
8500 let k_usize = usize::try_from(k).expect("k must be non-negative");
8501 buckets[..k_usize].fill(0);
8502 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
8503 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
8504 let mut count = 0usize;
8505 let mut i = start + size;
8506 while i > start {
8507 i -= 1;
8508 let mut p = sa[i];
8509 sa[i] = p & SAINT_MAX;
8510 if p > 0 {
8511 p -= 1;
8512 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8513 let c0 = t[p_usize - usize::from(p > 0)] as SaSint;
8514 let c1 = t[p_usize] as SaSint;
8515 sa[i] = c1;
8516 buckets[c1 as usize] += 1;
8517 cache[count].symbol = c1;
8518 cache[count].index = if c0 <= c1 { p } else { c0 | SAINT_MIN };
8519 cache[count + 1].index = p;
8520 count += 2;
8521 }
8522 }
8523 count as FastSint
8524}
8525
8526#[doc(hidden)]
8528pub fn final_sorting_scan_right_to_left_8u_block_prepare(
8529 t: &[u8],
8530 sa: &mut [SaSint],
8531 k: SaSint,
8532 buckets: &mut [SaSint],
8533 cache: &mut [ThreadCache],
8534 omp_block_start: FastSint,
8535 omp_block_size: FastSint,
8536) -> FastSint {
8537 if omp_block_size <= 0 {
8538 return 0;
8539 }
8540
8541 let k_usize = usize::try_from(k).expect("k must be non-negative");
8542 buckets[..k_usize].fill(0);
8543
8544 let start =
8545 usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") as FastSint;
8546 let mut i = omp_block_start + omp_block_size - 1;
8547 let mut j = start + 1;
8548 let mut count = 0usize;
8549
8550 while i >= j {
8551 let i0 = usize::try_from(i).expect("loop index must be non-negative");
8552 let i1 = usize::try_from(i - 1).expect("loop index must be non-negative");
8553
8554 let mut p0 = sa[i0];
8555 sa[i0] = p0 & SAINT_MAX;
8556 if p0 > 0 {
8557 p0 -= 1;
8558 let p0_usize = usize::try_from(p0).expect("suffix index must be non-negative");
8559 let c0 = t[p0_usize] as SaSint;
8560 buckets[c0 as usize] += 1;
8561 cache[count].symbol = c0;
8562 cache[count].index = p0
8563 | ((usize::from(t[p0_usize - usize::from(p0 > 0)] > t[p0_usize]) as SaSint)
8564 << (SAINT_BIT - 1));
8565 count += 1;
8566 }
8567
8568 let mut p1 = sa[i1];
8569 sa[i1] = p1 & SAINT_MAX;
8570 if p1 > 0 {
8571 p1 -= 1;
8572 let p1_usize = usize::try_from(p1).expect("suffix index must be non-negative");
8573 let c1 = t[p1_usize] as SaSint;
8574 buckets[c1 as usize] += 1;
8575 cache[count].symbol = c1;
8576 cache[count].index = p1
8577 | ((usize::from(t[p1_usize - usize::from(p1 > 0)] > t[p1_usize]) as SaSint)
8578 << (SAINT_BIT - 1));
8579 count += 1;
8580 }
8581
8582 i -= 2;
8583 }
8584
8585 j -= 1;
8586 while i >= j {
8587 let idx = usize::try_from(i).expect("loop index must be non-negative");
8588 let mut p = sa[idx];
8589 sa[idx] = p & SAINT_MAX;
8590 if p > 0 {
8591 p -= 1;
8592 let p_usize = usize::try_from(p).expect("suffix index must be non-negative");
8593 let c = t[p_usize] as SaSint;
8594 buckets[c as usize] += 1;
8595 cache[count].symbol = c;
8596 cache[count].index = p
8597 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8598 << (SAINT_BIT - 1));
8599 count += 1;
8600 }
8601
8602 i -= 1;
8603 }
8604
8605 count as FastSint
8606}
8607
8608#[doc(hidden)]
8610pub fn final_order_scan_right_to_left_8u_block_place(
8611 sa: &mut [SaSint],
8612 buckets: &mut [SaSint],
8613 cache: &[ThreadCache],
8614 count: FastSint,
8615) {
8616 if count <= 0 {
8617 return;
8618 }
8619 let count_usize = usize::try_from(count).expect("count must be non-negative");
8620 for entry in &cache[..count_usize] {
8621 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
8622 buckets[symbol] -= 1;
8623 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
8624 sa[slot] = entry.index;
8625 }
8626}
8627
8628#[doc(hidden)]
8630pub fn final_gsa_scan_right_to_left_8u_block_place(
8631 sa: &mut [SaSint],
8632 buckets: &mut [SaSint],
8633 cache: &[ThreadCache],
8634 count: FastSint,
8635) {
8636 if count <= 0 {
8637 return;
8638 }
8639 let count_usize = usize::try_from(count).expect("count must be non-negative");
8640 for entry in &cache[..count_usize] {
8641 if entry.symbol > 0 {
8642 let symbol = usize::try_from(entry.symbol).expect("cache symbol must be non-negative");
8643 buckets[symbol] -= 1;
8644 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
8645 sa[slot] = entry.index;
8646 }
8647 }
8648}
8649
8650#[doc(hidden)]
8652pub fn final_bwt_aux_scan_right_to_left_8u_block_place(
8653 sa: &mut [SaSint],
8654 rm: SaSint,
8655 i_out: &mut [SaSint],
8656 buckets: &mut [SaSint],
8657 cache: &[ThreadCache],
8658 count: FastSint,
8659) {
8660 if count <= 0 {
8661 return;
8662 }
8663 let count_usize = usize::try_from(count).expect("count must be non-negative");
8664 let mut i = 0usize;
8665 while i < count_usize {
8666 let symbol = usize::try_from(cache[i].symbol).expect("cache symbol must be non-negative");
8667 buckets[symbol] -= 1;
8668 let slot = usize::try_from(buckets[symbol]).expect("bucket slot must be non-negative");
8669 sa[slot] = cache[i].index;
8670 if (cache[i + 1].index & rm) == 0 {
8671 let sample_index = usize::try_from((cache[i + 1].index & SAINT_MAX) / (rm + 1))
8672 .expect("sample index must be non-negative");
8673 i_out[sample_index] = buckets[symbol] + 1;
8674 }
8675 i += 2;
8676 }
8677}
8678
8679#[doc(hidden)]
8681pub fn final_sorting_scan_right_to_left_32s_block_gather(
8682 t: &[SaSint],
8683 sa: &mut [SaSint],
8684 cache: &mut [ThreadCache],
8685 omp_block_start: FastSint,
8686 omp_block_size: FastSint,
8687) {
8688 if omp_block_size <= 0 {
8689 return;
8690 }
8691 let prefetch_distance = 64usize;
8692 let start = omp_block_start as usize;
8693 let block_end = start + omp_block_size as usize;
8694 let mut i = start;
8695 let mut j = block_end.saturating_sub(prefetch_distance + 1);
8696
8697 while i < j {
8698 let ci = i - start;
8699 let mut symbol0 = SAINT_MIN;
8700 let mut p0 = sa[i];
8701 sa[i] = p0 & SAINT_MAX;
8702 if p0 > 0 {
8703 p0 -= 1;
8704 let p0_usize = p0 as usize;
8705 cache[ci].index = p0
8706 | ((usize::from(t[p0_usize - usize::from(p0 > 0)] > t[p0_usize]) as SaSint)
8707 << (SAINT_BIT - 1));
8708 symbol0 = t[p0_usize];
8709 }
8710 cache[ci].symbol = symbol0;
8711
8712 let i1 = i + 1;
8713 let ci1 = i1 - start;
8714 let mut symbol1 = SAINT_MIN;
8715 let mut p1 = sa[i1];
8716 sa[i1] = p1 & SAINT_MAX;
8717 if p1 > 0 {
8718 p1 -= 1;
8719 let p1_usize = p1 as usize;
8720 cache[ci1].index = p1
8721 | ((usize::from(t[p1_usize - usize::from(p1 > 0)] > t[p1_usize]) as SaSint)
8722 << (SAINT_BIT - 1));
8723 symbol1 = t[p1_usize];
8724 }
8725 cache[ci1].symbol = symbol1;
8726
8727 i += 2;
8728 }
8729
8730 j = block_end;
8731 while i < j {
8732 let ci = i - start;
8733 let mut symbol = SAINT_MIN;
8734 let mut p = sa[i];
8735 sa[i] = p & SAINT_MAX;
8736 if p > 0 {
8737 p -= 1;
8738 let p_usize = p as usize;
8739 cache[ci].index = p
8740 | ((usize::from(t[p_usize - usize::from(p > 0)] > t[p_usize]) as SaSint)
8741 << (SAINT_BIT - 1));
8742 symbol = t[p_usize];
8743 }
8744 cache[ci].symbol = symbol;
8745 i += 1;
8746 }
8747}
8748
8749#[doc(hidden)]
8751pub fn final_sorting_scan_right_to_left_32s_block_sort(
8752 t: &[SaSint],
8753 induction_bucket: &mut [SaSint],
8754 cache: &mut [ThreadCache],
8755 omp_block_start: FastSint,
8756 omp_block_size: FastSint,
8757) {
8758 if omp_block_size <= 0 {
8759 return;
8760 }
8761 let prefetch_distance = 64usize;
8762 let start = omp_block_start as usize;
8763 let mut i = start + omp_block_size as usize - 1;
8764 let mut j = start + prefetch_distance + 1;
8765
8766 while i >= j {
8767 let ci = i - start;
8768 let v0 = cache[ci].symbol;
8769 if v0 >= 0 {
8770 let bucket_index0 = v0 as usize;
8771 induction_bucket[bucket_index0] -= 1;
8772 cache[ci].symbol = induction_bucket[bucket_index0];
8773 if cache[ci].symbol >= omp_block_start as SaSint {
8774 let ni = cache[ci].symbol as usize;
8775 let cni = ni - start;
8776 let mut np = cache[ci].index;
8777 cache[ci].index = np & SAINT_MAX;
8778 if np > 0 {
8779 np -= 1;
8780 let np_usize = np as usize;
8781 cache[cni].index = np
8782 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
8783 as SaSint)
8784 << (SAINT_BIT - 1));
8785 cache[cni].symbol = t[np_usize];
8786 }
8787 }
8788 }
8789
8790 let i1 = i - 1;
8791 let ci1 = i1 - start;
8792 let v1 = cache[ci1].symbol;
8793 if v1 >= 0 {
8794 let bucket_index1 = v1 as usize;
8795 induction_bucket[bucket_index1] -= 1;
8796 cache[ci1].symbol = induction_bucket[bucket_index1];
8797 if cache[ci1].symbol >= omp_block_start as SaSint {
8798 let ni = cache[ci1].symbol as usize;
8799 let cni = ni - start;
8800 let mut np = cache[ci1].index;
8801 cache[ci1].index = np & SAINT_MAX;
8802 if np > 0 {
8803 np -= 1;
8804 let np_usize = np as usize;
8805 cache[cni].index = np
8806 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
8807 as SaSint)
8808 << (SAINT_BIT - 1));
8809 cache[cni].symbol = t[np_usize];
8810 }
8811 }
8812 }
8813
8814 i -= 2;
8815 }
8816
8817 j -= prefetch_distance + 1;
8818 while i >= j {
8819 let ci = i - start;
8820 let v = cache[ci].symbol;
8821 if v >= 0 {
8822 let bucket_index = v as usize;
8823 induction_bucket[bucket_index] -= 1;
8824 cache[ci].symbol = induction_bucket[bucket_index];
8825 if cache[ci].symbol >= omp_block_start as SaSint {
8826 let ni = cache[ci].symbol as usize;
8827 let cni = ni - start;
8828 let mut np = cache[ci].index;
8829 cache[ci].index = np & SAINT_MAX;
8830 if np > 0 {
8831 np -= 1;
8832 let np_usize = np as usize;
8833 cache[cni].index = np
8834 | ((usize::from(t[np_usize - usize::from(np > 0)] > t[np_usize])
8835 as SaSint)
8836 << (SAINT_BIT - 1));
8837 cache[cni].symbol = t[np_usize];
8838 }
8839 }
8840 }
8841
8842 if i == 0 {
8843 break;
8844 }
8845 i -= 1;
8846 }
8847}
8848
8849#[doc(hidden)]
8851pub fn final_bwt_scan_right_to_left_8u_block_omp(
8852 t: &[u8],
8853 sa: &mut [SaSint],
8854 k: SaSint,
8855 induction_bucket: &mut [SaSint],
8856 block_start: FastSint,
8857 block_size: FastSint,
8858 threads: SaSint,
8859 thread_state: &mut [ThreadState],
8860) {
8861 if block_size <= 0 {
8862 return;
8863 }
8864 let k_usize = usize::try_from(k).expect("k must be non-negative");
8865 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
8866 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
8867 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
8868 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
8869 let _ = final_bwt_scan_right_to_left_8u(t, sa, induction_bucket, block_start, block_size);
8870 return;
8871 }
8872
8873 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
8874 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
8875 let omp_block_start = omp_thread_num * omp_block_stride;
8876 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
8877 omp_block_stride
8878 } else {
8879 block_size_usize - omp_block_start
8880 };
8881 state.count = final_bwt_scan_right_to_left_8u_block_prepare(
8882 t,
8883 sa,
8884 k,
8885 &mut state.buckets,
8886 &mut state.cache,
8887 block_start + omp_block_start as FastSint,
8888 omp_block_size as FastSint,
8889 );
8890 }
8891 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
8892 for c in 0..k_usize {
8893 let a = induction_bucket[c];
8894 let b = state.buckets[c];
8895 induction_bucket[c] = a - b;
8896 state.buckets[c] = a;
8897 }
8898 }
8899 for state in thread_state.iter_mut().take(omp_num_threads) {
8900 final_order_scan_right_to_left_8u_block_place(
8901 sa,
8902 &mut state.buckets,
8903 &state.cache,
8904 state.count,
8905 );
8906 }
8907}
8908
8909#[doc(hidden)]
8911pub fn final_bwt_aux_scan_right_to_left_8u_block_omp(
8912 t: &[u8],
8913 sa: &mut [SaSint],
8914 k: SaSint,
8915 rm: SaSint,
8916 i_out: &mut [SaSint],
8917 induction_bucket: &mut [SaSint],
8918 block_start: FastSint,
8919 block_size: FastSint,
8920 threads: SaSint,
8921 thread_state: &mut [ThreadState],
8922) {
8923 if block_size <= 0 {
8924 return;
8925 }
8926 let k_usize = usize::try_from(k).expect("k must be non-negative");
8927 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
8928 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
8929 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
8930 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
8931 final_bwt_aux_scan_right_to_left_8u(
8932 t,
8933 sa,
8934 rm,
8935 i_out,
8936 induction_bucket,
8937 block_start,
8938 block_size,
8939 );
8940 return;
8941 }
8942
8943 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
8944 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
8945 let omp_block_start = omp_thread_num * omp_block_stride;
8946 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
8947 omp_block_stride
8948 } else {
8949 block_size_usize - omp_block_start
8950 };
8951 state.count = final_bwt_aux_scan_right_to_left_8u_block_prepare(
8952 t,
8953 sa,
8954 k,
8955 &mut state.buckets,
8956 &mut state.cache,
8957 block_start + omp_block_start as FastSint,
8958 omp_block_size as FastSint,
8959 );
8960 }
8961 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
8962 for c in 0..k_usize {
8963 let a = induction_bucket[c];
8964 let b = state.buckets[c];
8965 induction_bucket[c] = a - b;
8966 state.buckets[c] = a;
8967 }
8968 }
8969 for state in thread_state.iter_mut().take(omp_num_threads) {
8970 final_bwt_aux_scan_right_to_left_8u_block_place(
8971 sa,
8972 rm,
8973 i_out,
8974 &mut state.buckets,
8975 &state.cache,
8976 state.count,
8977 );
8978 }
8979}
8980
8981#[doc(hidden)]
8983pub fn final_sorting_scan_right_to_left_8u_block_omp(
8984 t: &[u8],
8985 sa: &mut [SaSint],
8986 k: SaSint,
8987 induction_bucket: &mut [SaSint],
8988 block_start: FastSint,
8989 block_size: FastSint,
8990 threads: SaSint,
8991 thread_state: &mut [ThreadState],
8992) {
8993 if block_size <= 0 {
8994 return;
8995 }
8996 let k_usize = usize::try_from(k).expect("k must be non-negative");
8997 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
8998 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
8999 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
9000 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
9001 final_sorting_scan_right_to_left_8u(t, sa, induction_bucket, block_start, block_size);
9002 return;
9003 }
9004
9005 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
9006 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
9007 let omp_block_start = omp_thread_num * omp_block_stride;
9008 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9009 omp_block_stride
9010 } else {
9011 block_size_usize - omp_block_start
9012 };
9013 state.count = final_sorting_scan_right_to_left_8u_block_prepare(
9014 t,
9015 sa,
9016 k,
9017 &mut state.buckets,
9018 &mut state.cache,
9019 block_start + omp_block_start as FastSint,
9020 omp_block_size as FastSint,
9021 );
9022 }
9023 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
9024 for c in 0..k_usize {
9025 let a = induction_bucket[c];
9026 let b = state.buckets[c];
9027 induction_bucket[c] = a - b;
9028 state.buckets[c] = a;
9029 }
9030 }
9031 for state in thread_state.iter_mut().take(omp_num_threads) {
9032 final_order_scan_right_to_left_8u_block_place(
9033 sa,
9034 &mut state.buckets,
9035 &state.cache,
9036 state.count,
9037 );
9038 }
9039}
9040
9041#[doc(hidden)]
9043pub fn final_gsa_scan_right_to_left_8u_block_omp(
9044 t: &[u8],
9045 sa: &mut [SaSint],
9046 k: SaSint,
9047 induction_bucket: &mut [SaSint],
9048 block_start: FastSint,
9049 block_size: FastSint,
9050 threads: SaSint,
9051 thread_state: &mut [ThreadState],
9052) {
9053 if block_size <= 0 {
9054 return;
9055 }
9056 let k_usize = usize::try_from(k).expect("k must be non-negative");
9057 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
9058 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
9059 let omp_num_threads = threads_usize.min(thread_state.len()).min(block_size_usize);
9060 if omp_num_threads <= 1 || block_size < 64 * k.max(256) as FastSint {
9061 final_gsa_scan_right_to_left_8u(t, sa, induction_bucket, block_start, block_size);
9062 return;
9063 }
9064
9065 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
9066 for (omp_thread_num, state) in thread_state.iter_mut().take(omp_num_threads).enumerate() {
9067 let omp_block_start = omp_thread_num * omp_block_stride;
9068 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9069 omp_block_stride
9070 } else {
9071 block_size_usize - omp_block_start
9072 };
9073 state.count = final_sorting_scan_right_to_left_8u_block_prepare(
9074 t,
9075 sa,
9076 k,
9077 &mut state.buckets,
9078 &mut state.cache,
9079 block_start + omp_block_start as FastSint,
9080 omp_block_size as FastSint,
9081 );
9082 }
9083 for state in thread_state.iter_mut().take(omp_num_threads).rev() {
9084 for c in 0..k_usize {
9085 let a = induction_bucket[c];
9086 let b = state.buckets[c];
9087 induction_bucket[c] = a - b;
9088 state.buckets[c] = a;
9089 }
9090 }
9091 for state in thread_state.iter_mut().take(omp_num_threads) {
9092 final_gsa_scan_right_to_left_8u_block_place(
9093 sa,
9094 &mut state.buckets,
9095 &state.cache,
9096 state.count,
9097 );
9098 }
9099}
9100
9101#[doc(hidden)]
9103pub fn final_sorting_scan_right_to_left_32s_block_omp(
9104 t: &[SaSint],
9105 sa: &mut [SaSint],
9106 buckets: &mut [SaSint],
9107 cache: &mut [ThreadCache],
9108 block_start: FastSint,
9109 block_size: FastSint,
9110 threads: SaSint,
9111) {
9112 if threads <= 1 || block_size < 16_384 {
9113 final_sorting_scan_right_to_left_32s(t, sa, buckets, block_start, block_size);
9114 return;
9115 }
9116
9117 final_sorting_scan_right_to_left_32s_block_gather(t, sa, cache, block_start, block_size);
9118 final_sorting_scan_right_to_left_32s_block_sort(t, buckets, cache, block_start, block_size);
9119 let block_size_usize = usize::try_from(block_size).expect("block_size must be non-negative");
9120 let threads_usize = usize::try_from(threads.max(1)).expect("threads must be positive");
9121 let omp_num_threads = threads_usize.min(block_size_usize);
9122 let omp_block_stride = (block_size_usize / omp_num_threads) & !15usize;
9123 for omp_thread_num in 0..omp_num_threads {
9124 let omp_block_start = omp_thread_num * omp_block_stride;
9125 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9126 omp_block_stride
9127 } else {
9128 block_size_usize - omp_block_start
9129 };
9130 compact_and_place_cached_suffixes(
9131 sa,
9132 cache,
9133 omp_block_start as FastSint,
9134 omp_block_size as FastSint,
9135 );
9136 }
9137}
9138
9139#[doc(hidden)]
9141pub fn final_bwt_scan_right_to_left_8u_omp(
9142 t: &[u8],
9143 sa: &mut [SaSint],
9144 n: SaSint,
9145 k: SaSint,
9146 induction_bucket: &mut [SaSint],
9147 threads: SaSint,
9148 thread_state: &mut [ThreadState],
9149) -> SaSint {
9150 if threads == 1 || n < 65_536 {
9151 return final_bwt_scan_right_to_left_8u(t, sa, induction_bucket, 0, n as FastSint);
9152 }
9153 let mut index = -1;
9154 let mut block_start = usize::try_from(n).expect("n must be non-negative");
9155 while block_start > 0 {
9156 block_start -= 1;
9157 if sa[block_start] == 0 {
9158 index = block_start as SaSint;
9159 } else {
9160 let threads_usize = usize::try_from(threads)
9161 .expect("threads must be non-negative")
9162 .min(thread_state.len())
9163 .max(1);
9164 let max_back =
9165 threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize);
9166 let block_max_end = block_start.saturating_sub(max_back);
9167 let mut block_end = block_start;
9168 while block_end > block_max_end && sa[block_end - 1] != 0 {
9169 block_end -= 1;
9170 }
9171 let size = block_start - block_end + 1;
9172 if size < 32 {
9173 let res = final_bwt_scan_right_to_left_8u(
9174 t,
9175 sa,
9176 induction_bucket,
9177 block_end as FastSint,
9178 size as FastSint,
9179 );
9180 if res >= 0 {
9181 index = res;
9182 }
9183 } else {
9184 final_bwt_scan_right_to_left_8u_block_omp(
9185 t,
9186 sa,
9187 k,
9188 induction_bucket,
9189 block_end as FastSint,
9190 size as FastSint,
9191 threads,
9192 thread_state,
9193 );
9194 }
9195 block_start = block_end;
9196 }
9197 }
9198 index
9199}
9200
9201#[doc(hidden)]
9203pub fn final_bwt_aux_scan_right_to_left_8u_omp(
9204 t: &[u8],
9205 sa: &mut [SaSint],
9206 n: SaSint,
9207 k: SaSint,
9208 rm: SaSint,
9209 i_out: &mut [SaSint],
9210 induction_bucket: &mut [SaSint],
9211 threads: SaSint,
9212 thread_state: &mut [ThreadState],
9213) {
9214 if threads == 1 || n < 65_536 {
9215 final_bwt_aux_scan_right_to_left_8u(t, sa, rm, i_out, induction_bucket, 0, n as FastSint);
9216 return;
9217 }
9218 let mut block_start = usize::try_from(n).expect("n must be non-negative");
9219 while block_start > 0 {
9220 block_start -= 1;
9221 if sa[block_start] != 0 {
9222 let threads_usize = usize::try_from(threads)
9223 .expect("threads must be non-negative")
9224 .min(thread_state.len())
9225 .max(1);
9226 let max_back = threads_usize
9227 * (LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize) / 2);
9228 let block_max_end = block_start.saturating_sub(max_back);
9229 let mut block_end = block_start;
9230 while block_end > block_max_end && sa[block_end - 1] != 0 {
9231 block_end -= 1;
9232 }
9233 let size = block_start - block_end + 1;
9234 if size < 32 {
9235 final_bwt_aux_scan_right_to_left_8u(
9236 t,
9237 sa,
9238 rm,
9239 i_out,
9240 induction_bucket,
9241 block_end as FastSint,
9242 size as FastSint,
9243 );
9244 } else {
9245 final_bwt_aux_scan_right_to_left_8u_block_omp(
9246 t,
9247 sa,
9248 k,
9249 rm,
9250 i_out,
9251 induction_bucket,
9252 block_end as FastSint,
9253 size as FastSint,
9254 threads,
9255 thread_state,
9256 );
9257 }
9258 block_start = block_end;
9259 }
9260 }
9261}
9262
9263#[doc(hidden)]
9265pub fn final_sorting_scan_right_to_left_8u_omp(
9266 t: &[u8],
9267 sa: &mut [SaSint],
9268 omp_block_start: FastSint,
9269 omp_block_size: FastSint,
9270 k: SaSint,
9271 induction_bucket: &mut [SaSint],
9272 threads: SaSint,
9273 thread_state: &mut [ThreadState],
9274) {
9275 if threads == 1 || omp_block_size < 65_536 {
9276 final_sorting_scan_right_to_left_8u(
9277 t,
9278 sa,
9279 induction_bucket,
9280 omp_block_start,
9281 omp_block_size,
9282 );
9283 return;
9284 }
9285 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9286 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9287 let mut block_start = start + size;
9288 while block_start > start {
9289 block_start -= 1;
9290 if sa[block_start] != 0 {
9291 let threads_usize = usize::try_from(threads)
9292 .expect("threads must be non-negative")
9293 .min(thread_state.len())
9294 .max(1);
9295 let max_back =
9296 threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize);
9297 let block_max_end = block_start.saturating_sub(max_back).max(start);
9298 let mut block_end = block_start;
9299 while block_end > block_max_end && sa[block_end - 1] != 0 {
9300 block_end -= 1;
9301 }
9302 let span = block_start - block_end + 1;
9303 if span < 32 {
9304 final_sorting_scan_right_to_left_8u(
9305 t,
9306 sa,
9307 induction_bucket,
9308 block_end as FastSint,
9309 span as FastSint,
9310 );
9311 } else {
9312 final_sorting_scan_right_to_left_8u_block_omp(
9313 t,
9314 sa,
9315 k,
9316 induction_bucket,
9317 block_end as FastSint,
9318 span as FastSint,
9319 threads,
9320 thread_state,
9321 );
9322 }
9323 block_start = block_end;
9324 }
9325 }
9326}
9327
9328#[doc(hidden)]
9330pub fn final_gsa_scan_right_to_left_8u_omp(
9331 t: &[u8],
9332 sa: &mut [SaSint],
9333 omp_block_start: FastSint,
9334 omp_block_size: FastSint,
9335 k: SaSint,
9336 induction_bucket: &mut [SaSint],
9337 threads: SaSint,
9338 thread_state: &mut [ThreadState],
9339) {
9340 if threads == 1 || omp_block_size < 65_536 {
9341 final_gsa_scan_right_to_left_8u(t, sa, induction_bucket, omp_block_start, omp_block_size);
9342 return;
9343 }
9344 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9345 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9346 let mut block_start = start + size;
9347 while block_start > start {
9348 block_start -= 1;
9349 if sa[block_start] != 0 {
9350 let threads_usize = usize::try_from(threads)
9351 .expect("threads must be non-negative")
9352 .min(thread_state.len())
9353 .max(1);
9354 let max_back =
9355 threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE.saturating_sub(16 * threads_usize);
9356 let block_max_end = block_start.saturating_sub(max_back).max(start);
9357 let mut block_end = block_start;
9358 while block_end > block_max_end && sa[block_end - 1] != 0 {
9359 block_end -= 1;
9360 }
9361 let span = block_start - block_end + 1;
9362 if span < 32 {
9363 final_gsa_scan_right_to_left_8u(
9364 t,
9365 sa,
9366 induction_bucket,
9367 block_end as FastSint,
9368 span as FastSint,
9369 );
9370 } else {
9371 final_gsa_scan_right_to_left_8u_block_omp(
9372 t,
9373 sa,
9374 k,
9375 induction_bucket,
9376 block_end as FastSint,
9377 span as FastSint,
9378 threads,
9379 thread_state,
9380 );
9381 }
9382 block_start = block_end;
9383 }
9384 }
9385}
9386
9387#[doc(hidden)]
9389pub fn final_sorting_scan_right_to_left_32s_omp(
9390 t: &[SaSint],
9391 sa: &mut [SaSint],
9392 n: SaSint,
9393 induction_bucket: &mut [SaSint],
9394 threads: SaSint,
9395 thread_state: &mut [ThreadState],
9396) {
9397 if threads == 1 || n < 65_536 {
9398 final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n as FastSint);
9399 return;
9400 }
9401 if thread_state.is_empty() {
9402 final_sorting_scan_right_to_left_32s(t, sa, induction_bucket, 0, n as FastSint);
9403 return;
9404 }
9405 let threads_usize = usize::try_from(threads)
9406 .expect("threads must be non-negative")
9407 .max(1);
9408 let mut cache = vec![ThreadCache::default(); threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE];
9409 let mut block_start = isize::try_from(n).expect("n must fit isize") - 1;
9410 while block_start >= 0 {
9411 let block_end = (block_start
9412 - isize::try_from(threads_usize * LIBSAIS_PER_THREAD_CACHE_SIZE)
9413 .expect("block span must fit isize"))
9414 .max(-1);
9415 final_sorting_scan_right_to_left_32s_block_omp(
9416 t,
9417 sa,
9418 induction_bucket,
9419 &mut cache,
9420 (block_end + 1) as FastSint,
9421 (block_start - block_end) as FastSint,
9422 threads,
9423 );
9424 block_start = block_end;
9425 }
9426}
9427
9428#[doc(hidden)]
9430pub fn clear_lms_suffixes_omp(
9431 sa: &mut [SaSint],
9432 n: SaSint,
9433 k: SaSint,
9434 bucket_start: &[SaSint],
9435 bucket_end: &[SaSint],
9436 threads: SaSint,
9437) {
9438 let k_usize = usize::try_from(k).expect("k must be non-negative");
9439 let thread_count = if threads > 1 && n >= 65536 {
9440 usize::try_from(threads).expect("threads must be positive")
9441 } else {
9442 1
9443 };
9444 for t in 0..thread_count {
9445 let mut c = t;
9446 while c < k_usize {
9447 if bucket_end[c] > bucket_start[c] {
9448 let start =
9449 usize::try_from(bucket_start[c]).expect("bucket start must be non-negative");
9450 let end = usize::try_from(bucket_end[c]).expect("bucket end must be non-negative");
9451 sa[start..end].fill(0);
9452 }
9453 c += thread_count;
9454 }
9455 }
9456}
9457
9458#[doc(hidden)]
9460pub fn induce_final_order_8u_omp(
9461 t: &[u8],
9462 sa: &mut [SaSint],
9463 n: SaSint,
9464 k: SaSint,
9465 flags: SaSint,
9466 r: SaSint,
9467 i_out: Option<&mut [SaSint]>,
9468 buckets: &mut [SaSint],
9469 threads: SaSint,
9470 thread_state: &mut [ThreadState],
9471) -> SaSint {
9472 if (flags & LIBSAIS_FLAGS_BWT) == 0 {
9473 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9474 buckets[6 * ALPHABET_SIZE] = buckets[7 * ALPHABET_SIZE] - 1;
9475 }
9476
9477 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9478 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9479 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9480
9481 final_sorting_scan_left_to_right_8u_omp(
9482 t,
9483 sa,
9484 n as FastSint,
9485 k,
9486 bucket_start,
9487 threads,
9488 thread_state,
9489 );
9490 if threads > 1 && n >= 65_536 {
9491 clear_lms_suffixes_omp(
9492 sa,
9493 n,
9494 ALPHABET_SIZE as SaSint,
9495 bucket_start,
9496 bucket_end,
9497 threads,
9498 );
9499 }
9500
9501 if (flags & LIBSAIS_FLAGS_GSA) != 0 {
9502 flip_suffix_markers_omp(sa, bucket_end[0], threads);
9503 final_gsa_scan_right_to_left_8u_omp(
9504 t,
9505 sa,
9506 bucket_end[0] as FastSint,
9507 n as FastSint - bucket_end[0] as FastSint,
9508 k,
9509 bucket_end,
9510 1,
9511 thread_state,
9512 );
9513 } else {
9514 final_sorting_scan_right_to_left_8u_omp(
9515 t,
9516 sa,
9517 0,
9518 n as FastSint,
9519 k,
9520 bucket_end,
9521 threads,
9522 thread_state,
9523 );
9524 }
9525
9526 0
9527 } else if let Some(i_out) = i_out {
9528 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9529 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9530 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9531
9532 final_bwt_aux_scan_left_to_right_8u_omp(
9533 t,
9534 sa,
9535 n as FastSint,
9536 k,
9537 r - 1,
9538 i_out,
9539 bucket_start,
9540 threads,
9541 thread_state,
9542 );
9543 if threads > 1 && n >= 65_536 {
9544 clear_lms_suffixes_omp(
9545 sa,
9546 n,
9547 ALPHABET_SIZE as SaSint,
9548 bucket_start,
9549 bucket_end,
9550 threads,
9551 );
9552 }
9553 final_bwt_aux_scan_right_to_left_8u_omp(
9554 t,
9555 sa,
9556 n,
9557 k,
9558 r - 1,
9559 i_out,
9560 bucket_end,
9561 threads,
9562 thread_state,
9563 );
9564 0
9565 } else {
9566 let (left_buckets, right_tail) = buckets.split_at_mut(7 * ALPHABET_SIZE);
9567 let bucket_start = &mut left_buckets[6 * ALPHABET_SIZE..7 * ALPHABET_SIZE];
9568 let bucket_end = &mut right_tail[..ALPHABET_SIZE];
9569
9570 final_bwt_scan_left_to_right_8u_omp(
9571 t,
9572 sa,
9573 n as FastSint,
9574 k,
9575 bucket_start,
9576 threads,
9577 thread_state,
9578 );
9579 if threads > 1 && n >= 65_536 {
9580 clear_lms_suffixes_omp(
9581 sa,
9582 n,
9583 ALPHABET_SIZE as SaSint,
9584 bucket_start,
9585 bucket_end,
9586 threads,
9587 );
9588 }
9589 final_bwt_scan_right_to_left_8u_omp(t, sa, n, k, bucket_end, threads, thread_state)
9590 }
9591}
9592
9593#[doc(hidden)]
9595pub fn induce_final_order_32s_6k(
9596 t: &[SaSint],
9597 sa: &mut [SaSint],
9598 n: SaSint,
9599 k: SaSint,
9600 buckets: &mut [SaSint],
9601 threads: SaSint,
9602 thread_state: &mut [ThreadState],
9603) {
9604 let k_usize = usize::try_from(k).expect("k must be non-negative");
9605 let (_head, tail) = buckets.split_at_mut(4 * k_usize);
9606 let (left, right) = tail.split_at_mut(k_usize);
9607 final_sorting_scan_left_to_right_32s_omp(t, sa, n, left, threads, thread_state);
9608 final_sorting_scan_right_to_left_32s_omp(t, sa, n, right, threads, thread_state);
9609}
9610
9611#[doc(hidden)]
9613pub fn induce_final_order_32s_4k(
9614 t: &[SaSint],
9615 sa: &mut [SaSint],
9616 n: SaSint,
9617 k: SaSint,
9618 buckets: &mut [SaSint],
9619 threads: SaSint,
9620 thread_state: &mut [ThreadState],
9621) {
9622 let k_usize = usize::try_from(k).expect("k must be non-negative");
9623 let (_head, tail) = buckets.split_at_mut(2 * k_usize);
9624 let (left, right) = tail.split_at_mut(k_usize);
9625 final_sorting_scan_left_to_right_32s_omp(t, sa, n, left, threads, thread_state);
9626 final_sorting_scan_right_to_left_32s_omp(t, sa, n, right, threads, thread_state);
9627}
9628
9629#[doc(hidden)]
9631pub fn induce_final_order_32s_2k(
9632 t: &[SaSint],
9633 sa: &mut [SaSint],
9634 n: SaSint,
9635 k: SaSint,
9636 buckets: &mut [SaSint],
9637 threads: SaSint,
9638 thread_state: &mut [ThreadState],
9639) {
9640 let k_usize = usize::try_from(k).expect("k must be non-negative");
9641 let (right, left) = buckets.split_at_mut(k_usize);
9642 final_sorting_scan_left_to_right_32s_omp(t, sa, n, left, threads, thread_state);
9643 final_sorting_scan_right_to_left_32s_omp(t, sa, n, right, threads, thread_state);
9644}
9645
9646#[doc(hidden)]
9648pub fn induce_final_order_32s_1k(
9649 t: &[SaSint],
9650 sa: &mut [SaSint],
9651 n: SaSint,
9652 k: SaSint,
9653 buckets: &mut [SaSint],
9654 threads: SaSint,
9655 thread_state: &mut [ThreadState],
9656) {
9657 count_suffixes_32s(t, n, k, buckets);
9658 initialize_buckets_start_32s_1k(k, buckets);
9659 final_sorting_scan_left_to_right_32s_omp(t, sa, n, buckets, threads, thread_state);
9660
9661 count_suffixes_32s(t, n, k, buckets);
9662 initialize_buckets_end_32s_1k(k, buckets);
9663 final_sorting_scan_right_to_left_32s_omp(t, sa, n, buckets, threads, thread_state);
9664}
9665
9666#[doc(hidden)]
9668pub fn renumber_unique_and_nonunique_lms_suffixes_32s(
9669 t: &mut [SaSint],
9670 sa: &mut [SaSint],
9671 m: SaSint,
9672 mut f: SaSint,
9673 omp_block_start: FastSint,
9674 omp_block_size: FastSint,
9675) -> SaSint {
9676 if omp_block_size <= 0 {
9677 return f;
9678 }
9679
9680 let prefetch_distance = 64 as SaSint;
9681 let m_usize = usize::try_from(m).expect("m must be non-negative");
9682 let (sa_head, sam) = sa.split_at_mut(m_usize);
9683 let mut i = omp_block_start as SaSint;
9684 let mut j = omp_block_start as SaSint + omp_block_size as SaSint - 2 * prefetch_distance - 3;
9685
9686 while i < j {
9687 let p0 = sa_head[i as usize] as SaUint;
9688 let p0_half = (p0 >> 1) as usize;
9689 let mut s0 = sam[p0_half];
9690 if s0 < 0 {
9691 t[p0 as usize] |= SAINT_MIN;
9692 f += 1;
9693 s0 = i + SAINT_MIN + f;
9694 }
9695 sam[p0_half] = s0 - f;
9696
9697 let p1 = sa_head[(i + 1) as usize] as SaUint;
9698 let p1_half = (p1 >> 1) as usize;
9699 let mut s1 = sam[p1_half];
9700 if s1 < 0 {
9701 t[p1 as usize] |= SAINT_MIN;
9702 f += 1;
9703 s1 = i + 1 + SAINT_MIN + f;
9704 }
9705 sam[p1_half] = s1 - f;
9706
9707 let p2 = sa_head[(i + 2) as usize] as SaUint;
9708 let p2_half = (p2 >> 1) as usize;
9709 let mut s2 = sam[p2_half];
9710 if s2 < 0 {
9711 t[p2 as usize] |= SAINT_MIN;
9712 f += 1;
9713 s2 = i + 2 + SAINT_MIN + f;
9714 }
9715 sam[p2_half] = s2 - f;
9716
9717 let p3 = sa_head[(i + 3) as usize] as SaUint;
9718 let p3_half = (p3 >> 1) as usize;
9719 let mut s3 = sam[p3_half];
9720 if s3 < 0 {
9721 t[p3 as usize] |= SAINT_MIN;
9722 f += 1;
9723 s3 = i + 3 + SAINT_MIN + f;
9724 }
9725 sam[p3_half] = s3 - f;
9726
9727 i += 4;
9728 }
9729
9730 j += 2 * prefetch_distance + 3;
9731 while i < j {
9732 let p = sa_head[i as usize] as SaUint;
9733 let p_half = (p >> 1) as usize;
9734 let mut s = sam[p_half];
9735 if s < 0 {
9736 t[p as usize] |= SAINT_MIN;
9737 f += 1;
9738 s = i + SAINT_MIN + f;
9739 }
9740 sam[p_half] = s - f;
9741 i += 1;
9742 }
9743
9744 f
9745}
9746
9747#[doc(hidden)]
9749pub fn compact_unique_and_nonunique_lms_suffixes_32s(
9750 sa: &mut [SaSint],
9751 m: SaSint,
9752 pl: &mut FastSint,
9753 pr: &mut FastSint,
9754 omp_block_start: FastSint,
9755 omp_block_size: FastSint,
9756) {
9757 if omp_block_size <= 0 {
9758 return;
9759 }
9760
9761 let m_usize = usize::try_from(m).expect("m must be non-negative");
9762 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9763 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9764
9765 let source: Vec<SaSint> = sa[m_usize + start..m_usize + start + size].to_vec();
9766 let mut l = usize::try_from(*pl - 1).expect("left position must be positive");
9767 let mut r = usize::try_from(*pr - 1).expect("right position must be positive");
9768
9769 for &p in source.iter().rev() {
9770 let pu = p as SaUint;
9771 sa[l] = (pu & SAINT_MAX as SaUint) as SaSint;
9772 l = l.saturating_sub(usize::from((pu as SaSint) < 0));
9773
9774 sa[r] = pu.wrapping_sub(1) as SaSint;
9775 r = r.saturating_sub(usize::from((pu as SaSint) > 0));
9776 }
9777
9778 *pl = l as FastSint + 1;
9779 *pr = r as FastSint + 1;
9780}
9781
9782#[doc(hidden)]
9784pub fn count_unique_suffixes(
9785 sa: &[SaSint],
9786 m: SaSint,
9787 omp_block_start: FastSint,
9788 omp_block_size: FastSint,
9789) -> SaSint {
9790 if omp_block_size <= 0 {
9791 return 0;
9792 }
9793
9794 let m_usize = usize::try_from(m).expect("m must be non-negative");
9795 let sam = &sa[m_usize..];
9796 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
9797 let block_end =
9798 i + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
9799 let j = block_end.saturating_sub(67);
9800 let mut f0 = 0;
9801 let mut f1 = 0;
9802 let mut f2 = 0;
9803 let mut f3 = 0;
9804
9805 while i < j {
9806 f0 += SaSint::from(
9807 sam[usize::try_from((sa[i] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9808 );
9809 f1 += SaSint::from(
9810 sam[usize::try_from((sa[i + 1] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9811 );
9812 f2 += SaSint::from(
9813 sam[usize::try_from((sa[i + 2] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9814 );
9815 f3 += SaSint::from(
9816 sam[usize::try_from((sa[i + 3] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9817 );
9818 i += 4;
9819 }
9820
9821 while i < block_end {
9822 f0 += SaSint::from(
9823 sam[usize::try_from((sa[i] as SaUint) >> 1).expect("name slot must fit usize")] < 0,
9824 );
9825 i += 1;
9826 }
9827
9828 f0 + f1 + f2 + f3
9829}
9830
9831#[doc(hidden)]
9833pub fn renumber_unique_and_nonunique_lms_suffixes_32s_omp(
9834 t: &mut [SaSint],
9835 sa: &mut [SaSint],
9836 m: SaSint,
9837 threads: SaSint,
9838 thread_state: &mut [ThreadState],
9839) -> SaSint {
9840 let mut f = 0;
9841 if threads == 1 || m < 65_536 {
9842 f = renumber_unique_and_nonunique_lms_suffixes_32s(t, sa, m, 0, 0, m as FastSint);
9843 } else {
9844 let threads_usize = usize::try_from(threads)
9845 .expect("threads must be non-negative")
9846 .max(1);
9847 let m_usize = usize::try_from(m).expect("m must be non-negative");
9848 let omp_num_threads = threads_usize.min(m_usize.max(1));
9849 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
9850
9851 for omp_thread_num in 0..omp_num_threads {
9852 let omp_block_start = omp_thread_num * omp_block_stride;
9853 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9854 omp_block_stride
9855 } else {
9856 m_usize - omp_block_start
9857 };
9858
9859 thread_state[omp_thread_num].count = count_unique_suffixes(
9860 sa,
9861 m,
9862 omp_block_start as FastSint,
9863 omp_block_size as FastSint,
9864 ) as FastSint;
9865 }
9866
9867 let mut count = 0 as FastSint;
9868 for omp_thread_num in 0..omp_num_threads {
9869 let omp_block_start = omp_thread_num * omp_block_stride;
9870 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9871 omp_block_stride
9872 } else {
9873 m_usize - omp_block_start
9874 };
9875
9876 if omp_thread_num + 1 == omp_num_threads {
9877 f = (count + thread_state[omp_thread_num].count) as SaSint;
9878 }
9879
9880 renumber_unique_and_nonunique_lms_suffixes_32s(
9881 t,
9882 sa,
9883 m,
9884 count as SaSint,
9885 omp_block_start as FastSint,
9886 omp_block_size as FastSint,
9887 );
9888 count += thread_state[omp_thread_num].count;
9889 }
9890 }
9891
9892 f
9893}
9894
9895#[doc(hidden)]
9897pub fn compact_unique_and_nonunique_lms_suffixes_32s_omp(
9898 sa: &mut [SaSint],
9899 n: SaSint,
9900 m: SaSint,
9901 fs: SaSint,
9902 f: SaSint,
9903 threads: SaSint,
9904 thread_state: &mut [ThreadState],
9905) {
9906 let half_n = (n as FastSint) >> 1;
9907 if threads == 1 || n < 131_072 || m >= fs {
9908 let mut l = m as FastSint;
9909 let mut r = n as FastSint + fs as FastSint;
9910 compact_unique_and_nonunique_lms_suffixes_32s(sa, m, &mut l, &mut r, 0, half_n);
9911 } else {
9912 let threads_usize = usize::try_from(threads)
9913 .expect("threads must be non-negative")
9914 .max(1);
9915 let half_n_usize = usize::try_from(half_n).expect("half_n must be non-negative");
9916 let omp_num_threads = threads_usize.min(half_n_usize.max(1));
9917 let omp_block_stride = (half_n_usize / omp_num_threads) & !15usize;
9918
9919 for omp_thread_num in 0..omp_num_threads {
9920 let omp_block_start = omp_thread_num * omp_block_stride;
9921 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
9922 omp_block_stride
9923 } else {
9924 half_n_usize - omp_block_start
9925 };
9926
9927 thread_state[omp_thread_num].position =
9928 m as FastSint + half_n + omp_block_start as FastSint + omp_block_size as FastSint;
9929 thread_state[omp_thread_num].count =
9930 m as FastSint + omp_block_start as FastSint + omp_block_size as FastSint;
9931
9932 let mut position = thread_state[omp_thread_num].position;
9933 let mut count = thread_state[omp_thread_num].count;
9934 compact_unique_and_nonunique_lms_suffixes_32s(
9935 sa,
9936 m,
9937 &mut position,
9938 &mut count,
9939 omp_block_start as FastSint,
9940 omp_block_size as FastSint,
9941 );
9942 thread_state[omp_thread_num].position = position;
9943 thread_state[omp_thread_num].count = count;
9944 }
9945
9946 let mut position = m as FastSint;
9947 for t in (0..omp_num_threads).rev() {
9948 let omp_block_end = if t + 1 < omp_num_threads {
9949 omp_block_stride * (t + 1)
9950 } else {
9951 half_n_usize
9952 };
9953 let count =
9954 m as FastSint + half_n + omp_block_end as FastSint - thread_state[t].position;
9955 if count > 0 {
9956 position -= count;
9957 let dst = usize::try_from(position).expect("destination must be non-negative");
9958 let src =
9959 usize::try_from(thread_state[t].position).expect("source must be non-negative");
9960 let len = usize::try_from(count).expect("length must be non-negative");
9961 sa.copy_within(src..src + len, dst);
9962 }
9963 }
9964
9965 let mut position = n as FastSint + fs as FastSint;
9966 for t in (0..omp_num_threads).rev() {
9967 let omp_block_end = if t + 1 < omp_num_threads {
9968 omp_block_stride * (t + 1)
9969 } else {
9970 half_n_usize
9971 };
9972 let count = m as FastSint + omp_block_end as FastSint - thread_state[t].count;
9973 if count > 0 {
9974 position -= count;
9975 let dst = usize::try_from(position).expect("destination must be non-negative");
9976 let src =
9977 usize::try_from(thread_state[t].count).expect("source must be non-negative");
9978 let len = usize::try_from(count).expect("length must be non-negative");
9979 sa.copy_within(src..src + len, dst);
9980 }
9981 }
9982 }
9983
9984 let copy_dst = usize::try_from(n + fs - m).expect("copy destination must be non-negative");
9985 let copy_src = usize::try_from(m - f).expect("copy source must be non-negative");
9986 let copy_len = usize::try_from(f).expect("copy length must be non-negative");
9987 sa.copy_within(copy_src..copy_src + copy_len, copy_dst);
9988}
9989
9990#[doc(hidden)]
9992pub fn compact_lms_suffixes_32s_omp(
9993 t: &mut [SaSint],
9994 sa: &mut [SaSint],
9995 n: SaSint,
9996 m: SaSint,
9997 fs: SaSint,
9998 threads: SaSint,
9999 thread_state: &mut [ThreadState],
10000) -> SaSint {
10001 let f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(t, sa, m, threads, thread_state);
10002 compact_unique_and_nonunique_lms_suffixes_32s_omp(sa, n, m, fs, f, threads, thread_state);
10003 f
10004}
10005
10006#[doc(hidden)]
10008pub fn merge_unique_lms_suffixes_32s(
10009 t: &mut [SaSint],
10010 sa: &mut [SaSint],
10011 n: SaSint,
10012 m: SaSint,
10013 l: FastSint,
10014 omp_block_start: FastSint,
10015 omp_block_size: FastSint,
10016) {
10017 if omp_block_size <= 0 {
10018 return;
10019 }
10020
10021 let n_usize = usize::try_from(n).expect("n must be non-negative");
10022 let m_usize = usize::try_from(m).expect("m must be non-negative");
10023 let mut src_index = n_usize - m_usize - 1 + usize::try_from(l).expect("l must be non-negative");
10024 let mut tmp = sa[src_index] as FastSint;
10025 src_index += 1;
10026
10027 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
10028 let block_end =
10029 i + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
10030 let j = block_end.saturating_sub(6);
10031 while i < j {
10032 let c0 = t[i];
10033 if c0 < 0 {
10034 t[i] = c0 & SAINT_MAX;
10035 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint;
10036 i += 1;
10037 tmp = sa[src_index] as FastSint;
10038 src_index += 1;
10039 }
10040
10041 let c1 = t[i + 1];
10042 if c1 < 0 {
10043 t[i + 1] = c1 & SAINT_MAX;
10044 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint + 1;
10045 i += 1;
10046 tmp = sa[src_index] as FastSint;
10047 src_index += 1;
10048 }
10049
10050 let c2 = t[i + 2];
10051 if c2 < 0 {
10052 t[i + 2] = c2 & SAINT_MAX;
10053 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint + 2;
10054 i += 1;
10055 tmp = sa[src_index] as FastSint;
10056 src_index += 1;
10057 }
10058
10059 let c3 = t[i + 3];
10060 if c3 < 0 {
10061 t[i + 3] = c3 & SAINT_MAX;
10062 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint + 3;
10063 i += 1;
10064 tmp = sa[src_index] as FastSint;
10065 src_index += 1;
10066 }
10067
10068 i += 4;
10069 }
10070
10071 while i < block_end {
10072 let c = t[i];
10073 if c < 0 {
10074 t[i] = c & SAINT_MAX;
10075 sa[usize::try_from(tmp).expect("target slot must be non-negative")] = i as SaSint;
10076 i += 1;
10077 tmp = sa[src_index] as FastSint;
10078 src_index += 1;
10079 }
10080 i += 1;
10081 }
10082}
10083
10084#[doc(hidden)]
10086pub fn merge_nonunique_lms_suffixes_32s(
10087 sa: &mut [SaSint],
10088 n: SaSint,
10089 m: SaSint,
10090 l: FastSint,
10091 omp_block_start: FastSint,
10092 omp_block_size: FastSint,
10093) {
10094 if omp_block_size <= 0 {
10095 return;
10096 }
10097
10098 let n_usize = usize::try_from(n).expect("n must be non-negative");
10099 let m_usize = usize::try_from(m).expect("m must be non-negative");
10100 let mut src_index = n_usize - m_usize - 1 + usize::try_from(l).expect("l must be non-negative");
10101 let mut tmp = sa[src_index];
10102 src_index += 1;
10103
10104 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
10105 let block_end =
10106 i + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
10107 let j = block_end.saturating_sub(3);
10108 while i < j {
10109 if sa[i] == 0 {
10110 sa[i] = tmp;
10111 tmp = sa[src_index];
10112 src_index += 1;
10113 }
10114 if sa[i + 1] == 0 {
10115 sa[i + 1] = tmp;
10116 tmp = sa[src_index];
10117 src_index += 1;
10118 }
10119 if sa[i + 2] == 0 {
10120 sa[i + 2] = tmp;
10121 tmp = sa[src_index];
10122 src_index += 1;
10123 }
10124 if sa[i + 3] == 0 {
10125 sa[i + 3] = tmp;
10126 tmp = sa[src_index];
10127 src_index += 1;
10128 }
10129 i += 4;
10130 }
10131
10132 while i < block_end {
10133 if sa[i] == 0 {
10134 sa[i] = tmp;
10135 tmp = sa[src_index];
10136 src_index += 1;
10137 }
10138 i += 1;
10139 }
10140}
10141
10142#[doc(hidden)]
10144pub fn merge_unique_lms_suffixes_32s_omp(
10145 t: &mut [SaSint],
10146 sa: &mut [SaSint],
10147 n: SaSint,
10148 m: SaSint,
10149 threads: SaSint,
10150 thread_state: &mut [ThreadState],
10151) {
10152 if threads == 1 || n < 65_536 {
10153 merge_unique_lms_suffixes_32s(t, sa, n, m, 0, 0, n as FastSint);
10154 return;
10155 }
10156
10157 let threads_usize = usize::try_from(threads)
10158 .expect("threads must be non-negative")
10159 .max(1);
10160 let n_usize = usize::try_from(n).expect("n must be non-negative");
10161 let omp_num_threads = threads_usize.min(n_usize.max(1));
10162 let omp_block_stride = (n_usize / omp_num_threads) & !15usize;
10163
10164 for omp_thread_num in 0..omp_num_threads {
10165 let omp_block_start = omp_thread_num * omp_block_stride;
10166 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10167 omp_block_stride
10168 } else {
10169 n_usize - omp_block_start
10170 };
10171
10172 thread_state[omp_thread_num].count = count_negative_marked_suffixes(
10173 t,
10174 omp_block_start as FastSint,
10175 omp_block_size as FastSint,
10176 ) as FastSint;
10177 }
10178
10179 let mut count = 0 as FastSint;
10180 for omp_thread_num in 0..omp_num_threads {
10181 let omp_block_start = omp_thread_num * omp_block_stride;
10182 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10183 omp_block_stride
10184 } else {
10185 n_usize - omp_block_start
10186 };
10187
10188 merge_unique_lms_suffixes_32s(
10189 t,
10190 sa,
10191 n,
10192 m,
10193 count,
10194 omp_block_start as FastSint,
10195 omp_block_size as FastSint,
10196 );
10197 count += thread_state[omp_thread_num].count;
10198 }
10199}
10200
10201#[doc(hidden)]
10203pub fn merge_nonunique_lms_suffixes_32s_omp(
10204 sa: &mut [SaSint],
10205 n: SaSint,
10206 m: SaSint,
10207 f: SaSint,
10208 threads: SaSint,
10209 thread_state: &mut [ThreadState],
10210) {
10211 if threads == 1 || m < 65_536 {
10212 merge_nonunique_lms_suffixes_32s(sa, n, m, f as FastSint, 0, m as FastSint);
10213 return;
10214 }
10215
10216 let threads_usize = usize::try_from(threads)
10217 .expect("threads must be non-negative")
10218 .max(1);
10219 let m_usize = usize::try_from(m).expect("m must be non-negative");
10220 let omp_num_threads = threads_usize.min(m_usize.max(1));
10221 let omp_block_stride = (m_usize / omp_num_threads) & !15usize;
10222
10223 for omp_thread_num in 0..omp_num_threads {
10224 let omp_block_start = omp_thread_num * omp_block_stride;
10225 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10226 omp_block_stride
10227 } else {
10228 m_usize - omp_block_start
10229 };
10230
10231 thread_state[omp_thread_num].count =
10232 count_zero_marked_suffixes(sa, omp_block_start as FastSint, omp_block_size as FastSint)
10233 as FastSint;
10234 }
10235
10236 let mut count = f as FastSint;
10237 for omp_thread_num in 0..omp_num_threads {
10238 let omp_block_start = omp_thread_num * omp_block_stride;
10239 let omp_block_size = if omp_thread_num + 1 < omp_num_threads {
10240 omp_block_stride
10241 } else {
10242 m_usize - omp_block_start
10243 };
10244
10245 merge_nonunique_lms_suffixes_32s(
10246 sa,
10247 n,
10248 m,
10249 count,
10250 omp_block_start as FastSint,
10251 omp_block_size as FastSint,
10252 );
10253 count += thread_state[omp_thread_num].count;
10254 }
10255}
10256
10257#[doc(hidden)]
10259pub fn merge_compacted_lms_suffixes_32s_omp(
10260 t: &mut [SaSint],
10261 sa: &mut [SaSint],
10262 n: SaSint,
10263 m: SaSint,
10264 f: SaSint,
10265 threads: SaSint,
10266 thread_state: &mut [ThreadState],
10267) {
10268 merge_unique_lms_suffixes_32s_omp(t, sa, n, m, threads, thread_state);
10269 merge_nonunique_lms_suffixes_32s_omp(sa, n, m, f, threads, thread_state);
10270}
10271
10272#[doc(hidden)]
10274pub fn reconstruct_compacted_lms_suffixes_32s_2k_omp(
10275 t: &mut [SaSint],
10276 sa: &mut [SaSint],
10277 n: SaSint,
10278 k: SaSint,
10279 m: SaSint,
10280 fs: SaSint,
10281 f: SaSint,
10282 buckets: &mut [SaSint],
10283 local_buckets: SaSint,
10284 threads: SaSint,
10285 thread_state: &mut [ThreadState],
10286) {
10287 if f > 0 {
10288 let dst = usize::try_from(n - m - 1).expect("destination must be non-negative");
10289 let src = usize::try_from(n + fs - m).expect("source must be non-negative");
10290 let len = usize::try_from(f).expect("length must be non-negative");
10291 sa.copy_within(src..src + len, dst);
10292
10293 let _ = count_and_gather_compacted_lms_suffixes_32s_2k_omp(
10294 t,
10295 sa,
10296 n,
10297 k,
10298 buckets,
10299 local_buckets,
10300 threads,
10301 thread_state,
10302 );
10303 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
10304
10305 let src_copy = 0usize;
10306 let dst_copy = usize::try_from(n - m - 1 + f).expect("destination must be non-negative");
10307 let copy_len = usize::try_from(m - f).expect("copy length must be non-negative");
10308 sa.copy_within(src_copy..src_copy + copy_len, dst_copy);
10309 sa[..usize::try_from(m).expect("m must be non-negative")].fill(0);
10310
10311 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads, thread_state);
10312 } else {
10313 let _ = count_and_gather_lms_suffixes_32s_2k(t, sa, n, k, buckets, 0, n as FastSint);
10314 reconstruct_lms_suffixes_omp(sa, n, m, threads);
10315 }
10316}
10317
10318#[doc(hidden)]
10320pub fn reconstruct_compacted_lms_suffixes_32s_1k_omp(
10321 t: &mut [SaSint],
10322 sa: &mut [SaSint],
10323 n: SaSint,
10324 m: SaSint,
10325 fs: SaSint,
10326 f: SaSint,
10327 threads: SaSint,
10328 thread_state: &mut [ThreadState],
10329) {
10330 if f > 0 {
10331 let dst = usize::try_from(n - m - 1).expect("destination must be non-negative");
10332 let src = usize::try_from(n + fs - m).expect("source must be non-negative");
10333 let len = usize::try_from(f).expect("length must be non-negative");
10334 sa.copy_within(src..src + len, dst);
10335
10336 let _ = gather_compacted_lms_suffixes_32s(t, sa, n);
10337 reconstruct_lms_suffixes_omp(sa, n, m - f, threads);
10338
10339 let dst_copy = usize::try_from(n - m - 1 + f).expect("destination must be non-negative");
10340 let copy_len = usize::try_from(m - f).expect("copy length must be non-negative");
10341 sa.copy_within(0..copy_len, dst_copy);
10342 sa[..usize::try_from(m).expect("m must be non-negative")].fill(0);
10343
10344 merge_compacted_lms_suffixes_32s_omp(t, sa, n, m, f, threads, thread_state);
10345 } else {
10346 let _ = gather_lms_suffixes_32s(t, sa, n);
10347 reconstruct_lms_suffixes_omp(sa, n, m, threads);
10348 }
10349}
10350
10351fn normalize_omp_threads(threads: SaSint) -> SaSint {
10352 if threads > 0 {
10353 threads
10354 } else {
10355 std::thread::available_parallelism()
10356 .map(|value| value.get() as SaSint)
10357 .unwrap_or(1)
10358 .max(1)
10359 }
10360}
10361
10362fn libsais_main_32s_recursion(
10363 t_ptr: *mut SaSint,
10364 sa_ptr: *mut SaSint,
10365 sa_capacity: usize,
10366 n: SaSint,
10367 k: SaSint,
10368 fs: SaSint,
10369 threads: SaSint,
10370 thread_state: &mut [ThreadState],
10371 _local_buffer: &mut [SaSint],
10372) -> SaSint {
10373 let fs = fs.min(SAINT_MAX - n);
10374 let local_buffer_size = SaSint::try_from(LIBSAIS_LOCAL_BUFFER_SIZE).expect("fits");
10375 let n_usize = usize::try_from(n).expect("n must be non-negative");
10376 let fs_usize = usize::try_from(fs).expect("fs must be non-negative");
10377 let total_len = n_usize + fs_usize;
10378 assert!(total_len <= sa_capacity);
10379
10380 if k > 0 && ((fs / k) >= 6 || (local_buffer_size / k) >= 6) {
10381 let k_usize = usize::try_from(k).expect("k must be non-negative");
10382 let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 6 {
10383 1024usize
10384 } else {
10385 16usize
10386 };
10387 let need = 6 * k_usize;
10388 let use_local_buffer = local_buffer_size > fs;
10389 let mut bucket_free_space = SaSint::from(use_local_buffer);
10390 let buckets_ptr = if use_local_buffer {
10391 _local_buffer.as_mut_ptr()
10392 } else {
10393 unsafe {
10394 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10395 let start =
10396 if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 6 {
10397 let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
10398 let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
10399 (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
10400 } else {
10401 total_len - need
10402 };
10403 bucket_free_space =
10404 SaSint::try_from(start - n_usize).expect("bucket free space must fit SaSint");
10405 sa[start..].as_mut_ptr()
10406 }
10407 };
10408
10409 let m = unsafe {
10410 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10411 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10412 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10413 count_and_gather_lms_suffixes_32s_4k_omp(
10414 t,
10415 sa,
10416 n,
10417 k,
10418 buckets,
10419 bucket_free_space,
10420 threads,
10421 thread_state,
10422 )
10423 };
10424 if m > 1 {
10425 let m_usize = usize::try_from(m).expect("m must be non-negative");
10426 unsafe {
10427 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10428 sa[..n_usize - m_usize].fill(0);
10429 }
10430
10431 let first_lms_suffix = unsafe {
10432 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10433 sa[n_usize - m_usize]
10434 };
10435 let left_suffixes_count = unsafe {
10436 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10437 initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
10438 std::slice::from_raw_parts_mut(t_ptr, n_usize),
10439 k,
10440 buckets,
10441 first_lms_suffix,
10442 )
10443 };
10444
10445 unsafe {
10446 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10447 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10448 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10449 let (_, induction_bucket) = buckets.split_at_mut(4 * k_usize);
10450 radix_sort_lms_suffixes_32s_6k_omp(
10451 t,
10452 sa,
10453 n,
10454 m,
10455 induction_bucket,
10456 threads,
10457 thread_state,
10458 );
10459 if (n / 8192) < k {
10460 radix_sort_set_markers_32s_6k_omp(sa, k, induction_bucket, threads);
10461 }
10462 if threads > 1 && n >= 65_536 {
10463 sa[n_usize - m_usize..n_usize].fill(0);
10464 }
10465 initialize_buckets_for_partial_sorting_32s_6k(
10466 t,
10467 k,
10468 buckets,
10469 first_lms_suffix,
10470 left_suffixes_count,
10471 );
10472 induce_partial_order_32s_6k_omp(
10473 t,
10474 sa,
10475 n,
10476 k,
10477 buckets,
10478 first_lms_suffix,
10479 left_suffixes_count,
10480 threads,
10481 thread_state,
10482 );
10483 }
10484
10485 let names = unsafe {
10486 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10487 if (n / 8192) < k {
10488 renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
10489 sa,
10490 n,
10491 m,
10492 threads,
10493 thread_state,
10494 )
10495 } else {
10496 renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state)
10497 }
10498 };
10499
10500 if names < m {
10501 let f = if (n / 8192) < k {
10502 unsafe {
10503 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10504 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10505 compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
10506 }
10507 } else {
10508 0
10509 };
10510
10511 let new_t_start =
10512 total_len - usize::try_from(m - f).expect("m - f must be non-negative");
10513 let recursive_n = m - f;
10514 let recursive_fs = fs + n - 2 * m + f;
10515 if libsais_main_32s_recursion(
10516 unsafe {
10517 std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
10518 .as_mut_ptr()
10519 },
10520 sa_ptr,
10521 sa_capacity,
10522 recursive_n,
10523 names - f,
10524 recursive_fs,
10525 threads,
10526 thread_state,
10527 _local_buffer,
10528 ) != 0
10529 {
10530 return -2;
10531 }
10532
10533 unsafe {
10534 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10535 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10536 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10537 reconstruct_compacted_lms_suffixes_32s_2k_omp(
10538 t,
10539 sa,
10540 n,
10541 k,
10542 m,
10543 fs,
10544 f,
10545 buckets,
10546 SaSint::from(use_local_buffer),
10547 threads,
10548 thread_state,
10549 );
10550 }
10551 } else {
10552 unsafe {
10553 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10554 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10555 count_lms_suffixes_32s_2k(t, n, k, buckets);
10556 }
10557 }
10558
10559 unsafe {
10560 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10561 initialize_buckets_start_and_end_32s_4k(k, buckets);
10562 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10563 place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
10564 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10565 induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
10566 }
10567 } else {
10568 unsafe {
10569 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10570 sa[0] = sa[n_usize - 1];
10571 }
10572
10573 unsafe {
10574 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10575 initialize_buckets_start_and_end_32s_6k(k, buckets);
10576 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10577 place_lms_suffixes_histogram_32s_6k(sa, n, k, m, buckets);
10578 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10579 induce_final_order_32s_6k(t, sa, n, k, buckets, threads, thread_state);
10580 }
10581 }
10582
10583 return 0;
10584 } else if k > 0 && n <= SAINT_MAX / 2 && ((fs / k) >= 4 || (local_buffer_size / k) >= 4) {
10585 let k_usize = usize::try_from(k).expect("k must be non-negative");
10586 let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 4 {
10587 1024usize
10588 } else {
10589 16usize
10590 };
10591 let need = 4 * k_usize;
10592 let use_local_buffer = local_buffer_size > fs;
10593 let mut bucket_free_space = SaSint::from(use_local_buffer);
10594 let buckets_ptr = if use_local_buffer {
10595 _local_buffer.as_mut_ptr()
10596 } else {
10597 unsafe {
10598 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10599 let start =
10600 if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 4 {
10601 let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
10602 let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
10603 (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
10604 } else {
10605 total_len - need
10606 };
10607 bucket_free_space =
10608 SaSint::try_from(start - n_usize).expect("bucket free space must fit SaSint");
10609 sa[start..].as_mut_ptr()
10610 }
10611 };
10612
10613 let m = unsafe {
10614 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10615 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10616 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10617 count_and_gather_lms_suffixes_32s_2k_omp(
10618 t,
10619 sa,
10620 n,
10621 k,
10622 buckets,
10623 bucket_free_space,
10624 threads,
10625 thread_state,
10626 )
10627 };
10628 if m > 1 {
10629 let m_usize = usize::try_from(m).expect("m must be non-negative");
10630 unsafe {
10631 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10632 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10633 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10634 initialize_buckets_for_radix_and_partial_sorting_32s_4k(
10635 t,
10636 k,
10637 buckets,
10638 sa[n_usize - m_usize],
10639 );
10640 let (_, induction_bucket) = buckets.split_at_mut(1);
10641 radix_sort_lms_suffixes_32s_2k_omp(
10642 t,
10643 sa,
10644 n,
10645 m,
10646 induction_bucket,
10647 threads,
10648 thread_state,
10649 );
10650 radix_sort_set_markers_32s_4k_omp(sa, k, induction_bucket, threads);
10651 place_lms_suffixes_interval_32s_4k(sa, n, k, m - 1, buckets);
10652 induce_partial_order_32s_4k_omp(t, sa, n, k, buckets, threads, thread_state);
10653 }
10654
10655 let names = unsafe {
10656 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10657 renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa, n, m, threads, thread_state)
10658 };
10659 if names < m {
10660 let f = unsafe {
10661 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10662 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10663 compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
10664 };
10665
10666 let new_t_start =
10667 total_len - usize::try_from(m - f).expect("m - f must be non-negative");
10668 if libsais_main_32s_recursion(
10669 unsafe {
10670 std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
10671 .as_mut_ptr()
10672 },
10673 sa_ptr,
10674 sa_capacity,
10675 m - f,
10676 names - f,
10677 fs + n - 2 * m + f,
10678 threads,
10679 thread_state,
10680 _local_buffer,
10681 ) != 0
10682 {
10683 return -2;
10684 }
10685
10686 unsafe {
10687 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10688 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10689 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10690 reconstruct_compacted_lms_suffixes_32s_2k_omp(
10691 t,
10692 sa,
10693 n,
10694 k,
10695 m,
10696 fs,
10697 f,
10698 buckets,
10699 SaSint::from(use_local_buffer),
10700 threads,
10701 thread_state,
10702 );
10703 }
10704 } else {
10705 unsafe {
10706 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10707 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10708 count_lms_suffixes_32s_2k(t, n, k, buckets);
10709 }
10710 }
10711 } else {
10712 unsafe {
10713 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10714 sa[0] = sa[n_usize - 1];
10715 }
10716 }
10717
10718 unsafe {
10719 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10720 initialize_buckets_start_and_end_32s_4k(k, buckets);
10721 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10722 place_lms_suffixes_histogram_32s_4k(sa, n, k, m, buckets);
10723 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10724 induce_final_order_32s_4k(t, sa, n, k, buckets, threads, thread_state);
10725 }
10726
10727 return 0;
10728 } else if k > 0 && ((fs / k) >= 2 || (local_buffer_size / k) >= 2) {
10729 let k_usize = usize::try_from(k).expect("k must be non-negative");
10730 let alignment = if fs >= 1024 && ((fs - 1024) / k) >= 2 {
10731 1024usize
10732 } else {
10733 16usize
10734 };
10735 let need = 2 * k_usize;
10736 let use_local_buffer = local_buffer_size > fs;
10737 let mut bucket_free_space = SaSint::from(use_local_buffer);
10738 let buckets_ptr = if use_local_buffer {
10739 _local_buffer.as_mut_ptr()
10740 } else {
10741 unsafe {
10742 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10743 let start =
10744 if fs_usize >= need + alignment && ((fs_usize - alignment) / k_usize) >= 2 {
10745 let byte_ptr = sa[total_len - need - alignment..].as_mut_ptr() as usize;
10746 let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
10747 (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
10748 } else {
10749 total_len - need
10750 };
10751 bucket_free_space =
10752 SaSint::try_from(start - n_usize).expect("bucket free space must fit SaSint");
10753 sa[start..].as_mut_ptr()
10754 }
10755 };
10756
10757 let m = unsafe {
10758 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10759 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10760 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10761 count_and_gather_lms_suffixes_32s_2k_omp(
10762 t,
10763 sa,
10764 n,
10765 k,
10766 buckets,
10767 bucket_free_space,
10768 threads,
10769 thread_state,
10770 )
10771 };
10772 if m > 1 {
10773 let m_usize = usize::try_from(m).expect("m must be non-negative");
10774 unsafe {
10775 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10776 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10777 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10778 initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
10779 t,
10780 k,
10781 buckets,
10782 sa[n_usize - m_usize],
10783 );
10784 let (_, induction_bucket) = buckets.split_at_mut(1);
10785 radix_sort_lms_suffixes_32s_2k_omp(
10786 t,
10787 sa,
10788 n,
10789 m,
10790 induction_bucket,
10791 threads,
10792 thread_state,
10793 );
10794 place_lms_suffixes_interval_32s_2k(sa, n, k, m - 1, buckets);
10795 }
10796
10797 unsafe {
10798 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10799 initialize_buckets_start_and_end_32s_2k(k, buckets);
10800 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10801 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10802 induce_partial_order_32s_2k_omp(t, sa, n, k, buckets, threads, thread_state);
10803 }
10804
10805 let names = unsafe {
10806 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10807 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10808 renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
10809 };
10810 if names < m {
10811 let f = unsafe {
10812 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10813 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10814 compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
10815 };
10816
10817 let new_t_start =
10818 total_len - usize::try_from(m - f).expect("m - f must be non-negative");
10819 if libsais_main_32s_recursion(
10820 unsafe {
10821 std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
10822 .as_mut_ptr()
10823 },
10824 sa_ptr,
10825 sa_capacity,
10826 m - f,
10827 names - f,
10828 fs + n - 2 * m + f,
10829 threads,
10830 thread_state,
10831 _local_buffer,
10832 ) != 0
10833 {
10834 return -2;
10835 }
10836
10837 unsafe {
10838 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10839 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10840 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10841 reconstruct_compacted_lms_suffixes_32s_2k_omp(
10842 t,
10843 sa,
10844 n,
10845 k,
10846 m,
10847 fs,
10848 f,
10849 buckets,
10850 SaSint::from(use_local_buffer),
10851 threads,
10852 thread_state,
10853 );
10854 }
10855 } else {
10856 unsafe {
10857 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10858 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10859 count_lms_suffixes_32s_2k(t, n, k, buckets);
10860 }
10861 }
10862 } else {
10863 unsafe {
10864 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10865 sa[0] = sa[n_usize - 1];
10866 }
10867 }
10868
10869 unsafe {
10870 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10871 initialize_buckets_end_32s_2k(k, buckets);
10872 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10873 place_lms_suffixes_histogram_32s_2k(sa, n, k, m, buckets);
10874 }
10875
10876 unsafe {
10877 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, need);
10878 initialize_buckets_start_and_end_32s_2k(k, buckets);
10879 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10880 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10881 induce_final_order_32s_2k(t, sa, n, k, buckets, threads, thread_state);
10882 }
10883
10884 return 0;
10885 } else {
10886 let k_usize = usize::try_from(k).expect("k must be non-negative");
10887 let mut heap_buckets = if fs < k { Some(vec![0; k_usize]) } else { None };
10888 let alignment = if fs >= 1024 && (fs - 1024) >= k {
10889 1024usize
10890 } else {
10891 16usize
10892 };
10893 let mut buckets_ptr = if let Some(ref mut heap) = heap_buckets {
10894 heap.as_mut_ptr()
10895 } else {
10896 unsafe {
10897 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10898 let start = if fs_usize >= k_usize + alignment {
10899 let byte_ptr = sa[total_len - k_usize - alignment..].as_mut_ptr() as usize;
10900 let aligned = align_up(byte_ptr, alignment * mem::size_of::<SaSint>());
10901 (aligned - sa_ptr as usize) / mem::size_of::<SaSint>()
10902 } else {
10903 total_len - k_usize
10904 };
10905 sa[start..].as_mut_ptr()
10906 }
10907 };
10908
10909 if buckets_ptr.is_null() {
10910 return -2;
10911 }
10912
10913 unsafe {
10914 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10915 sa[..n_usize].fill(0);
10916 }
10917
10918 unsafe {
10919 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10920 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
10921 count_suffixes_32s(t, n, k, buckets);
10922 }
10923 unsafe {
10924 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
10925 initialize_buckets_end_32s_1k(k, buckets);
10926 }
10927
10928 let m = unsafe {
10929 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10930 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10931 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
10932 radix_sort_lms_suffixes_32s_1k(t, sa, n, buckets)
10933 };
10934 if m > 1 {
10935 unsafe {
10936 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10937 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10938 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
10939 induce_partial_order_32s_1k_omp(t, sa, n, k, buckets, threads, thread_state);
10940 }
10941
10942 let names = unsafe {
10943 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10944 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10945 renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(t, sa, n, m, threads)
10946 };
10947 if names < m {
10948 if heap_buckets.is_some() {
10949 let _ = heap_buckets.take();
10950 buckets_ptr = std::ptr::null_mut();
10951 }
10952
10953 let f = unsafe {
10954 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10955 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10956 compact_lms_suffixes_32s_omp(t, sa, n, m, fs, threads, thread_state)
10957 };
10958
10959 let new_t_start =
10960 total_len - usize::try_from(m - f).expect("m - f must be non-negative");
10961 if libsais_main_32s_recursion(
10962 unsafe {
10963 std::slice::from_raw_parts_mut(sa_ptr, total_len)[new_t_start..]
10964 .as_mut_ptr()
10965 },
10966 sa_ptr,
10967 sa_capacity,
10968 m - f,
10969 names - f,
10970 fs + n - 2 * m + f,
10971 threads,
10972 thread_state,
10973 _local_buffer,
10974 ) != 0
10975 {
10976 return -2;
10977 }
10978
10979 unsafe {
10980 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
10981 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
10982 reconstruct_compacted_lms_suffixes_32s_1k_omp(
10983 t,
10984 sa,
10985 n,
10986 m,
10987 fs,
10988 f,
10989 threads,
10990 thread_state,
10991 );
10992 }
10993
10994 if buckets_ptr.is_null() {
10995 heap_buckets = Some(vec![0; k_usize]);
10996 buckets_ptr = heap_buckets
10997 .as_mut()
10998 .expect("heap buckets must exist")
10999 .as_mut_ptr();
11000 if buckets_ptr.is_null() {
11001 return -2;
11002 }
11003 }
11004 }
11005
11006 unsafe {
11007 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
11008 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
11009 count_suffixes_32s(t, n, k, buckets);
11010 }
11011 unsafe {
11012 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
11013 initialize_buckets_end_32s_1k(k, buckets);
11014 }
11015 unsafe {
11016 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
11017 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
11018 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
11019 place_lms_suffixes_interval_32s_1k(t, sa, k, m, buckets);
11020 }
11021 }
11022
11023 unsafe {
11024 let t = std::slice::from_raw_parts_mut(t_ptr, n_usize);
11025 let sa = std::slice::from_raw_parts_mut(sa_ptr, total_len);
11026 let buckets = std::slice::from_raw_parts_mut(buckets_ptr, k_usize);
11027 induce_final_order_32s_1k(t, sa, n, k, buckets, threads, thread_state);
11028 }
11029
11030 0
11031 }
11032}
11033
11034fn libsais_main_32s_entry(
11035 t: &mut [SaSint],
11036 sa: &mut [SaSint],
11037 n: SaSint,
11038 k: SaSint,
11039 fs: SaSint,
11040 threads: SaSint,
11041 thread_state: &mut [ThreadState],
11042) -> SaSint {
11043 let mut local_buffer = [0; 2 * LIBSAIS_LOCAL_BUFFER_SIZE];
11044 libsais_main_32s_recursion(
11045 t.as_mut_ptr(),
11046 sa.as_mut_ptr(),
11047 sa.len(),
11048 n,
11049 k,
11050 fs,
11051 threads,
11052 thread_state,
11053 &mut local_buffer[LIBSAIS_LOCAL_BUFFER_SIZE..],
11054 )
11055}
11056
/// Core driver for byte (`u8`) input: runs the helper stages below in a fixed
/// order and returns the result of `induce_final_order_8u_omp`.
///
/// Early returns visible here:
/// * `-1` when `LIBSAIS_FLAGS_GSA` is set and the bucket check after
///   `initialize_buckets_start_and_end_8u` fails;
/// * `-2` when the recursive call to `libsais_main_32s_entry` returns non-zero.
///
/// Panics if `t.len()` does not fit in `SaSint`, or if one of the
/// `usize::try_from(..).expect(..)` conversions below sees a negative value.
fn libsais_main_8u(
    t: &[u8],
    sa: &mut [SaSint],
    buckets: &mut [SaSint],
    flags: SaSint,
    r: SaSint,
    i: Option<&mut [SaSint]>,
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
    threads: SaSint,
    thread_state: &mut [ThreadState],
) -> SaSint {
    let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
    let n_usize = usize::try_from(n).expect("n must be non-negative");
    // Clamp the free space so that `n + fs` cannot exceed `SAINT_MAX`.
    let fs = fs.min(SAINT_MAX - n);

    // `m` gates the whole sorting branch below; `k` is forwarded to the induce stages.
    let m = count_and_gather_lms_suffixes_8u_omp(t, sa, n, buckets, threads, thread_state);
    let k = initialize_buckets_start_and_end_8u(buckets, freq);

    // NOTE(review): in GSA mode the call is rejected unless buckets[0] == 0,
    // buckets[2] == 0 and buckets[3] == 1 — presumably a check on how the
    // zero separator occurs in `t`; confirm against the bucket layout.
    if (flags & LIBSAIS_FLAGS_GSA) != 0 && (buckets[0] != 0 || buckets[2] != 0 || buckets[3] != 1) {
        return -1;
    }

    if m > 0 {
        let m_usize = usize::try_from(m).expect("m must be non-negative");
        // The gather stage left `m` entries in sa[n - m..n]; remember the first of them.
        let first_lms_suffix = sa[n_usize - m_usize];
        let left_suffixes_count =
            initialize_buckets_for_lms_suffixes_radix_sort_8u(t, buckets, first_lms_suffix);

        // For large multi-threaded runs the two halves of `sa` are zeroed
        // around the radix sort: the prefix before it, the gathered tail after it.
        if threads > 1 && n >= 65_536 {
            sa[..n_usize - m_usize].fill(0);
        }
        radix_sort_lms_suffixes_8u_omp(t, sa, n, m, flags, buckets, threads, thread_state);
        if threads > 1 && n >= 65_536 {
            sa[n_usize - m_usize..n_usize].fill(0);
        }

        initialize_buckets_for_partial_sorting_8u(
            t,
            buckets,
            first_lms_suffix,
            left_suffixes_count,
        );
        induce_partial_order_8u_omp(
            t,
            sa,
            n,
            k,
            flags,
            buckets,
            first_lms_suffix,
            left_suffixes_count,
            threads,
            thread_state,
        );

        let names = renumber_and_gather_lms_suffixes_omp(sa, n, m, fs, threads, thread_state);
        // Fewer names than entries: recurse on a reduced problem of length `m`
        // with alphabet size `names`.
        if names < m {
            // The reduced text occupies the last `m` slots of the `n + fs` region of `sa`.
            let recursive_text_start =
                n_usize + usize::try_from(fs).expect("fs must be non-negative") - m_usize;
            let recursive_fs = fs + n - 2 * m;

            // NOTE(review): the reduced text is a raw-pointer slice into `sa`
            // while `sa` itself is also passed mutably, so the two `&mut`
            // slices overlap. The callee is expected to only touch them
            // through raw pointers — confirm before refactoring.
            let index = libsais_main_32s_entry(
                unsafe {
                    std::slice::from_raw_parts_mut(sa[recursive_text_start..].as_mut_ptr(), m_usize)
                },
                sa,
                m,
                names,
                recursive_fs,
                threads,
                thread_state,
            );

            if index != 0 {
                return -2;
            }

            gather_lms_suffixes_8u_omp(t, sa, n, threads, thread_state);
            reconstruct_lms_suffixes_omp(sa, n, m, threads);
        }

        place_lms_suffixes_interval_8u(sa, n, m, flags, buckets);
    } else {
        // Nothing was gathered: start the final induction from an all-zero array.
        sa[..n_usize].fill(0);
    }

    induce_final_order_8u_omp(t, sa, n, k, flags, r, i, buckets, threads, thread_state)
}
11146
11147fn libsais_main(
11148 t: &[u8],
11149 sa: &mut [SaSint],
11150 flags: SaSint,
11151 r: SaSint,
11152 i: Option<&mut [SaSint]>,
11153 fs: SaSint,
11154 freq: Option<&mut [SaSint]>,
11155 threads: SaSint,
11156) -> SaSint {
11157 let threads = normalize_omp_threads(threads);
11158 if threads > 1 {
11159 let mut thread_state = match alloc_thread_state(threads) {
11160 Some(thread_state) => thread_state,
11161 None => return -2,
11162 };
11163 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
11164
11165 libsais_main_8u(
11166 t,
11167 sa,
11168 &mut buckets,
11169 flags,
11170 r,
11171 i,
11172 fs,
11173 freq,
11174 threads,
11175 &mut thread_state,
11176 )
11177 } else {
11178 let mut thread_state = [];
11179 let mut buckets = [0; 8 * ALPHABET_SIZE];
11180
11181 libsais_main_8u(
11182 t,
11183 sa,
11184 &mut buckets,
11185 flags,
11186 r,
11187 i,
11188 fs,
11189 freq,
11190 threads,
11191 &mut thread_state,
11192 )
11193 }
11194}
11195
11196fn libsais_main_int(
11197 t: &mut [SaSint],
11198 sa: &mut [SaSint],
11199 k: SaSint,
11200 fs: SaSint,
11201 threads: SaSint,
11202) -> SaSint {
11203 let threads = normalize_omp_threads(threads);
11204 let mut thread_state = if threads > 1 {
11205 match alloc_thread_state(threads) {
11206 Some(thread_state) => thread_state,
11207 None => return -2,
11208 }
11209 } else {
11210 Vec::new()
11211 };
11212
11213 libsais_main_32s_entry(
11214 t,
11215 sa,
11216 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
11217 k,
11218 fs,
11219 threads,
11220 &mut thread_state,
11221 )
11222}
11223
/// Short-named alias for `libsais_main_32s_recursion`: forwards every argument
/// unchanged, in the same order, and returns its result.
// `dead_code` is allowed because nothing in this part of the file calls the alias.
#[allow(dead_code)]
fn main_32s_recursion(
    t_ptr: *mut SaSint,
    sa_ptr: *mut SaSint,
    sa_capacity: usize,
    n: SaSint,
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
    local_buffer: &mut [SaSint],
) -> SaSint {
    libsais_main_32s_recursion(
        t_ptr,
        sa_ptr,
        sa_capacity,
        n,
        k,
        fs,
        threads,
        thread_state,
        local_buffer,
    )
}
11248
/// Short-named alias for `libsais_main_32s_entry`: forwards every argument
/// unchanged and returns its result.
// `dead_code` is allowed because nothing in this part of the file calls the alias.
#[allow(dead_code)]
fn main_32s_entry(
    t: &mut [SaSint],
    sa: &mut [SaSint],
    n: SaSint,
    k: SaSint,
    fs: SaSint,
    threads: SaSint,
    thread_state: &mut [ThreadState],
) -> SaSint {
    libsais_main_32s_entry(t, sa, n, k, fs, threads, thread_state)
}
11261
/// Short-named alias for `libsais_main_8u`: forwards every argument unchanged
/// and returns its result.
// `dead_code` is allowed because nothing in this part of the file calls the alias.
#[allow(dead_code)]
fn main_8u(
    t: &[u8],
    sa: &mut [SaSint],
    buckets: &mut [SaSint],
    flags: SaSint,
    r: SaSint,
    i: Option<&mut [SaSint]>,
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
    threads: SaSint,
    thread_state: &mut [ThreadState],
) -> SaSint {
    libsais_main_8u(t, sa, buckets, flags, r, i, fs, freq, threads, thread_state)
}
11277
/// Short-named alias for `libsais_main_int`: forwards every argument unchanged
/// and returns its result.
// `dead_code` is allowed because nothing in this part of the file calls the alias.
#[allow(dead_code)]
fn main_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint, threads: SaSint) -> SaSint {
    libsais_main_int(t, sa, k, fs, threads)
}
11282
11283fn libsais_main_ctx(
11284 ctx: &mut Context,
11285 t: &[u8],
11286 sa: &mut [SaSint],
11287 flags: SaSint,
11288 r: SaSint,
11289 i: Option<&mut [SaSint]>,
11290 fs: SaSint,
11291 freq: Option<&mut [SaSint]>,
11292) -> SaSint {
11293 if ctx.threads <= 0 || ctx.buckets.len() != 8 * ALPHABET_SIZE {
11294 return -2;
11295 }
11296
11297 let mut empty_thread_state = [];
11298 let thread_state = if ctx.threads > 1 {
11299 match ctx.thread_state.as_deref_mut() {
11300 Some(thread_state) if thread_state.len() >= ctx.threads as usize => thread_state,
11301 None => return -2,
11302 Some(_) => return -2,
11303 }
11304 } else {
11305 &mut empty_thread_state
11306 };
11307
11308 libsais_main_8u(
11309 t,
11310 sa,
11311 &mut ctx.buckets,
11312 flags,
11313 r,
11314 i,
11315 fs,
11316 freq,
11317 ctx.threads as SaSint,
11318 thread_state,
11319 )
11320}
11321
// Foreign functions that are only declared when the `upstream-c` feature is
// enabled. They are called by `libsais_upstream_c` and
// `libsais_upstream_c_omp` below.
// NOTE(review): presumably thin C shims around the upstream libsais entry
// points — confirm against the C source that is linked in.
#[cfg(feature = "upstream-c")]
unsafe extern "C" {
    /// Takes the text pointer `t`, the output array `sa`, the text length `n`,
    /// the extra free space `fs`, and an optional (nullable) `freq` table.
    /// The Rust caller guarantees `sa` has at least `n + fs` entries and that a
    /// non-null `freq` has at least `ALPHABET_SIZE` entries.
    fn probe_public_libsais_freq(
        t: *const u8,
        sa: *mut SaSint,
        n: SaSint,
        fs: SaSint,
        freq: *mut SaSint,
    ) -> SaSint;

    /// Same argument layout as `probe_public_libsais_freq` with an additional
    /// `threads` count; the Rust caller always passes a value of at least 1.
    fn probe_public_libsais_omp_freq(
        t: *const u8,
        sa: *mut SaSint,
        n: SaSint,
        fs: SaSint,
        freq: *mut SaSint,
        threads: SaSint,
    ) -> SaSint;
}
11341
/// Validates the buffers and forwards the call to the foreign
/// `probe_public_libsais_freq` function.
///
/// Returns `-1` without calling into C when `fs` is negative, `t` is longer
/// than `SaSint::MAX`, `sa` has fewer than `t.len() + fs` entries, or a
/// supplied `freq` table has fewer than `ALPHABET_SIZE` entries. Otherwise
/// returns the foreign function's result. A missing `freq` is passed as a null
/// pointer.
#[cfg(feature = "upstream-c")]
pub fn libsais_upstream_c(
    t: &[u8],
    sa: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
) -> SaSint {
    let text_len = t.len();
    if fs < 0 || text_len > SaSint::MAX as usize {
        return -1;
    }
    let required = text_len.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX));
    if sa.len() < required {
        return -1;
    }
    if freq.as_deref().is_some_and(|table| table.len() < ALPHABET_SIZE) {
        return -1;
    }

    let n = text_len as SaSint;
    let freq_ptr = match freq {
        Some(table) => table.as_mut_ptr(),
        None => std::ptr::null_mut(),
    };
    // SAFETY: `t` and `sa` are live slices whose lengths were checked above
    // against `n` and `n + fs`; `freq_ptr` is either null or points to at
    // least `ALPHABET_SIZE` entries.
    unsafe { probe_public_libsais_freq(t.as_ptr(), sa.as_mut_ptr(), n, fs, freq_ptr) }
}
11377
/// Validates the buffers and forwards the call to the foreign
/// `probe_public_libsais_omp_freq` function.
///
/// Returns `-1` without calling into C when `threads` or `fs` is negative,
/// `t` is longer than `SaSint::MAX`, `sa` has fewer than `t.len() + fs`
/// entries, or a supplied `freq` table has fewer than `ALPHABET_SIZE` entries.
/// The thread count handed to C is `threads.max(1)`; a missing `freq` is
/// passed as a null pointer.
#[cfg(feature = "upstream-c")]
pub fn libsais_upstream_c_omp(
    t: &[u8],
    sa: &mut [SaSint],
    fs: SaSint,
    freq: Option<&mut [SaSint]>,
    threads: SaSint,
) -> SaSint {
    if threads < 0 {
        return -1;
    }

    let text_len = t.len();
    if fs < 0 || text_len > SaSint::MAX as usize {
        return -1;
    }
    let required = text_len.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX));
    if sa.len() < required {
        return -1;
    }
    if freq.as_deref().is_some_and(|table| table.len() < ALPHABET_SIZE) {
        return -1;
    }

    let n = text_len as SaSint;
    let worker_count = threads.max(1);
    let freq_ptr = match freq {
        Some(table) => table.as_mut_ptr(),
        None => std::ptr::null_mut(),
    };
    // SAFETY: `t` and `sa` are live slices whose lengths were checked above
    // against `n` and `n + fs`; `freq_ptr` is either null or points to at
    // least `ALPHABET_SIZE` entries.
    unsafe {
        probe_public_libsais_omp_freq(t.as_ptr(), sa.as_mut_ptr(), n, fs, freq_ptr, worker_count)
    }
}
11420
11421pub fn libsais(t: &[u8], sa: &mut [SaSint], fs: SaSint, freq: Option<&mut [SaSint]>) -> SaSint {
11430 if fs < 0
11431 || sa.len()
11432 < t.len()
11433 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11434 {
11435 return -1;
11436 }
11437 if let Some(freq) = freq.as_ref() {
11438 if freq.len() < ALPHABET_SIZE {
11439 return -1;
11440 }
11441 }
11442
11443 let n = t.len();
11444 if n <= 1 {
11445 if let Some(freq) = freq {
11446 freq[..ALPHABET_SIZE].fill(0);
11447 if n == 1 {
11448 freq[t[0] as usize] += 1;
11449 }
11450 }
11451 if n == 1 {
11452 sa[0] = 0;
11453 }
11454 return 0;
11455 }
11456
11457 libsais_main(t, sa, LIBSAIS_FLAGS_NONE, 0, None, fs, freq, 1)
11458}
11459
11460pub fn libsais_gsa(t: &[u8], sa: &mut [SaSint], fs: SaSint, freq: Option<&mut [SaSint]>) -> SaSint {
11469 if fs < 0
11470 || sa.len()
11471 < t.len()
11472 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11473 {
11474 return -1;
11475 }
11476 if let Some(freq) = freq.as_ref() {
11477 if freq.len() < ALPHABET_SIZE {
11478 return -1;
11479 }
11480 }
11481
11482 let n = t.len();
11483 if n > 0 && t[n - 1] != 0 {
11484 return -1;
11485 }
11486
11487 if n <= 1 {
11488 if let Some(freq) = freq {
11489 freq[..ALPHABET_SIZE].fill(0);
11490 if n == 1 {
11491 freq[t[0] as usize] += 1;
11492 }
11493 }
11494 if n == 1 {
11495 sa[0] = 0;
11496 }
11497 return 0;
11498 }
11499
11500 libsais_main(t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq, 1)
11501}
11502
11503pub fn libsais_int(t: &mut [SaSint], sa: &mut [SaSint], k: SaSint, fs: SaSint) -> SaSint {
11514 if fs < 0
11515 || sa.len()
11516 < t.len()
11517 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11518 {
11519 return -1;
11520 }
11521
11522 if t.len() <= 1 {
11523 if t.len() == 1 {
11524 sa[0] = 0;
11525 }
11526 return 0;
11527 }
11528
11529 libsais_main_int(t, sa, k, fs, 1)
11530}
11531
11532pub fn libsais_ctx(
11542 ctx: &mut Context,
11543 t: &[u8],
11544 sa: &mut [SaSint],
11545 fs: SaSint,
11546 freq: Option<&mut [SaSint]>,
11547) -> SaSint {
11548 if fs < 0
11549 || sa.len()
11550 < t.len()
11551 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11552 {
11553 return -1;
11554 }
11555 if let Some(freq) = freq.as_ref() {
11556 if freq.len() < ALPHABET_SIZE {
11557 return -1;
11558 }
11559 }
11560
11561 let n = t.len();
11562 if n <= 1 {
11563 if let Some(freq) = freq {
11564 freq[..ALPHABET_SIZE].fill(0);
11565 if n == 1 {
11566 freq[t[0] as usize] += 1;
11567 }
11568 }
11569 if n == 1 {
11570 sa[0] = 0;
11571 }
11572 return 0;
11573 }
11574
11575 libsais_main_ctx(ctx, t, sa, LIBSAIS_FLAGS_NONE, 0, None, fs, freq)
11576}
11577
11578pub fn libsais_gsa_ctx(
11588 ctx: &mut Context,
11589 t: &[u8],
11590 sa: &mut [SaSint],
11591 fs: SaSint,
11592 freq: Option<&mut [SaSint]>,
11593) -> SaSint {
11594 if fs < 0
11595 || sa.len()
11596 < t.len()
11597 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11598 {
11599 return -1;
11600 }
11601 if let Some(freq) = freq.as_ref() {
11602 if freq.len() < ALPHABET_SIZE {
11603 return -1;
11604 }
11605 }
11606
11607 let n = t.len();
11608 if n > 0 && t[n - 1] != 0 {
11609 return -1;
11610 }
11611
11612 if n <= 1 {
11613 if let Some(freq) = freq {
11614 freq[..ALPHABET_SIZE].fill(0);
11615 if n == 1 {
11616 freq[t[0] as usize] += 1;
11617 }
11618 }
11619 if n == 1 {
11620 sa[0] = 0;
11621 }
11622 return 0;
11623 }
11624
11625 libsais_main_ctx(ctx, t, sa, LIBSAIS_FLAGS_GSA, 0, None, fs, freq)
11626}
11627
11628pub fn libsais_bwt(
11638 t: &[u8],
11639 u: &mut [u8],
11640 a: &mut [SaSint],
11641 fs: SaSint,
11642 freq: Option<&mut [SaSint]>,
11643) -> SaSint {
11644 if fs < 0
11645 || u.len() < t.len()
11646 || a.len()
11647 < t.len()
11648 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11649 {
11650 return -1;
11651 }
11652 if let Some(freq) = freq.as_ref() {
11653 if freq.len() < ALPHABET_SIZE {
11654 return -1;
11655 }
11656 }
11657
11658 let n = t.len();
11659 if n <= 1 {
11660 if let Some(freq) = freq {
11661 freq[..ALPHABET_SIZE].fill(0);
11662 if n == 1 {
11663 u[0] = t[0];
11664 freq[t[0] as usize] += 1;
11665 }
11666 } else if n == 1 {
11667 u[0] = t[0];
11668 }
11669 return n as SaSint;
11670 }
11671
11672 let mut index = libsais_main(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, 1);
11673 if index >= 0 {
11674 index += 1;
11675 let split = usize::try_from(index).expect("index must be non-negative");
11676 u[0] = t[n - 1];
11677 bwt_copy_8u_omp(&mut u[1..split], &a[..split - 1], index - 1, 1);
11678 bwt_copy_8u_omp(
11679 &mut u[split..n],
11680 &a[split..n],
11681 SaSint::try_from(n - split).expect("fits"),
11682 1,
11683 );
11684 }
11685 index
11686}
11687
11688pub fn libsais_bwt_aux(
11700 t: &[u8],
11701 u: &mut [u8],
11702 a: &mut [SaSint],
11703 fs: SaSint,
11704 freq: Option<&mut [SaSint]>,
11705 r: SaSint,
11706 i: &mut [SaSint],
11707) -> SaSint {
11708 let n = t.len();
11709 if fs < 0
11710 || r < 2
11711 || (r & (r - 1)) != 0
11712 || u.len() < n
11713 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11714 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
11715 {
11716 return -1;
11717 }
11718 let sample_count = if n == 0 {
11719 1
11720 } else {
11721 usize::try_from((SaSint::try_from(n).expect("input length must fit SaSint") - 1) / r)
11722 .expect("sample count must be non-negative")
11723 + 1
11724 };
11725 if i.len() < sample_count {
11726 return -1;
11727 }
11728
11729 if n <= 1 {
11730 if let Some(freq) = freq {
11731 freq[..ALPHABET_SIZE].fill(0);
11732 if n == 1 {
11733 u[0] = t[0];
11734 freq[t[0] as usize] += 1;
11735 }
11736 } else if n == 1 {
11737 u[0] = t[0];
11738 }
11739 i[0] = n as SaSint;
11740 return 0;
11741 }
11742
11743 let index = libsais_main(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, 1);
11744 if index == 0 {
11745 let split = usize::try_from(i[0]).expect("primary index must be non-negative");
11746 u[0] = t[n - 1];
11747 bwt_copy_8u_omp(&mut u[1..split], &a[..split - 1], i[0] - 1, 1);
11748 bwt_copy_8u_omp(
11749 &mut u[split..n],
11750 &a[split..n],
11751 SaSint::try_from(n - split).expect("fits"),
11752 1,
11753 );
11754 }
11755 index
11756}
11757
11758pub fn libsais_bwt_ctx(
11769 ctx: &mut Context,
11770 t: &[u8],
11771 u: &mut [u8],
11772 a: &mut [SaSint],
11773 fs: SaSint,
11774 freq: Option<&mut [SaSint]>,
11775) -> SaSint {
11776 if fs < 0
11777 || u.len() < t.len()
11778 || a.len()
11779 < t.len()
11780 .saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11781 {
11782 return -1;
11783 }
11784 if let Some(freq) = freq.as_ref() {
11785 if freq.len() < ALPHABET_SIZE {
11786 return -1;
11787 }
11788 }
11789
11790 let n = t.len();
11791 if n <= 1 {
11792 if let Some(freq) = freq {
11793 freq[..ALPHABET_SIZE].fill(0);
11794 if n == 1 {
11795 u[0] = t[0];
11796 freq[t[0] as usize] += 1;
11797 }
11798 } else if n == 1 {
11799 u[0] = t[0];
11800 }
11801 return n as SaSint;
11802 }
11803
11804 let mut index = libsais_main_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq);
11805 if index >= 0 {
11806 index += 1;
11807 let split = usize::try_from(index).expect("index must be non-negative");
11808 u[0] = t[n - 1];
11809 bwt_copy_8u_omp(
11810 &mut u[1..split],
11811 &a[..split - 1],
11812 index - 1,
11813 ctx.threads as SaSint,
11814 );
11815 bwt_copy_8u_omp(
11816 &mut u[split..n],
11817 &a[split..n],
11818 SaSint::try_from(n - split).expect("fits"),
11819 ctx.threads as SaSint,
11820 );
11821 }
11822 index
11823}
11824
11825pub fn libsais_bwt_aux_ctx(
11838 ctx: &mut Context,
11839 t: &[u8],
11840 u: &mut [u8],
11841 a: &mut [SaSint],
11842 fs: SaSint,
11843 freq: Option<&mut [SaSint]>,
11844 r: SaSint,
11845 i: &mut [SaSint],
11846) -> SaSint {
11847 let n = t.len();
11848 if fs < 0
11849 || r < 2
11850 || (r & (r - 1)) != 0
11851 || u.len() < n
11852 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
11853 {
11854 return -1;
11855 }
11856 if let Some(freq) = freq.as_ref() {
11857 if freq.len() < ALPHABET_SIZE {
11858 return -1;
11859 }
11860 }
11861 let sample_count = if n == 0 {
11862 1
11863 } else {
11864 usize::try_from((SaSint::try_from(n).expect("input length must fit SaSint") - 1) / r)
11865 .expect("sample count must be non-negative")
11866 + 1
11867 };
11868 if i.len() < sample_count {
11869 return -1;
11870 }
11871
11872 if n <= 1 {
11873 if let Some(freq) = freq {
11874 freq[..ALPHABET_SIZE].fill(0);
11875 if n == 1 {
11876 u[0] = t[0];
11877 freq[t[0] as usize] += 1;
11878 }
11879 } else if n == 1 {
11880 u[0] = t[0];
11881 }
11882 i[0] = n as SaSint;
11883 return 0;
11884 }
11885
11886 let index = libsais_main_ctx(ctx, t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq);
11887 if index == 0 {
11888 let split = usize::try_from(i[0]).expect("primary index must be non-negative");
11889 u[0] = t[n - 1];
11890 bwt_copy_8u_omp(
11891 &mut u[1..split],
11892 &a[..split - 1],
11893 i[0] - 1,
11894 ctx.threads as SaSint,
11895 );
11896 bwt_copy_8u_omp(
11897 &mut u[split..n],
11898 &a[split..n],
11899 SaSint::try_from(n - split).expect("fits"),
11900 ctx.threads as SaSint,
11901 );
11902 }
11903 index
11904}
11905
11906pub fn create_ctx_omp(threads: SaSint) -> Option<Context> {
11914 if threads < 0 {
11915 return None;
11916 }
11917
11918 create_ctx_main(normalize_omp_threads(threads))
11919}
11920
11921pub fn libsais_omp(
11931 t: &[u8],
11932 sa: &mut [SaSint],
11933 fs: SaSint,
11934 freq: Option<&mut [SaSint]>,
11935 threads: SaSint,
11936) -> SaSint {
11937 if threads < 0 {
11938 return -1;
11939 }
11940 if let Some(freq) = freq.as_ref() {
11941 if freq.len() < ALPHABET_SIZE {
11942 return -1;
11943 }
11944 }
11945 let n = t.len();
11946 if n <= 1 {
11947 if let Some(freq) = freq {
11948 freq[..ALPHABET_SIZE].fill(0);
11949 if n == 1 {
11950 sa[0] = 0;
11951 freq[t[0] as usize] += 1;
11952 }
11953 } else if n == 1 {
11954 sa[0] = 0;
11955 }
11956 return 0;
11957 }
11958
11959 libsais_main(
11960 t,
11961 sa,
11962 LIBSAIS_FLAGS_NONE,
11963 0,
11964 None,
11965 fs,
11966 freq,
11967 normalize_omp_threads(threads),
11968 )
11969}
11970
11971pub fn libsais_gsa_omp(
11981 t: &[u8],
11982 sa: &mut [SaSint],
11983 fs: SaSint,
11984 freq: Option<&mut [SaSint]>,
11985 threads: SaSint,
11986) -> SaSint {
11987 if threads < 0 || t.last().copied().unwrap_or(0) != 0 {
11988 return -1;
11989 }
11990 if let Some(freq) = freq.as_ref() {
11991 if freq.len() < ALPHABET_SIZE {
11992 return -1;
11993 }
11994 }
11995 let n = t.len();
11996 if n <= 1 {
11997 if let Some(freq) = freq {
11998 freq[..ALPHABET_SIZE].fill(0);
11999 if n == 1 {
12000 sa[0] = 0;
12001 freq[t[0] as usize] += 1;
12002 }
12003 } else if n == 1 {
12004 sa[0] = 0;
12005 }
12006 return 0;
12007 }
12008
12009 libsais_main(
12010 t,
12011 sa,
12012 LIBSAIS_FLAGS_GSA,
12013 0,
12014 None,
12015 fs,
12016 freq,
12017 normalize_omp_threads(threads),
12018 )
12019}
12020
12021pub fn libsais_int_omp(
12033 t: &mut [SaSint],
12034 sa: &mut [SaSint],
12035 k: SaSint,
12036 fs: SaSint,
12037 threads: SaSint,
12038) -> SaSint {
12039 if threads < 0 {
12040 return -1;
12041 }
12042 if t.len() <= 1 {
12043 if t.len() == 1 {
12044 sa[0] = 0;
12045 }
12046 return 0;
12047 }
12048
12049 libsais_main_int(t, sa, k, fs, normalize_omp_threads(threads))
12050}
12051
12052pub fn libsais_bwt_omp(
12063 t: &[u8],
12064 u: &mut [u8],
12065 a: &mut [SaSint],
12066 fs: SaSint,
12067 freq: Option<&mut [SaSint]>,
12068 threads: SaSint,
12069) -> SaSint {
12070 let n = t.len();
12071 if threads < 0
12072 || fs < 0
12073 || u.len() < n
12074 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
12075 || freq.as_ref().is_some_and(|freq| freq.len() < ALPHABET_SIZE)
12076 {
12077 return -1;
12078 }
12079
12080 if n <= 1 {
12081 if let Some(freq) = freq {
12082 freq[..ALPHABET_SIZE].fill(0);
12083 if n == 1 {
12084 u[0] = t[0];
12085 freq[t[0] as usize] += 1;
12086 }
12087 } else if n == 1 {
12088 u[0] = t[0];
12089 }
12090 return n as SaSint;
12091 }
12092
12093 let threads = if threads > 0 { threads } else { 1 };
12094 let mut index = libsais_main(t, a, LIBSAIS_FLAGS_BWT, 0, None, fs, freq, threads);
12095 if index >= 0 {
12096 index += 1;
12097 let index_usize = usize::try_from(index).expect("index must be non-negative");
12098 u[0] = t[n - 1];
12099 bwt_copy_8u_omp(
12100 &mut u[1..index_usize],
12101 &a[..index_usize - 1],
12102 index - 1,
12103 threads,
12104 );
12105 bwt_copy_8u_omp(
12106 &mut u[index_usize..n],
12107 &a[index_usize..n],
12108 SaSint::try_from(n - index_usize).expect("fits"),
12109 threads,
12110 );
12111 }
12112 index
12113}
12114
12115pub fn libsais_bwt_aux_omp(
12128 t: &[u8],
12129 u: &mut [u8],
12130 a: &mut [SaSint],
12131 fs: SaSint,
12132 freq: Option<&mut [SaSint]>,
12133 r: SaSint,
12134 i: &mut [SaSint],
12135 threads: SaSint,
12136) -> SaSint {
12137 let n = t.len();
12138 if threads < 0
12139 || fs < 0
12140 || r < 2
12141 || (r & (r - 1)) != 0
12142 || u.len() < n
12143 || a.len() < n.saturating_add(usize::try_from(fs).unwrap_or(usize::MAX))
12144 {
12145 return -1;
12146 }
12147 if let Some(freq) = freq.as_ref() {
12148 if freq.len() < ALPHABET_SIZE {
12149 return -1;
12150 }
12151 }
12152 let sample_count = if n == 0 {
12153 1
12154 } else {
12155 usize::try_from((SaSint::try_from(n).expect("input length must fit SaSint") - 1) / r)
12156 .expect("sample count must be non-negative")
12157 + 1
12158 };
12159 if i.len() < sample_count {
12160 return -1;
12161 }
12162 if n <= 1 {
12163 if let Some(freq) = freq {
12164 freq[..ALPHABET_SIZE].fill(0);
12165 if n == 1 {
12166 u[0] = t[0];
12167 freq[t[0] as usize] += 1;
12168 }
12169 } else if n == 1 {
12170 u[0] = t[0];
12171 }
12172 i[0] = n as SaSint;
12173 return 0;
12174 }
12175
12176 let threads = normalize_omp_threads(threads);
12177 let index = libsais_main(t, a, LIBSAIS_FLAGS_BWT, r, Some(i), fs, freq, threads);
12178 if index == 0 {
12179 let split = usize::try_from(i[0]).expect("primary index must be non-negative");
12180 u[0] = t[n - 1];
12181 bwt_copy_8u_omp(&mut u[1..split], &a[..split - 1], i[0] - 1, threads);
12182 bwt_copy_8u_omp(
12183 &mut u[split..n],
12184 &a[split..n],
12185 SaSint::try_from(n - split).expect("fits"),
12186 threads,
12187 );
12188 }
12189 index
12190}
12191
12192#[doc(hidden)]
12194pub fn compute_phi(
12195 sa: &[SaSint],
12196 plcp: &mut [SaSint],
12197 n: SaSint,
12198 omp_block_start: FastSint,
12199 omp_block_size: FastSint,
12200) {
12201 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12202 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12203 let end = start + size;
12204 let n_usize = usize::try_from(n).expect("n must be non-negative");
12205 let mut i = start;
12206 let mut k = if omp_block_start > 0 {
12207 sa[start - 1]
12208 } else {
12209 n
12210 };
12211
12212 let fast_end = omp_block_start + omp_block_size - 64 - 3;
12213 while (i as FastSint) < fast_end {
12214 plcp[usize::try_from(sa[i]).expect("suffix index must be non-negative")] = k;
12215 k = sa[i];
12216 plcp[usize::try_from(sa[i + 1]).expect("suffix index must be non-negative")] = k;
12217 k = sa[i + 1];
12218 plcp[usize::try_from(sa[i + 2]).expect("suffix index must be non-negative")] = k;
12219 k = sa[i + 2];
12220 plcp[usize::try_from(sa[i + 3]).expect("suffix index must be non-negative")] = k;
12221 k = sa[i + 3];
12222 i += 4;
12223 }
12224
12225 while i < end.min(n_usize) {
12226 plcp[usize::try_from(sa[i]).expect("suffix index must be non-negative")] = k;
12227 k = sa[i];
12228 i += 1;
12229 }
12230}
12231
12232#[doc(hidden)]
12234pub fn compute_phi_omp(sa: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12235 if threads == 1 || n < 65_536 {
12236 compute_phi(sa, plcp, n, 0, n as FastSint);
12237 return;
12238 }
12239
12240 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12241 let block_stride = ((n as FastSint) / (threads as FastSint)) & !15;
12242 let plcp_addr = plcp.as_mut_ptr() as usize;
12243 let n_usize = usize::try_from(n).expect("n must be non-negative");
12244
12245 run_rayon_with_threads(threads_usize, || {
12246 (0..threads_usize).into_par_iter().for_each(|thread| {
12247 let block_start = thread as FastSint * block_stride;
12248 let block_size = if thread + 1 < threads_usize {
12249 block_stride
12250 } else {
12251 n as FastSint - block_start
12252 };
12253 let start = usize::try_from(block_start).expect("omp_block_start must be non-negative");
12254 let size = usize::try_from(block_size).expect("omp_block_size must be non-negative");
12255 let end = start + size;
12256 let mut i = start;
12257 let mut k = if block_start > 0 { sa[start - 1] } else { n };
12258 let plcp_ptr = plcp_addr as *mut SaSint;
12259
12260 let fast_end = block_start + block_size - 64 - 3;
12261 while (i as FastSint) < fast_end {
12262 unsafe {
12263 *plcp_ptr
12265 .add(usize::try_from(sa[i]).expect("suffix index must be non-negative")) =
12266 k;
12267 k = sa[i];
12268 *plcp_ptr.add(
12269 usize::try_from(sa[i + 1]).expect("suffix index must be non-negative"),
12270 ) = k;
12271 k = sa[i + 1];
12272 *plcp_ptr.add(
12273 usize::try_from(sa[i + 2]).expect("suffix index must be non-negative"),
12274 ) = k;
12275 k = sa[i + 2];
12276 *plcp_ptr.add(
12277 usize::try_from(sa[i + 3]).expect("suffix index must be non-negative"),
12278 ) = k;
12279 k = sa[i + 3];
12280 }
12281 i += 4;
12282 }
12283
12284 while i < end.min(n_usize) {
12285 unsafe {
12286 *plcp_ptr
12288 .add(usize::try_from(sa[i]).expect("suffix index must be non-negative")) =
12289 k;
12290 }
12291 k = sa[i];
12292 i += 1;
12293 }
12294 });
12295 });
12296}
12297
12298#[doc(hidden)]
12300pub fn compute_plcp(
12301 t: &[u8],
12302 plcp: &mut [SaSint],
12303 n: FastSint,
12304 omp_block_start: FastSint,
12305 omp_block_size: FastSint,
12306) {
12307 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12308 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12309 let end = start + size;
12310 let n_usize = usize::try_from(n).expect("n must be non-negative");
12311 let mut l = 0usize;
12312
12313 for i in start..end.min(n_usize) {
12314 let k = usize::try_from(plcp[i]).expect("phi entry must be non-negative");
12315 let m = n_usize - i.max(k);
12316 while l < m && t[i + l] == t[k + l] {
12317 l += 1;
12318 }
12319 plcp[i] = SaSint::try_from(l).expect("LCP length must fit SaSint");
12320 l = l.saturating_sub(1);
12321 }
12322}
12323
12324#[doc(hidden)]
12326pub fn compute_plcp_omp(t: &[u8], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12327 if threads == 1 || n < 65_536 {
12328 compute_plcp(t, plcp, n as FastSint, 0, n as FastSint);
12329 return;
12330 }
12331
12332 let n_usize = usize::try_from(n).expect("n must be non-negative");
12333 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12334 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12335 run_rayon_with_threads(threads_usize, || {
12336 plcp[..n_usize]
12337 .par_chunks_mut(chunk_size)
12338 .enumerate()
12339 .for_each(|(chunk_index, chunk)| {
12340 let start = chunk_index * chunk_size;
12341 let mut l = 0usize;
12342 for (offset, value) in chunk.iter_mut().enumerate() {
12343 let i = start + offset;
12344 let k = usize::try_from(*value).expect("phi entry must be non-negative");
12345 let m = n_usize - i.max(k);
12346 while l < m && t[i + l] == t[k + l] {
12347 l += 1;
12348 }
12349 *value = SaSint::try_from(l).expect("LCP length must fit SaSint");
12350 l = l.saturating_sub(1);
12351 }
12352 });
12353 });
12354}
12355
12356#[doc(hidden)]
12358pub fn compute_plcp_gsa(
12359 t: &[u8],
12360 plcp: &mut [SaSint],
12361 omp_block_start: FastSint,
12362 omp_block_size: FastSint,
12363) {
12364 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12365 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12366 let end = start + size;
12367 let mut l = 0usize;
12368
12369 for i in start..end.min(t.len()) {
12370 let k = usize::try_from(plcp[i]).expect("phi entry must be non-negative");
12371 while t[i + l] > 0 && t[i + l] == t[k + l] {
12372 l += 1;
12373 }
12374 plcp[i] = SaSint::try_from(l).expect("LCP length must fit SaSint");
12375 l = l.saturating_sub(1);
12376 }
12377}
12378
12379#[doc(hidden)]
12381pub fn compute_plcp_gsa_omp(t: &[u8], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12382 if threads == 1 || n < 65_536 {
12383 compute_plcp_gsa(t, plcp, 0, n as FastSint);
12384 return;
12385 }
12386
12387 let n_usize = usize::try_from(n).expect("n must be non-negative");
12388 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12389 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12390 run_rayon_with_threads(threads_usize, || {
12391 plcp[..n_usize]
12392 .par_chunks_mut(chunk_size)
12393 .enumerate()
12394 .for_each(|(chunk_index, chunk)| {
12395 let start = chunk_index * chunk_size;
12396 let mut l = 0usize;
12397 for (offset, value) in chunk.iter_mut().enumerate() {
12398 let i = start + offset;
12399 let k = usize::try_from(*value).expect("phi entry must be non-negative");
12400 while t[i + l] > 0 && t[i + l] == t[k + l] {
12401 l += 1;
12402 }
12403 *value = SaSint::try_from(l).expect("LCP length must fit SaSint");
12404 l = l.saturating_sub(1);
12405 }
12406 });
12407 });
12408}
12409
12410#[doc(hidden)]
12412pub fn compute_plcp_int(
12413 t: &[SaSint],
12414 plcp: &mut [SaSint],
12415 n: FastSint,
12416 omp_block_start: FastSint,
12417 omp_block_size: FastSint,
12418) {
12419 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12420 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12421 let end = start + size;
12422 let n_usize = usize::try_from(n).expect("n must be non-negative");
12423 let mut l = 0usize;
12424
12425 for i in start..end.min(n_usize) {
12426 let k = usize::try_from(plcp[i]).expect("phi entry must be non-negative");
12427 let m = n_usize - i.max(k);
12428 while l < m && t[i + l] == t[k + l] {
12429 l += 1;
12430 }
12431 plcp[i] = SaSint::try_from(l).expect("LCP length must fit SaSint");
12432 l = l.saturating_sub(1);
12433 }
12434}
12435
12436#[doc(hidden)]
12438pub fn compute_plcp_int_omp(t: &[SaSint], plcp: &mut [SaSint], n: SaSint, threads: SaSint) {
12439 if threads == 1 || n < 65_536 {
12440 compute_plcp_int(t, plcp, n as FastSint, 0, n as FastSint);
12441 return;
12442 }
12443
12444 let n_usize = usize::try_from(n).expect("n must be non-negative");
12445 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12446 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12447 run_rayon_with_threads(threads_usize, || {
12448 plcp[..n_usize]
12449 .par_chunks_mut(chunk_size)
12450 .enumerate()
12451 .for_each(|(chunk_index, chunk)| {
12452 let start = chunk_index * chunk_size;
12453 let mut l = 0usize;
12454 for (offset, value) in chunk.iter_mut().enumerate() {
12455 let i = start + offset;
12456 let k = usize::try_from(*value).expect("phi entry must be non-negative");
12457 let m = n_usize - i.max(k);
12458 while l < m && t[i + l] == t[k + l] {
12459 l += 1;
12460 }
12461 *value = SaSint::try_from(l).expect("LCP length must fit SaSint");
12462 l = l.saturating_sub(1);
12463 }
12464 });
12465 });
12466}
12467
12468#[doc(hidden)]
12470pub fn compute_lcp(
12471 plcp: &[SaSint],
12472 sa: &[SaSint],
12473 lcp: &mut [SaSint],
12474 omp_block_start: FastSint,
12475 omp_block_size: FastSint,
12476) {
12477 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12478 let size = usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12479 let end = start + size;
12480
12481 for i in start..end.min(sa.len()) {
12482 lcp[i] = plcp[usize::try_from(sa[i]).expect("suffix index must be non-negative")];
12483 }
12484}
12485
12486#[doc(hidden)]
12488pub fn compute_lcp_omp(
12489 plcp: &[SaSint],
12490 sa: &[SaSint],
12491 lcp: &mut [SaSint],
12492 n: SaSint,
12493 threads: SaSint,
12494) {
12495 if threads == 1 || n < 65_536 {
12496 compute_lcp(plcp, sa, lcp, 0, n as FastSint);
12497 return;
12498 }
12499
12500 let n_usize = usize::try_from(n).expect("n must be non-negative");
12501 assert!(plcp.len() >= n_usize);
12502 assert!(sa.len() >= n_usize);
12503 assert!(lcp.len() >= n_usize);
12504 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
12505 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
12506 let plcp_ptr = plcp.as_ptr() as usize;
12507 let sa_ptr = sa.as_ptr() as usize;
12508 run_rayon_with_threads(threads_usize, || {
12509 lcp[..n_usize]
12510 .par_chunks_mut(chunk_size)
12511 .enumerate()
12512 .for_each(|(chunk_index, chunk)| {
12513 let start = chunk_index * chunk_size;
12514 let dst_ptr = chunk.as_mut_ptr();
12515 let sa_ptr = sa_ptr as *const SaSint;
12516 let plcp_ptr = plcp_ptr as *const SaSint;
12517 for offset in 0..chunk.len() {
12518 let i = start + offset;
12519 let suffix = unsafe { *sa_ptr.add(i) };
12520 let suffix =
12521 usize::try_from(suffix).expect("suffix index must be non-negative");
12522 assert!(suffix < plcp.len());
12523 unsafe {
12524 *dst_ptr.add(offset) = *plcp_ptr.add(suffix);
12525 }
12526 }
12527 });
12528 });
12529}
12530
12531pub fn libsais_plcp(t: &[u8], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
12539 if sa.len() != t.len() || plcp.len() != t.len() {
12540 return -1;
12541 }
12542 if t.len() <= 1 {
12543 if t.len() == 1 {
12544 plcp[0] = 0;
12545 }
12546 return 0;
12547 }
12548
12549 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12550 compute_phi_omp(sa, plcp, n, 1);
12551 compute_plcp_omp(t, plcp, n, 1);
12552 0
12553}
12554
12555pub fn libsais_plcp_gsa(t: &[u8], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
12563 if t.last().copied().unwrap_or(0) != 0 {
12564 return -1;
12565 }
12566 if sa.len() != t.len() || plcp.len() != t.len() {
12567 return -1;
12568 }
12569 if t.len() <= 1 {
12570 if t.len() == 1 {
12571 plcp[0] = 0;
12572 }
12573 return 0;
12574 }
12575
12576 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12577 compute_phi_omp(sa, plcp, n, 1);
12578 compute_plcp_gsa_omp(t, plcp, n, 1);
12579 0
12580}
12581
12582pub fn libsais_plcp_int(t: &[SaSint], sa: &[SaSint], plcp: &mut [SaSint]) -> SaSint {
12590 if sa.len() != t.len() || plcp.len() != t.len() {
12591 return -1;
12592 }
12593 if t.len() <= 1 {
12594 if t.len() == 1 {
12595 plcp[0] = 0;
12596 }
12597 return 0;
12598 }
12599
12600 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12601 compute_phi_omp(sa, plcp, n, 1);
12602 compute_plcp_int_omp(t, plcp, n, 1);
12603 0
12604}
12605
12606pub fn libsais_lcp(plcp: &[SaSint], sa: &[SaSint], lcp: &mut [SaSint]) -> SaSint {
12614 if plcp.len() != sa.len() || lcp.len() != sa.len() {
12615 return -1;
12616 }
12617 if sa.len() <= 1 {
12618 if sa.len() == 1 {
12619 lcp[0] = plcp[usize::try_from(sa[0]).expect("suffix index must be non-negative")];
12620 }
12621 return 0;
12622 }
12623
12624 compute_lcp_omp(
12625 plcp,
12626 sa,
12627 lcp,
12628 SaSint::try_from(sa.len()).expect("suffix array length must fit SaSint"),
12629 1,
12630 );
12631 0
12632}
12633
12634pub fn libsais_plcp_omp(t: &[u8], sa: &[SaSint], plcp: &mut [SaSint], threads: SaSint) -> SaSint {
12643 if threads < 0 {
12644 return -1;
12645 }
12646 if sa.len() != t.len() || plcp.len() != t.len() {
12647 return -1;
12648 }
12649 if t.len() <= 1 {
12650 if t.len() == 1 {
12651 plcp[0] = 0;
12652 }
12653 return 0;
12654 }
12655
12656 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12657 let threads = normalize_omp_threads(threads);
12658 compute_phi_omp(sa, plcp, n, threads);
12659 compute_plcp_omp(t, plcp, n, threads);
12660 0
12661}
12662
12663pub fn libsais_plcp_gsa_omp(
12672 t: &[u8],
12673 sa: &[SaSint],
12674 plcp: &mut [SaSint],
12675 threads: SaSint,
12676) -> SaSint {
12677 if threads < 0 || t.last().copied().unwrap_or(0) != 0 {
12678 return -1;
12679 }
12680 if sa.len() != t.len() || plcp.len() != t.len() {
12681 return -1;
12682 }
12683 if t.len() <= 1 {
12684 if t.len() == 1 {
12685 plcp[0] = 0;
12686 }
12687 return 0;
12688 }
12689
12690 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12691 let threads = normalize_omp_threads(threads);
12692 compute_phi_omp(sa, plcp, n, threads);
12693 compute_plcp_gsa_omp(t, plcp, n, threads);
12694 0
12695}
12696
12697pub fn libsais_plcp_int_omp(
12706 t: &[SaSint],
12707 sa: &[SaSint],
12708 plcp: &mut [SaSint],
12709 threads: SaSint,
12710) -> SaSint {
12711 if threads < 0 {
12712 return -1;
12713 }
12714 if sa.len() != t.len() || plcp.len() != t.len() {
12715 return -1;
12716 }
12717 if t.len() <= 1 {
12718 if t.len() == 1 {
12719 plcp[0] = 0;
12720 }
12721 return 0;
12722 }
12723
12724 let n = SaSint::try_from(t.len()).expect("input length must fit SaSint");
12725 let threads = normalize_omp_threads(threads);
12726 compute_phi_omp(sa, plcp, n, threads);
12727 compute_plcp_int_omp(t, plcp, n, threads);
12728 0
12729}
12730
12731pub fn libsais_lcp_omp(
12740 plcp: &[SaSint],
12741 sa: &[SaSint],
12742 lcp: &mut [SaSint],
12743 threads: SaSint,
12744) -> SaSint {
12745 if threads < 0 {
12746 return -1;
12747 }
12748 if plcp.len() != sa.len() || lcp.len() != sa.len() {
12749 return -1;
12750 }
12751 if sa.len() <= 1 {
12752 if sa.len() == 1 {
12753 lcp[0] = plcp[usize::try_from(sa[0]).expect("suffix index must be non-negative")];
12754 }
12755 return 0;
12756 }
12757
12758 compute_lcp_omp(
12759 plcp,
12760 sa,
12761 lcp,
12762 SaSint::try_from(sa.len()).expect("suffix array length must fit SaSint"),
12763 normalize_omp_threads(threads),
12764 );
12765 0
12766}
12767
12768#[doc(hidden)]
12770pub fn unbwt_compute_histogram(t: &[u8], n: FastSint, count: &mut [SaUint]) {
12771 let n = usize::try_from(n).expect("n must be non-negative");
12772 assert!(count.len() >= ALPHABET_SIZE);
12773 for &byte in &t[..n] {
12774 count[byte as usize] += 1;
12775 }
12776}
12777
12778#[doc(hidden)]
12780pub fn unbwt_transpose_bucket2(bucket2: &mut [SaUint]) {
12781 assert!(bucket2.len() >= ALPHABET_SIZE * ALPHABET_SIZE);
12782 for x in 0..ALPHABET_SIZE {
12783 for y in x + 1..ALPHABET_SIZE {
12784 bucket2.swap((y << 8) + x, (x << 8) + y);
12785 }
12786 }
12787}
12788
12789#[doc(hidden)]
12791pub fn unbwt_compute_bigram_histogram_single(
12792 t: &[u8],
12793 bucket1: &mut [SaUint],
12794 bucket2: &mut [SaUint],
12795 index: FastUint,
12796) {
12797 let mut sum = 1usize;
12798 for c in 0..ALPHABET_SIZE {
12799 let prev = sum;
12800 sum += bucket1[c] as usize;
12801 bucket1[c] = prev as SaUint;
12802 if prev != sum {
12803 let bucket2_p = &mut bucket2[c << 8..(c + 1) << 8];
12804
12805 let hi = sum.min(index);
12806 if hi > prev {
12807 unbwt_compute_histogram(&t[prev..], (hi - prev) as FastSint, bucket2_p);
12808 }
12809
12810 let lo = prev.max(index + 1);
12811 if sum > lo {
12812 unbwt_compute_histogram(&t[lo - 1..], (sum - lo) as FastSint, bucket2_p);
12813 }
12814 }
12815 }
12816
12817 unbwt_transpose_bucket2(bucket2);
12818}
12819
12820#[doc(hidden)]
12822pub fn unbwt_calculate_fastbits(
12823 bucket2: &mut [SaUint],
12824 fastbits: &mut [u16],
12825 lastc: FastUint,
12826 shift: FastUint,
12827) {
12828 let mut v = 0usize;
12829 let mut w = 0usize;
12830 let mut sum = 1usize;
12831
12832 for c in 0..ALPHABET_SIZE {
12833 if c == lastc {
12834 sum += 1;
12835 }
12836
12837 for _d in 0..ALPHABET_SIZE {
12838 let prev = sum;
12839 sum += bucket2[w] as usize;
12840 bucket2[w] = prev as SaUint;
12841 if prev != sum {
12842 while v <= ((sum - 1) >> shift) {
12843 fastbits[v] = w as u16;
12844 v += 1;
12845 }
12846 }
12847 w += 1;
12848 }
12849 }
12850}
12851
12852#[doc(hidden)]
12854pub fn unbwt_calculate_bi_psi(
12855 t: &[u8],
12856 p: &mut [SaUint],
12857 bucket1: &mut [SaUint],
12858 bucket2: &mut [SaUint],
12859 index: FastUint,
12860 omp_block_start: FastSint,
12861 omp_block_end: FastSint,
12862) {
12863 let mut i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12864 let mut j = index;
12865 let block_end = usize::try_from(omp_block_end).expect("omp_block_end must be non-negative");
12866 if block_end < j {
12867 j = block_end;
12868 }
12869 while i < j {
12870 let c = t[i] as usize;
12871 let pidx = bucket1[c] as usize;
12872 bucket1[c] += 1;
12873 let tidx = index as isize - pidx as isize;
12874 if tidx != 0 {
12875 let src =
12876 pidx.wrapping_add((tidx >> ((std::mem::size_of::<FastSint>() * 8) - 1)) as usize);
12877 let w = ((t[src] as usize) << 8) + c;
12878 let dst = bucket2[w] as usize;
12879 p[dst] = i as SaUint;
12880 bucket2[w] += 1;
12881 }
12882 i += 1;
12883 }
12884
12885 let mut i = index;
12886 if usize::try_from(omp_block_start).expect("omp_block_start must be non-negative") > i {
12887 i = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12888 }
12889 i += 1;
12890 while i <= block_end {
12891 let c = t[i - 1] as usize;
12892 let pidx = bucket1[c] as usize;
12893 bucket1[c] += 1;
12894 let tidx = index as isize - pidx as isize;
12895 if tidx != 0 {
12896 let src =
12897 pidx.wrapping_add((tidx >> ((std::mem::size_of::<FastSint>() * 8) - 1)) as usize);
12898 let w = ((t[src] as usize) << 8) + c;
12899 let dst = bucket2[w] as usize;
12900 p[dst] = i as SaUint;
12901 bucket2[w] += 1;
12902 }
12903 i += 1;
12904 }
12905}
12906
/// Alias of [`unbwt_calculate_bi_psi`] under a mixed-case name; all arguments are forwarded
/// unchanged.
#[doc(hidden)]
// `non_snake_case` is needed for the name itself; `dead_code` silences the lint when
// nothing in the crate calls this alias.
#[allow(dead_code, non_snake_case)]
pub fn unbwt_calculate_biPSI(
    t: &[u8],
    p: &mut [SaUint],
    bucket1: &mut [SaUint],
    bucket2: &mut [SaUint],
    index: FastUint,
    omp_block_start: FastSint,
    omp_block_end: FastSint,
) {
    unbwt_calculate_bi_psi(
        t,
        p,
        bucket1,
        bucket2,
        index,
        omp_block_start,
        omp_block_end,
    );
}
12929
12930#[doc(hidden)]
12932pub fn unbwt_init_single(
12933 t: &[u8],
12934 p: &mut [SaUint],
12935 n: SaSint,
12936 freq: Option<&[SaSint]>,
12937 i: &[SaUint],
12938 bucket2: &mut [SaUint],
12939 fastbits: &mut [u16],
12940) {
12941 let mut bucket1 = vec![0u32; ALPHABET_SIZE];
12942 let index = i[0] as usize;
12943 let lastc = t[0] as usize;
12944 let mut shift = 0usize;
12945 while (usize::try_from(n).expect("n must be non-negative") >> shift)
12946 > (1usize << UNBWT_FASTBITS)
12947 {
12948 shift += 1;
12949 }
12950
12951 if let Some(freq) = freq {
12952 for c in 0..ALPHABET_SIZE {
12953 bucket1[c] = freq[c] as SaUint;
12954 }
12955 } else {
12956 unbwt_compute_histogram(t, n as FastSint, &mut bucket1);
12957 }
12958
12959 bucket2.fill(0);
12960 unbwt_compute_bigram_histogram_single(t, &mut bucket1, bucket2, index);
12961 unbwt_calculate_fastbits(bucket2, fastbits, lastc, shift);
12962 unbwt_calculate_bi_psi(t, p, &mut bucket1, bucket2, index, 0, n as FastSint);
12963}
12964
12965#[doc(hidden)]
12967pub fn unbwt_compute_bigram_histogram_parallel(
12968 t: &[u8],
12969 index: FastUint,
12970 bucket1: &mut [SaUint],
12971 bucket2: &mut [SaUint],
12972 omp_block_start: FastSint,
12973 omp_block_size: FastSint,
12974) {
12975 let start = usize::try_from(omp_block_start).expect("omp_block_start must be non-negative");
12976 let end = start + usize::try_from(omp_block_size).expect("omp_block_size must be non-negative");
12977 for &c_u8 in &t[start..end] {
12978 let c = c_u8 as usize;
12979 let p = bucket1[c] as usize;
12980 bucket1[c] += 1;
12981 let tidx = index as isize - p as isize;
12982 if tidx != 0 {
12983 let src =
12984 p.wrapping_add((tidx >> ((std::mem::size_of::<FastSint>() * 8) - 1)) as usize);
12985 let w = ((t[src] as usize) << 8) + c;
12986 bucket2[w] += 1;
12987 }
12988 }
12989}
12990
/// Prepares the inverse-BWT tables using per-thread scratch segments of `buckets`.
///
/// Falls back to [`unbwt_init_single`] when `threads <= 1`, when `n < 65_536`, or when no
/// `buckets` scratch space is supplied. Otherwise the text is cut into `threads` blocks
/// and each block gets one segment of `buckets` made of `ALPHABET_SIZE` symbol entries
/// followed by `ALPHABET_SIZE * ALPHABET_SIZE` bigram entries. The blocks are visited one
/// after another in plain loops on the calling thread.
///
/// `freq` is only used on the fallback paths; the block-wise path always counts symbols
/// from `t`.
///
/// # Panics
///
/// Panics if `n` is negative, if `buckets` is smaller than `threads` segments, if `i` or
/// `t` is empty, if `bucket2` does not have exactly `ALPHABET_SIZE * ALPHABET_SIZE`
/// entries, or if any of the called helpers panics.
#[doc(hidden)]
pub fn unbwt_init_parallel(
    t: &[u8],
    p: &mut [SaUint],
    n: SaSint,
    freq: Option<&[SaSint]>,
    i: &[SaUint],
    bucket2: &mut [SaUint],
    fastbits: &mut [u16],
    buckets: Option<&mut [SaUint]>,
    threads: SaSint,
) {
    let num_threads = usize::try_from(threads.max(1)).expect("threads must be non-negative");
    if num_threads <= 1 || usize::try_from(n).expect("n must be non-negative") < 65_536 {
        unbwt_init_single(t, p, n, freq, i, bucket2, fastbits);
        return;
    }

    // Without scratch space there is nowhere to keep per-block state.
    let buckets = match buckets {
        Some(buckets) => buckets,
        None => {
            unbwt_init_single(t, p, n, freq, i, bucket2, fastbits);
            return;
        }
    };

    // One segment per block: symbol table first, bigram table after it.
    let segment_len = ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE;
    assert!(buckets.len() >= num_threads * segment_len);

    let index = i[0] as usize;
    let lastc = t[0] as usize;
    // Smallest shift for which `n >> shift` does not exceed `1 << UNBWT_FASTBITS`.
    let mut shift = 0usize;
    while (usize::try_from(n).expect("n must be non-negative") >> shift)
        > (1usize << UNBWT_FASTBITS)
    {
        shift += 1;
    }

    let mut bucket1 = vec![0u32; ALPHABET_SIZE];
    bucket2.fill(0);

    // Block length is n / threads rounded down to a multiple of 16; the last block takes
    // whatever remains.
    let n_fast = n as FastSint;
    let block_stride = (n_fast / num_threads as FastSint) & (-16);
    let mut block_starts = vec![0usize; num_threads];
    let mut block_sizes = vec![0usize; num_threads];

    // Phase 1: per-block symbol histogram into each segment's symbol table.
    for thread in 0..num_threads {
        let start = usize::try_from(thread as FastSint * block_stride)
            .expect("block start must be non-negative");
        let size = if thread + 1 < num_threads {
            usize::try_from(block_stride).expect("block stride must be non-negative")
        } else {
            usize::try_from(n_fast - thread as FastSint * block_stride)
                .expect("block size must be non-negative")
        };
        block_starts[thread] = start;
        block_sizes[thread] = size;

        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_local, _) = segment.split_at_mut(ALPHABET_SIZE);
        bucket1_local.fill(0);
        unbwt_compute_histogram(&t[start..], size as FastSint, bucket1_local);
    }

    // Phase 2: accumulate the global symbol counts in `bucket1` while replacing each
    // segment's count with the total of all earlier blocks.
    for thread in 0..num_threads {
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_temp, _) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE {
            let a = bucket1[c];
            let b = bucket1_temp[c];
            bucket1[c] = a + b;
            bucket1_temp[c] = a;
        }
    }

    // Turn the global counts into running start offsets, beginning at 1.
    let mut sum = 1usize;
    for c in 0..ALPHABET_SIZE {
        let prev = sum;
        sum += bucket1[c] as usize;
        bucket1[c] = prev as SaUint;
    }

    // Phase 3: shift each segment's symbol offsets by the global starts, then count the
    // block's bigrams into the segment's bigram table.
    for thread in 0..num_threads {
        let start = block_starts[thread];
        let size = block_sizes[thread];
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_local, bucket2_local) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE {
            bucket1_local[c] += bucket1[c];
        }
        bucket2_local.fill(0);
        unbwt_compute_bigram_histogram_parallel(
            t,
            index,
            bucket1_local,
            bucket2_local,
            start as FastSint,
            size as FastSint,
        );
    }

    // Phase 4: same accumulate-and-replace step as phase 2, now for the bigram tables.
    for thread in 0..num_threads {
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (_, bucket2_temp) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE * ALPHABET_SIZE {
            let a = bucket2[c];
            let b = bucket2_temp[c];
            bucket2[c] = a + b;
            bucket2_temp[c] = a;
        }
    }

    // Converts the global bigram counts in `bucket2` to start offsets and fills `fastbits`.
    unbwt_calculate_fastbits(bucket2, fastbits, lastc, shift);

    // Phase 5: move every segment's symbol table one segment to the right (walking
    // backwards so nothing is overwritten early) and put the global starts into segment 0.
    for thread in (1..num_threads).rev() {
        let src_start = (thread - 1) * segment_len;
        let dst_start = thread * segment_len;
        let (head, tail) = buckets.split_at_mut(dst_start);
        let src = &head[src_start..src_start + ALPHABET_SIZE];
        let dst = &mut tail[..ALPHABET_SIZE];
        dst.copy_from_slice(src);
    }
    buckets[..ALPHABET_SIZE].copy_from_slice(&bucket1);

    // Phase 6: shift each segment's bigram offsets by the global bigram starts and scatter
    // the block's positions into `p`.
    for thread in 0..num_threads {
        let start = block_starts[thread];
        let size = block_sizes[thread];
        let segment = &mut buckets[thread * segment_len..(thread + 1) * segment_len];
        let (bucket1_local, bucket2_local) = segment.split_at_mut(ALPHABET_SIZE);
        for c in 0..ALPHABET_SIZE * ALPHABET_SIZE {
            bucket2_local[c] += bucket2[c];
        }
        unbwt_calculate_bi_psi(
            t,
            p,
            bucket1_local,
            bucket2_local,
            index,
            start as FastSint,
            (start + size) as FastSint,
        );
    }

    // The last segment's bigram table becomes the final content of `bucket2`.
    let last_segment = &buckets[(num_threads - 1) * segment_len..num_threads * segment_len];
    let (_, last_bucket2) = last_segment.split_at(ALPHABET_SIZE);
    bucket2.copy_from_slice(last_bucket2);
}
13139
/// Reorders a 16-bit value so that its native-endian byte representation has the high
/// byte first.
///
/// The decoder stores the result with `to_ne_bytes()`. An unconditional byte swap is only
/// correct on little-endian targets; on big-endian targets the value already has the
/// required layout and must be left untouched, which is exactly what `to_be` does.
fn bswap16(value: u16) -> u16 {
    value.to_be()
}
13143
13144fn unbwt_resolve_symbol(bucket2: &[SaUint], fastbits: &[u16], shift: FastUint, p: SaUint) -> u16 {
13145 let mut c = fastbits[(p as usize) >> shift];
13146 while bucket2[c as usize] <= p {
13147 c += 1;
13148 }
13149 c
13150}
13151
13152#[doc(hidden)]
13154pub fn unbwt_decode_1(
13155 u: &mut [u8],
13156 p: &[SaUint],
13157 bucket2: &[SaUint],
13158 fastbits: &[u16],
13159 shift: FastUint,
13160 i0: &mut FastUint,
13161 k: FastUint,
13162) {
13163 let words = &mut u[..2 * k];
13164 let mut p0 = *i0 as SaUint;
13165
13166 for i in 0..k {
13167 let c0 = unbwt_resolve_symbol(bucket2, fastbits, shift, p0);
13168 p0 = p[p0 as usize];
13169 let bytes = bswap16(c0).to_ne_bytes();
13170 words[2 * i] = bytes[0];
13171 words[2 * i + 1] = bytes[1];
13172 }
13173
13174 *i0 = p0 as FastUint;
13175}
13176
13177#[doc(hidden)]
13179pub fn unbwt_decode_2(
13180 u: &mut [u8],
13181 p: &[SaUint],
13182 bucket2: &[SaUint],
13183 fastbits: &[u16],
13184 shift: FastUint,
13185 r: FastUint,
13186 i0: &mut FastUint,
13187 i1: &mut FastUint,
13188 k: FastUint,
13189) {
13190 let width = 2 * k;
13191 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13192 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13193}
13194
13195#[doc(hidden)]
13197pub fn unbwt_decode_3(
13198 u: &mut [u8],
13199 p: &[SaUint],
13200 bucket2: &[SaUint],
13201 fastbits: &[u16],
13202 shift: FastUint,
13203 r: FastUint,
13204 i0: &mut FastUint,
13205 i1: &mut FastUint,
13206 i2: &mut FastUint,
13207 k: FastUint,
13208) {
13209 let width = 2 * k;
13210 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13211 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13212 unbwt_decode_1(
13213 &mut u[2 * r..2 * r + width],
13214 p,
13215 bucket2,
13216 fastbits,
13217 shift,
13218 i2,
13219 k,
13220 );
13221}
13222
13223#[doc(hidden)]
13225pub fn unbwt_decode_4(
13226 u: &mut [u8],
13227 p: &[SaUint],
13228 bucket2: &[SaUint],
13229 fastbits: &[u16],
13230 shift: FastUint,
13231 r: FastUint,
13232 i0: &mut FastUint,
13233 i1: &mut FastUint,
13234 i2: &mut FastUint,
13235 i3: &mut FastUint,
13236 k: FastUint,
13237) {
13238 let width = 2 * k;
13239 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13240 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13241 unbwt_decode_1(
13242 &mut u[2 * r..2 * r + width],
13243 p,
13244 bucket2,
13245 fastbits,
13246 shift,
13247 i2,
13248 k,
13249 );
13250 unbwt_decode_1(
13251 &mut u[3 * r..3 * r + width],
13252 p,
13253 bucket2,
13254 fastbits,
13255 shift,
13256 i3,
13257 k,
13258 );
13259}
13260
13261#[doc(hidden)]
13263pub fn unbwt_decode_5(
13264 u: &mut [u8],
13265 p: &[SaUint],
13266 bucket2: &[SaUint],
13267 fastbits: &[u16],
13268 shift: FastUint,
13269 r: FastUint,
13270 i0: &mut FastUint,
13271 i1: &mut FastUint,
13272 i2: &mut FastUint,
13273 i3: &mut FastUint,
13274 i4: &mut FastUint,
13275 k: FastUint,
13276) {
13277 let width = 2 * k;
13278 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13279 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13280 unbwt_decode_1(
13281 &mut u[2 * r..2 * r + width],
13282 p,
13283 bucket2,
13284 fastbits,
13285 shift,
13286 i2,
13287 k,
13288 );
13289 unbwt_decode_1(
13290 &mut u[3 * r..3 * r + width],
13291 p,
13292 bucket2,
13293 fastbits,
13294 shift,
13295 i3,
13296 k,
13297 );
13298 unbwt_decode_1(
13299 &mut u[4 * r..4 * r + width],
13300 p,
13301 bucket2,
13302 fastbits,
13303 shift,
13304 i4,
13305 k,
13306 );
13307}
13308
13309#[doc(hidden)]
13311pub fn unbwt_decode_6(
13312 u: &mut [u8],
13313 p: &[SaUint],
13314 bucket2: &[SaUint],
13315 fastbits: &[u16],
13316 shift: FastUint,
13317 r: FastUint,
13318 i0: &mut FastUint,
13319 i1: &mut FastUint,
13320 i2: &mut FastUint,
13321 i3: &mut FastUint,
13322 i4: &mut FastUint,
13323 i5: &mut FastUint,
13324 k: FastUint,
13325) {
13326 let width = 2 * k;
13327 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13328 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13329 unbwt_decode_1(
13330 &mut u[2 * r..2 * r + width],
13331 p,
13332 bucket2,
13333 fastbits,
13334 shift,
13335 i2,
13336 k,
13337 );
13338 unbwt_decode_1(
13339 &mut u[3 * r..3 * r + width],
13340 p,
13341 bucket2,
13342 fastbits,
13343 shift,
13344 i3,
13345 k,
13346 );
13347 unbwt_decode_1(
13348 &mut u[4 * r..4 * r + width],
13349 p,
13350 bucket2,
13351 fastbits,
13352 shift,
13353 i4,
13354 k,
13355 );
13356 unbwt_decode_1(
13357 &mut u[5 * r..5 * r + width],
13358 p,
13359 bucket2,
13360 fastbits,
13361 shift,
13362 i5,
13363 k,
13364 );
13365}
13366
13367#[doc(hidden)]
13369pub fn unbwt_decode_7(
13370 u: &mut [u8],
13371 p: &[SaUint],
13372 bucket2: &[SaUint],
13373 fastbits: &[u16],
13374 shift: FastUint,
13375 r: FastUint,
13376 i0: &mut FastUint,
13377 i1: &mut FastUint,
13378 i2: &mut FastUint,
13379 i3: &mut FastUint,
13380 i4: &mut FastUint,
13381 i5: &mut FastUint,
13382 i6: &mut FastUint,
13383 k: FastUint,
13384) {
13385 let width = 2 * k;
13386 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13387 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13388 unbwt_decode_1(
13389 &mut u[2 * r..2 * r + width],
13390 p,
13391 bucket2,
13392 fastbits,
13393 shift,
13394 i2,
13395 k,
13396 );
13397 unbwt_decode_1(
13398 &mut u[3 * r..3 * r + width],
13399 p,
13400 bucket2,
13401 fastbits,
13402 shift,
13403 i3,
13404 k,
13405 );
13406 unbwt_decode_1(
13407 &mut u[4 * r..4 * r + width],
13408 p,
13409 bucket2,
13410 fastbits,
13411 shift,
13412 i4,
13413 k,
13414 );
13415 unbwt_decode_1(
13416 &mut u[5 * r..5 * r + width],
13417 p,
13418 bucket2,
13419 fastbits,
13420 shift,
13421 i5,
13422 k,
13423 );
13424 unbwt_decode_1(
13425 &mut u[6 * r..6 * r + width],
13426 p,
13427 bucket2,
13428 fastbits,
13429 shift,
13430 i6,
13431 k,
13432 );
13433}
13434
13435#[doc(hidden)]
13437pub fn unbwt_decode_8(
13438 u: &mut [u8],
13439 p: &[SaUint],
13440 bucket2: &[SaUint],
13441 fastbits: &[u16],
13442 shift: FastUint,
13443 r: FastUint,
13444 i0: &mut FastUint,
13445 i1: &mut FastUint,
13446 i2: &mut FastUint,
13447 i3: &mut FastUint,
13448 i4: &mut FastUint,
13449 i5: &mut FastUint,
13450 i6: &mut FastUint,
13451 i7: &mut FastUint,
13452 k: FastUint,
13453) {
13454 let width = 2 * k;
13455 unbwt_decode_1(&mut u[0..width], p, bucket2, fastbits, shift, i0, k);
13456 unbwt_decode_1(&mut u[r..r + width], p, bucket2, fastbits, shift, i1, k);
13457 unbwt_decode_1(
13458 &mut u[2 * r..2 * r + width],
13459 p,
13460 bucket2,
13461 fastbits,
13462 shift,
13463 i2,
13464 k,
13465 );
13466 unbwt_decode_1(
13467 &mut u[3 * r..3 * r + width],
13468 p,
13469 bucket2,
13470 fastbits,
13471 shift,
13472 i3,
13473 k,
13474 );
13475 unbwt_decode_1(
13476 &mut u[4 * r..4 * r + width],
13477 p,
13478 bucket2,
13479 fastbits,
13480 shift,
13481 i4,
13482 k,
13483 );
13484 unbwt_decode_1(
13485 &mut u[5 * r..5 * r + width],
13486 p,
13487 bucket2,
13488 fastbits,
13489 shift,
13490 i5,
13491 k,
13492 );
13493 unbwt_decode_1(
13494 &mut u[6 * r..6 * r + width],
13495 p,
13496 bucket2,
13497 fastbits,
13498 shift,
13499 i6,
13500 k,
13501 );
13502 unbwt_decode_1(
13503 &mut u[7 * r..7 * r + width],
13504 p,
13505 bucket2,
13506 fastbits,
13507 shift,
13508 i7,
13509 k,
13510 );
13511}
13512
13513#[doc(hidden)]
13515pub fn unbwt_decode(
13516 u: &mut [u8],
13517 p: &[SaUint],
13518 n: SaSint,
13519 r: SaSint,
13520 i: &[SaUint],
13521 bucket2: &[SaUint],
13522 fastbits: &[u16],
13523 mut blocks: FastSint,
13524 remainder: FastUint,
13525) {
13526 let mut shift = 0usize;
13527 while (usize::try_from(n).expect("n must be non-negative") >> shift)
13528 > (1usize << UNBWT_FASTBITS)
13529 {
13530 shift += 1;
13531 }
13532 let mut offset = 0usize;
13533 let mut i_index = 0usize;
13534 let r_usize = usize::try_from(r).expect("r must be non-negative");
13535
13536 while blocks > 8 {
13537 let mut i0 = i[i_index] as FastUint;
13538 let mut i1 = i[i_index + 1] as FastUint;
13539 let mut i2 = i[i_index + 2] as FastUint;
13540 let mut i3 = i[i_index + 3] as FastUint;
13541 let mut i4 = i[i_index + 4] as FastUint;
13542 let mut i5 = i[i_index + 5] as FastUint;
13543 let mut i6 = i[i_index + 6] as FastUint;
13544 let mut i7 = i[i_index + 7] as FastUint;
13545 unbwt_decode_8(
13546 &mut u[offset..],
13547 p,
13548 bucket2,
13549 fastbits,
13550 shift,
13551 r_usize,
13552 &mut i0,
13553 &mut i1,
13554 &mut i2,
13555 &mut i3,
13556 &mut i4,
13557 &mut i5,
13558 &mut i6,
13559 &mut i7,
13560 r_usize >> 1,
13561 );
13562 i_index += 8;
13563 blocks -= 8;
13564 offset += 8 * r_usize;
13565 }
13566
13567 match blocks {
13568 1 => {
13569 let mut i0 = i[i_index] as FastUint;
13570 unbwt_decode_1(
13571 &mut u[offset..],
13572 p,
13573 bucket2,
13574 fastbits,
13575 shift,
13576 &mut i0,
13577 remainder >> 1,
13578 );
13579 }
13580 2 => {
13581 let mut i0 = i[i_index] as FastUint;
13582 let mut i1 = i[i_index + 1] as FastUint;
13583 unbwt_decode_2(
13584 &mut u[offset..],
13585 p,
13586 bucket2,
13587 fastbits,
13588 shift,
13589 r_usize,
13590 &mut i0,
13591 &mut i1,
13592 remainder >> 1,
13593 );
13594 unbwt_decode_1(
13595 &mut u[offset + 2 * (remainder >> 1)..],
13596 p,
13597 bucket2,
13598 fastbits,
13599 shift,
13600 &mut i0,
13601 (r_usize >> 1) - (remainder >> 1),
13602 );
13603 }
13604 3 => {
13605 let mut i0 = i[i_index] as FastUint;
13606 let mut i1 = i[i_index + 1] as FastUint;
13607 let mut i2 = i[i_index + 2] as FastUint;
13608 unbwt_decode_3(
13609 &mut u[offset..],
13610 p,
13611 bucket2,
13612 fastbits,
13613 shift,
13614 r_usize,
13615 &mut i0,
13616 &mut i1,
13617 &mut i2,
13618 remainder >> 1,
13619 );
13620 unbwt_decode_2(
13621 &mut u[offset + 2 * (remainder >> 1)..],
13622 p,
13623 bucket2,
13624 fastbits,
13625 shift,
13626 r_usize,
13627 &mut i0,
13628 &mut i1,
13629 (r_usize >> 1) - (remainder >> 1),
13630 );
13631 }
13632 4 => {
13633 let mut i0 = i[i_index] as FastUint;
13634 let mut i1 = i[i_index + 1] as FastUint;
13635 let mut i2 = i[i_index + 2] as FastUint;
13636 let mut i3 = i[i_index + 3] as FastUint;
13637 unbwt_decode_4(
13638 &mut u[offset..],
13639 p,
13640 bucket2,
13641 fastbits,
13642 shift,
13643 r_usize,
13644 &mut i0,
13645 &mut i1,
13646 &mut i2,
13647 &mut i3,
13648 remainder >> 1,
13649 );
13650 unbwt_decode_3(
13651 &mut u[offset + 2 * (remainder >> 1)..],
13652 p,
13653 bucket2,
13654 fastbits,
13655 shift,
13656 r_usize,
13657 &mut i0,
13658 &mut i1,
13659 &mut i2,
13660 (r_usize >> 1) - (remainder >> 1),
13661 );
13662 }
13663 5 => {
13664 let mut i0 = i[i_index] as FastUint;
13665 let mut i1 = i[i_index + 1] as FastUint;
13666 let mut i2 = i[i_index + 2] as FastUint;
13667 let mut i3 = i[i_index + 3] as FastUint;
13668 let mut i4 = i[i_index + 4] as FastUint;
13669 unbwt_decode_5(
13670 &mut u[offset..],
13671 p,
13672 bucket2,
13673 fastbits,
13674 shift,
13675 r_usize,
13676 &mut i0,
13677 &mut i1,
13678 &mut i2,
13679 &mut i3,
13680 &mut i4,
13681 remainder >> 1,
13682 );
13683 unbwt_decode_4(
13684 &mut u[offset + 2 * (remainder >> 1)..],
13685 p,
13686 bucket2,
13687 fastbits,
13688 shift,
13689 r_usize,
13690 &mut i0,
13691 &mut i1,
13692 &mut i2,
13693 &mut i3,
13694 (r_usize >> 1) - (remainder >> 1),
13695 );
13696 }
13697 6 => {
13698 let mut i0 = i[i_index] as FastUint;
13699 let mut i1 = i[i_index + 1] as FastUint;
13700 let mut i2 = i[i_index + 2] as FastUint;
13701 let mut i3 = i[i_index + 3] as FastUint;
13702 let mut i4 = i[i_index + 4] as FastUint;
13703 let mut i5 = i[i_index + 5] as FastUint;
13704 unbwt_decode_6(
13705 &mut u[offset..],
13706 p,
13707 bucket2,
13708 fastbits,
13709 shift,
13710 r_usize,
13711 &mut i0,
13712 &mut i1,
13713 &mut i2,
13714 &mut i3,
13715 &mut i4,
13716 &mut i5,
13717 remainder >> 1,
13718 );
13719 unbwt_decode_5(
13720 &mut u[offset + 2 * (remainder >> 1)..],
13721 p,
13722 bucket2,
13723 fastbits,
13724 shift,
13725 r_usize,
13726 &mut i0,
13727 &mut i1,
13728 &mut i2,
13729 &mut i3,
13730 &mut i4,
13731 (r_usize >> 1) - (remainder >> 1),
13732 );
13733 }
13734 7 => {
13735 let mut i0 = i[i_index] as FastUint;
13736 let mut i1 = i[i_index + 1] as FastUint;
13737 let mut i2 = i[i_index + 2] as FastUint;
13738 let mut i3 = i[i_index + 3] as FastUint;
13739 let mut i4 = i[i_index + 4] as FastUint;
13740 let mut i5 = i[i_index + 5] as FastUint;
13741 let mut i6 = i[i_index + 6] as FastUint;
13742 unbwt_decode_7(
13743 &mut u[offset..],
13744 p,
13745 bucket2,
13746 fastbits,
13747 shift,
13748 r_usize,
13749 &mut i0,
13750 &mut i1,
13751 &mut i2,
13752 &mut i3,
13753 &mut i4,
13754 &mut i5,
13755 &mut i6,
13756 remainder >> 1,
13757 );
13758 unbwt_decode_6(
13759 &mut u[offset + 2 * (remainder >> 1)..],
13760 p,
13761 bucket2,
13762 fastbits,
13763 shift,
13764 r_usize,
13765 &mut i0,
13766 &mut i1,
13767 &mut i2,
13768 &mut i3,
13769 &mut i4,
13770 &mut i5,
13771 (r_usize >> 1) - (remainder >> 1),
13772 );
13773 }
13774 8 => {
13775 let mut i0 = i[i_index] as FastUint;
13776 let mut i1 = i[i_index + 1] as FastUint;
13777 let mut i2 = i[i_index + 2] as FastUint;
13778 let mut i3 = i[i_index + 3] as FastUint;
13779 let mut i4 = i[i_index + 4] as FastUint;
13780 let mut i5 = i[i_index + 5] as FastUint;
13781 let mut i6 = i[i_index + 6] as FastUint;
13782 let mut i7 = i[i_index + 7] as FastUint;
13783 unbwt_decode_8(
13784 &mut u[offset..],
13785 p,
13786 bucket2,
13787 fastbits,
13788 shift,
13789 r_usize,
13790 &mut i0,
13791 &mut i1,
13792 &mut i2,
13793 &mut i3,
13794 &mut i4,
13795 &mut i5,
13796 &mut i6,
13797 &mut i7,
13798 remainder >> 1,
13799 );
13800 unbwt_decode_7(
13801 &mut u[offset + 2 * (remainder >> 1)..],
13802 p,
13803 bucket2,
13804 fastbits,
13805 shift,
13806 r_usize,
13807 &mut i0,
13808 &mut i1,
13809 &mut i2,
13810 &mut i3,
13811 &mut i4,
13812 &mut i5,
13813 &mut i6,
13814 (r_usize >> 1) - (remainder >> 1),
13815 );
13816 }
13817 _ => {}
13818 }
13819}
13820
13821#[doc(hidden)]
13823pub fn unbwt_decode_omp(
13824 t: &[u8],
13825 u: &mut [u8],
13826 p: &[SaUint],
13827 n: SaSint,
13828 r: SaSint,
13829 i: &[SaUint],
13830 bucket2: &[SaUint],
13831 fastbits: &[u16],
13832 threads: SaSint,
13833) {
13834 let lastc = t[0];
13835 let blocks = 1 + ((n as FastSint - 1) / r as FastSint);
13836 let remainder = usize::try_from(n).expect("n must be non-negative")
13837 - usize::try_from(r).expect("r must be non-negative")
13838 * (usize::try_from(blocks).expect("blocks") - 1);
13839 let max_threads = usize::try_from(blocks.min(threads.max(1) as FastSint))
13840 .expect("thread count must fit usize");
13841 let block_stride = usize::try_from(blocks).expect("blocks must be non-negative") / max_threads;
13842 let block_remainder =
13843 usize::try_from(blocks).expect("blocks must be non-negative") % max_threads;
13844 let r_usize = usize::try_from(r).expect("r must be non-negative");
13845
13846 for thread in 0..max_threads {
13847 let block_size = block_stride + usize::from(thread < block_remainder);
13848 let block_start = block_stride * thread + thread.min(block_remainder);
13849 unbwt_decode(
13850 &mut u[r_usize * block_start..],
13851 p,
13852 n,
13853 r,
13854 &i[block_start..],
13855 bucket2,
13856 fastbits,
13857 block_size as FastSint,
13858 if thread + 1 < max_threads {
13859 r_usize
13860 } else {
13861 remainder
13862 },
13863 );
13864 }
13865 u[usize::try_from(n).expect("n must be non-negative") - 1] = lastc;
13866}
13867
13868#[doc(hidden)]
13870pub fn unbwt_core(
13871 t: &[u8],
13872 u: &mut [u8],
13873 p: &mut [SaUint],
13874 n: SaSint,
13875 freq: Option<&[SaSint]>,
13876 r: SaSint,
13877 i: &[SaUint],
13878 bucket2: &mut [SaUint],
13879 fastbits: &mut [u16],
13880 buckets: Option<&mut [SaUint]>,
13881 threads: SaSint,
13882) -> SaSint {
13883 if threads > 1 && n >= 262_144 {
13884 unbwt_init_parallel(t, p, n, freq, i, bucket2, fastbits, buckets, threads);
13885 } else {
13886 unbwt_init_single(t, p, n, freq, i, bucket2, fastbits);
13887 }
13888
13889 unbwt_decode_omp(t, u, p, n, r, i, bucket2, fastbits, threads);
13890 0
13891}
13892
13893#[doc(hidden)]
13895pub fn unbwt_main(
13896 t: &[u8],
13897 u: &mut [u8],
13898 p: &mut [SaUint],
13899 n: SaSint,
13900 freq: Option<&[SaSint]>,
13901 r: SaSint,
13902 i: &[SaUint],
13903 threads: SaSint,
13904) -> SaSint {
13905 let mut shift = 0usize;
13906 while (usize::try_from(n).expect("n must be non-negative") >> shift)
13907 > (1usize << UNBWT_FASTBITS)
13908 {
13909 shift += 1;
13910 }
13911
13912 let mut bucket2 = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
13913 let mut fastbits =
13914 vec![0u16; 1 + (usize::try_from(n).expect("n must be non-negative") >> shift)];
13915 let mut buckets = if threads > 1 && n >= 262_144 {
13916 Some(vec![
13917 0u32;
13918 usize::try_from(threads)
13919 .expect("threads must be non-negative")
13920 * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)
13921 ])
13922 } else {
13923 None
13924 };
13925
13926 unbwt_core(
13927 t,
13928 u,
13929 p,
13930 n,
13931 freq,
13932 r,
13933 i,
13934 &mut bucket2,
13935 &mut fastbits,
13936 buckets.as_deref_mut(),
13937 threads,
13938 )
13939}
13940
13941#[doc(hidden)]
13943pub fn unbwt_main_ctx(
13944 ctx: &mut UnbwtContext,
13945 t: &[u8],
13946 u: &mut [u8],
13947 p: &mut [SaUint],
13948 n: SaSint,
13949 freq: Option<&[SaSint]>,
13950 r: SaSint,
13951 i: &[SaUint],
13952) -> SaSint {
13953 if ctx.threads <= 0 {
13954 return -2;
13955 }
13956 let mut shift = 0usize;
13957 while (usize::try_from(n).expect("n must be non-negative") >> shift)
13958 > (1usize << UNBWT_FASTBITS)
13959 {
13960 shift += 1;
13961 }
13962 let required_fastbits = 1 + (usize::try_from(n).expect("n must be non-negative") >> shift);
13963 if ctx.bucket2.len() < ALPHABET_SIZE * ALPHABET_SIZE
13964 || ctx.fastbits.len() < required_fastbits
13965 || (ctx.threads > 1 && ctx.buckets.is_none())
13966 {
13967 return -2;
13968 }
13969
13970 unbwt_core(
13971 t,
13972 u,
13973 p,
13974 n,
13975 freq,
13976 r,
13977 i,
13978 &mut ctx.bucket2,
13979 &mut ctx.fastbits,
13980 ctx.buckets.as_deref_mut(),
13981 ctx.threads as SaSint,
13982 )
13983}
13984
13985pub fn libsais_unbwt(
13995 t: &[u8],
13996 u: &mut [u8],
13997 a: &mut [SaSint],
13998 freq: Option<&[SaSint]>,
13999 i: SaSint,
14000) -> SaSint {
14001 libsais_unbwt_aux(
14002 t,
14003 u,
14004 a,
14005 freq,
14006 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
14007 &[i],
14008 )
14009}
14010
14011pub fn libsais_unbwt_ctx(
14022 ctx: &mut UnbwtContext,
14023 t: &[u8],
14024 u: &mut [u8],
14025 a: &mut [SaSint],
14026 freq: Option<&[SaSint]>,
14027 i: SaSint,
14028) -> SaSint {
14029 libsais_unbwt_aux_ctx(
14030 ctx,
14031 t,
14032 u,
14033 a,
14034 freq,
14035 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
14036 &[i],
14037 )
14038}
14039
14040pub fn libsais_unbwt_aux(
14051 t: &[u8],
14052 u: &mut [u8],
14053 a: &mut [SaSint],
14054 freq: Option<&[SaSint]>,
14055 r: SaSint,
14056 i: &[SaSint],
14057) -> SaSint {
14058 let t_len = t.len();
14059 let n = SaSint::try_from(t_len).expect("input length must fit SaSint");
14060 if u.len() < t_len
14061 || a.len() < t_len
14062 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
14063 || (r != n && (r < 2 || (r & (r - 1)) != 0))
14064 {
14065 return -1;
14066 }
14067 let sample_count = if n == 0 {
14068 1
14069 } else {
14070 ((n - 1) / r + 1) as usize
14071 };
14072 if i.len() < sample_count {
14073 return -1;
14074 }
14075
14076 if n <= 1 {
14077 if i[0] != n {
14078 return -1;
14079 }
14080 if n == 1 {
14081 u[0] = t[0];
14082 }
14083 return 0;
14084 }
14085
14086 for t in 0..sample_count {
14087 let sample = i[t];
14088 if sample <= 0 || sample > n {
14089 return -1;
14090 }
14091 }
14092
14093 let i_u32: Vec<SaUint> = i
14094 .iter()
14095 .take(sample_count)
14096 .map(|&sample| SaUint::try_from(sample).expect("sample was validated positive"))
14097 .collect();
14098 let mut p = vec![0u32; t_len + 1];
14099 let result = unbwt_main(t, u, &mut p, n, freq, r, &i_u32, 1);
14100 for t in 0..t_len {
14101 a[t] = p[t] as SaSint;
14102 }
14103 result
14104}
14105
14106pub fn libsais_unbwt_aux_ctx(
14118 ctx: &mut UnbwtContext,
14119 t: &[u8],
14120 u: &mut [u8],
14121 a: &mut [SaSint],
14122 freq: Option<&[SaSint]>,
14123 r: SaSint,
14124 i: &[SaSint],
14125) -> SaSint {
14126 let t_len = t.len();
14127 let n = SaSint::try_from(t_len).expect("input length must fit SaSint");
14128 if u.len() < t_len
14129 || a.len() < t_len
14130 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
14131 || (r != n && (r < 2 || (r & (r - 1)) != 0))
14132 {
14133 return -1;
14134 }
14135 let sample_count = if n == 0 {
14136 1
14137 } else {
14138 ((n - 1) / r + 1) as usize
14139 };
14140 if i.len() < sample_count {
14141 return -1;
14142 }
14143
14144 if n <= 1 {
14145 if i[0] != n {
14146 return -1;
14147 }
14148 if n == 1 {
14149 u[0] = t[0];
14150 }
14151 return 0;
14152 }
14153
14154 for t in 0..sample_count {
14155 let sample = i[t];
14156 if sample <= 0 || sample > n {
14157 return -1;
14158 }
14159 }
14160
14161 let i_u32: Vec<SaUint> = i
14162 .iter()
14163 .take(sample_count)
14164 .map(|&sample| SaUint::try_from(sample).expect("sample was validated positive"))
14165 .collect();
14166 let mut p = vec![0u32; t_len + 1];
14167 let result = unbwt_main_ctx(ctx, t, u, &mut p, n, freq, r, &i_u32);
14168 for t in 0..t_len {
14169 a[t] = p[t] as SaSint;
14170 }
14171 result
14172}
14173
14174pub fn unbwt_create_ctx_omp(threads: SaSint) -> Option<UnbwtContext> {
14182 if threads < 0 {
14183 return None;
14184 }
14185 unbwt_create_ctx_main(normalize_omp_threads(threads))
14186}
14187
14188pub fn libsais_unbwt_omp(
14199 t: &[u8],
14200 u: &mut [u8],
14201 a: &mut [SaSint],
14202 freq: Option<&[SaSint]>,
14203 i: SaSint,
14204 threads: SaSint,
14205) -> SaSint {
14206 libsais_unbwt_aux_omp(
14207 t,
14208 u,
14209 a,
14210 freq,
14211 SaSint::try_from(t.len()).expect("input length must fit SaSint"),
14212 &[i],
14213 threads,
14214 )
14215}
14216
14217pub fn libsais_unbwt_aux_omp(
14229 t: &[u8],
14230 u: &mut [u8],
14231 a: &mut [SaSint],
14232 freq: Option<&[SaSint]>,
14233 r: SaSint,
14234 i: &[SaSint],
14235 threads: SaSint,
14236) -> SaSint {
14237 let t_len = t.len();
14238 let n = SaSint::try_from(t_len).expect("input length must fit SaSint");
14239 if threads < 0
14240 || u.len() < t_len
14241 || a.len() < t_len
14242 || freq.is_some_and(|freq| freq.len() < ALPHABET_SIZE)
14243 || (r != n && (r < 2 || (r & (r - 1)) != 0))
14244 {
14245 return -1;
14246 }
14247 let sample_count = if n == 0 {
14248 1
14249 } else {
14250 ((n - 1) / r + 1) as usize
14251 };
14252 if i.len() < sample_count {
14253 return -1;
14254 }
14255
14256 if n <= 1 {
14257 if i[0] != n {
14258 return -1;
14259 }
14260 if n == 1 {
14261 u[0] = t[0];
14262 }
14263 return 0;
14264 }
14265
14266 for sample in i.iter().take(sample_count) {
14267 let sample = *sample;
14268 if sample <= 0 || sample > n {
14269 return -1;
14270 }
14271 }
14272
14273 let threads = if threads > 0 { threads } else { 1 };
14274 let i_u32: Vec<SaUint> = i
14275 .iter()
14276 .take(sample_count)
14277 .map(|&sample| SaUint::try_from(sample).expect("sample was validated positive"))
14278 .collect();
14279 let mut p = vec![0u32; t_len + 1];
14280 let result = unbwt_main(t, u, &mut p, n, freq, r, &i_u32, threads);
14281 for idx in 0..t_len {
14282 a[idx] = p[idx] as SaSint;
14283 }
14284 result
14285}
14286
14287#[doc(hidden)]
14289pub fn bwt_copy_8u(u: &mut [u8], a: &[SaSint], n: SaSint) {
14290 if n <= 0 {
14291 return;
14292 }
14293
14294 let n_usize = usize::try_from(n).expect("n must be non-negative");
14295 for i in 0..n_usize {
14296 u[i] = a[i] as u8;
14297 }
14298}
14299
14300#[doc(hidden)]
14302pub fn bwt_copy_8u_omp(u: &mut [u8], a: &[SaSint], n: SaSint, threads: SaSint) {
14303 if threads == 1 || n < 65_536 {
14304 bwt_copy_8u(u, a, n);
14305 return;
14306 }
14307
14308 let n_usize = usize::try_from(n).expect("n must be non-negative");
14309 assert!(u.len() >= n_usize);
14310 assert!(a.len() >= n_usize);
14311 let threads_usize = usize::try_from(threads).expect("threads must be non-negative");
14312 let chunk_size = ((n_usize / threads_usize) & !15usize).max(16);
14313 let a_ptr = a.as_ptr() as usize;
14314 run_rayon_with_threads(threads_usize, || {
14315 u[..n_usize]
14316 .par_chunks_mut(chunk_size)
14317 .enumerate()
14318 .for_each(|(chunk_index, chunk)| {
14319 let start = chunk_index * chunk_size;
14320 let dst_ptr = chunk.as_mut_ptr();
14321 let src_ptr = unsafe { (a_ptr as *const SaSint).add(start) };
14322 for offset in 0..chunk.len() {
14323 unsafe {
14324 *dst_ptr.add(offset) = *src_ptr.add(offset) as u8;
14325 }
14326 }
14327 });
14328 });
14329}
14330
14331#[doc(hidden)]
14333pub fn accumulate_counts_s32_2(bucket00: &mut [SaSint], bucket01: &[SaSint]) {
14334 assert_eq!(bucket00.len(), bucket01.len());
14335 for (dst, src) in bucket00.iter_mut().zip(bucket01.iter()) {
14336 *dst += *src;
14337 }
14338}
14339
14340#[doc(hidden)]
14342pub fn accumulate_counts_s32_3(bucket00: &mut [SaSint], bucket01: &[SaSint], bucket02: &[SaSint]) {
14343 assert_eq!(bucket00.len(), bucket01.len());
14344 assert_eq!(bucket00.len(), bucket02.len());
14345 for ((dst, src1), src2) in bucket00
14346 .iter_mut()
14347 .zip(bucket01.iter())
14348 .zip(bucket02.iter())
14349 {
14350 *dst += *src1 + *src2;
14351 }
14352}
14353
14354#[doc(hidden)]
14356pub fn accumulate_counts_s32_4(
14357 bucket00: &mut [SaSint],
14358 bucket01: &[SaSint],
14359 bucket02: &[SaSint],
14360 bucket03: &[SaSint],
14361) {
14362 assert_eq!(bucket00.len(), bucket01.len());
14363 assert_eq!(bucket00.len(), bucket02.len());
14364 assert_eq!(bucket00.len(), bucket03.len());
14365 for (((dst, src1), src2), src3) in bucket00
14366 .iter_mut()
14367 .zip(bucket01.iter())
14368 .zip(bucket02.iter())
14369 .zip(bucket03.iter())
14370 {
14371 *dst += *src1 + *src2 + *src3;
14372 }
14373}
14374
14375#[doc(hidden)]
14377pub fn accumulate_counts_s32_5(
14378 bucket00: &mut [SaSint],
14379 bucket01: &[SaSint],
14380 bucket02: &[SaSint],
14381 bucket03: &[SaSint],
14382 bucket04: &[SaSint],
14383) {
14384 assert_eq!(bucket00.len(), bucket01.len());
14385 assert_eq!(bucket00.len(), bucket02.len());
14386 assert_eq!(bucket00.len(), bucket03.len());
14387 assert_eq!(bucket00.len(), bucket04.len());
14388 for ((((dst, src1), src2), src3), src4) in bucket00
14389 .iter_mut()
14390 .zip(bucket01.iter())
14391 .zip(bucket02.iter())
14392 .zip(bucket03.iter())
14393 .zip(bucket04.iter())
14394 {
14395 *dst += *src1 + *src2 + *src3 + *src4;
14396 }
14397}
14398
14399#[doc(hidden)]
14401pub fn accumulate_counts_s32_6(
14402 bucket00: &mut [SaSint],
14403 bucket01: &[SaSint],
14404 bucket02: &[SaSint],
14405 bucket03: &[SaSint],
14406 bucket04: &[SaSint],
14407 bucket05: &[SaSint],
14408) {
14409 assert_eq!(bucket00.len(), bucket01.len());
14410 assert_eq!(bucket00.len(), bucket02.len());
14411 assert_eq!(bucket00.len(), bucket03.len());
14412 assert_eq!(bucket00.len(), bucket04.len());
14413 assert_eq!(bucket00.len(), bucket05.len());
14414 for (((((dst, src1), src2), src3), src4), src5) in bucket00
14415 .iter_mut()
14416 .zip(bucket01.iter())
14417 .zip(bucket02.iter())
14418 .zip(bucket03.iter())
14419 .zip(bucket04.iter())
14420 .zip(bucket05.iter())
14421 {
14422 *dst += *src1 + *src2 + *src3 + *src4 + *src5;
14423 }
14424}
14425
14426#[doc(hidden)]
14428pub fn accumulate_counts_s32_7(
14429 bucket00: &mut [SaSint],
14430 bucket01: &[SaSint],
14431 bucket02: &[SaSint],
14432 bucket03: &[SaSint],
14433 bucket04: &[SaSint],
14434 bucket05: &[SaSint],
14435 bucket06: &[SaSint],
14436) {
14437 assert_eq!(bucket00.len(), bucket01.len());
14438 assert_eq!(bucket00.len(), bucket02.len());
14439 assert_eq!(bucket00.len(), bucket03.len());
14440 assert_eq!(bucket00.len(), bucket04.len());
14441 assert_eq!(bucket00.len(), bucket05.len());
14442 assert_eq!(bucket00.len(), bucket06.len());
14443 for ((((((dst, src1), src2), src3), src4), src5), src6) in bucket00
14444 .iter_mut()
14445 .zip(bucket01.iter())
14446 .zip(bucket02.iter())
14447 .zip(bucket03.iter())
14448 .zip(bucket04.iter())
14449 .zip(bucket05.iter())
14450 .zip(bucket06.iter())
14451 {
14452 *dst += *src1 + *src2 + *src3 + *src4 + *src5 + *src6;
14453 }
14454}
14455
14456#[doc(hidden)]
14458pub fn accumulate_counts_s32_8(
14459 bucket00: &mut [SaSint],
14460 bucket01: &[SaSint],
14461 bucket02: &[SaSint],
14462 bucket03: &[SaSint],
14463 bucket04: &[SaSint],
14464 bucket05: &[SaSint],
14465 bucket06: &[SaSint],
14466 bucket07: &[SaSint],
14467) {
14468 assert_eq!(bucket00.len(), bucket01.len());
14469 assert_eq!(bucket00.len(), bucket02.len());
14470 assert_eq!(bucket00.len(), bucket03.len());
14471 assert_eq!(bucket00.len(), bucket04.len());
14472 assert_eq!(bucket00.len(), bucket05.len());
14473 assert_eq!(bucket00.len(), bucket06.len());
14474 assert_eq!(bucket00.len(), bucket07.len());
14475 for (((((((dst, src1), src2), src3), src4), src5), src6), src7) in bucket00
14476 .iter_mut()
14477 .zip(bucket01.iter())
14478 .zip(bucket02.iter())
14479 .zip(bucket03.iter())
14480 .zip(bucket04.iter())
14481 .zip(bucket05.iter())
14482 .zip(bucket06.iter())
14483 .zip(bucket07.iter())
14484 {
14485 *dst += *src1 + *src2 + *src3 + *src4 + *src5 + *src6 + *src7;
14486 }
14487}
14488
14489#[doc(hidden)]
14491pub fn accumulate_counts_s32_9(
14492 bucket00: &mut [SaSint],
14493 bucket01: &[SaSint],
14494 bucket02: &[SaSint],
14495 bucket03: &[SaSint],
14496 bucket04: &[SaSint],
14497 bucket05: &[SaSint],
14498 bucket06: &[SaSint],
14499 bucket07: &[SaSint],
14500 bucket08: &[SaSint],
14501) {
14502 assert_eq!(bucket00.len(), bucket01.len());
14503 assert_eq!(bucket00.len(), bucket02.len());
14504 assert_eq!(bucket00.len(), bucket03.len());
14505 assert_eq!(bucket00.len(), bucket04.len());
14506 assert_eq!(bucket00.len(), bucket05.len());
14507 assert_eq!(bucket00.len(), bucket06.len());
14508 assert_eq!(bucket00.len(), bucket07.len());
14509 assert_eq!(bucket00.len(), bucket08.len());
14510 for ((((((((dst, src1), src2), src3), src4), src5), src6), src7), src8) in bucket00
14511 .iter_mut()
14512 .zip(bucket01.iter())
14513 .zip(bucket02.iter())
14514 .zip(bucket03.iter())
14515 .zip(bucket04.iter())
14516 .zip(bucket05.iter())
14517 .zip(bucket06.iter())
14518 .zip(bucket07.iter())
14519 .zip(bucket08.iter())
14520 {
14521 *dst += *src1 + *src2 + *src3 + *src4 + *src5 + *src6 + *src7 + *src8;
14522 }
14523}
14524
14525#[doc(hidden)]
14527pub fn accumulate_counts_s32(
14528 buckets: &mut [SaSint],
14529 bucket_size: FastSint,
14530 bucket_stride: FastSint,
14531 mut num_buckets: FastSint,
14532) {
14533 if num_buckets <= 1 {
14534 return;
14535 }
14536
14537 let bucket_size = usize::try_from(bucket_size).expect("bucket_size must be non-negative");
14538 let bucket_stride = usize::try_from(bucket_stride).expect("bucket_stride must be non-negative");
14539 let num_buckets_usize = usize::try_from(num_buckets).expect("num_buckets must be non-negative");
14540 assert!(buckets.len() >= bucket_size + (num_buckets_usize - 1) * bucket_stride);
14541 let bucket00_start = (num_buckets_usize - 1) * bucket_stride;
14542
14543 while num_buckets >= 9 {
14544 let start = bucket00_start
14545 - usize::try_from(num_buckets - 9).expect("non-negative") * bucket_stride;
14546 accumulate_counts_at(buckets, start, bucket_size, bucket_stride, 9);
14547 num_buckets -= 8;
14548 }
14549
14550 match num_buckets {
14551 1 => {}
14552 2..=8 => accumulate_counts_at(
14553 buckets,
14554 bucket00_start,
14555 bucket_size,
14556 bucket_stride,
14557 usize::try_from(num_buckets).expect("non-negative"),
14558 ),
14559 _ => {}
14560 }
14561}
14562
14563fn block_slice<T>(slice: &[T], block_start: FastSint, block_size: FastSint) -> &[T] {
14564 let start = usize::try_from(block_start).expect("block_start must be non-negative");
14565 let len = usize::try_from(block_size).expect("block_size must be non-negative");
14566 &slice[start..start + len]
14567}
14568
14569#[allow(dead_code)]
14570struct SharedMutArray<'a> {
14571 ptr: *mut SaSint,
14572 len: usize,
14573 _marker: PhantomData<&'a mut [SaSint]>,
14574}
14575
14576#[allow(dead_code)]
14577impl<'a> SharedMutArray<'a> {
14578 fn new(slice: &'a mut [SaSint]) -> Self {
14579 Self {
14580 ptr: slice.as_mut_ptr(),
14581 len: slice.len(),
14582 _marker: PhantomData,
14583 }
14584 }
14585
14586 fn len(&self) -> usize {
14587 self.len
14588 }
14589
14590 fn slice_mut(&mut self, start: usize, len: usize) -> &mut [SaSint] {
14591 assert!(start <= self.len);
14592 assert!(len <= self.len - start);
14593 unsafe {
14594 std::slice::from_raw_parts_mut(self.ptr.add(start), len)
14598 }
14599 }
14600}
14601
14602fn accumulate_counts_at(
14603 buckets: &mut [SaSint],
14604 bucket00_start: usize,
14605 bucket_size: usize,
14606 bucket_stride: usize,
14607 count: usize,
14608) {
14609 assert!((2..=9).contains(&count));
14610 assert!(bucket00_start >= (count - 1) * bucket_stride);
14611
14612 let dst_end = bucket00_start + bucket_size;
14613 let mut sums = vec![0; bucket_size];
14614
14615 for i in 0..count {
14616 let start = bucket00_start - i * bucket_stride;
14617 let end = start + bucket_size;
14618 for (sum, value) in sums.iter_mut().zip(buckets[start..end].iter()) {
14619 *sum += *value;
14620 }
14621 }
14622
14623 buckets[bucket00_start..dst_end].copy_from_slice(&sums);
14624}
14625
14626#[doc(hidden)]
14628pub fn thread_state_size() -> usize {
14629 mem::size_of::<ThreadState>()
14630}
14631
14632#[cfg(all(test, feature = "upstream-c"))]
14633mod tests {
14634 use super::*;
14635
14636 unsafe extern "C" {
14637 fn probe_renumber_lms_suffixes_8u(
14638 sa: *mut SaSint,
14639 m: SaSint,
14640 name: SaSint,
14641 omp_block_start: FastSint,
14642 omp_block_size: FastSint,
14643 ) -> SaSint;
14644
14645 fn probe_gather_marked_lms_suffixes(
14646 sa: *mut SaSint,
14647 m: SaSint,
14648 l: FastSint,
14649 omp_block_start: FastSint,
14650 omp_block_size: FastSint,
14651 ) -> FastSint;
14652
14653 fn probe_renumber_distinct_lms_suffixes_32s_4k(
14654 sa: *mut SaSint,
14655 m: SaSint,
14656 name: SaSint,
14657 omp_block_start: FastSint,
14658 omp_block_size: FastSint,
14659 ) -> SaSint;
14660
14661 fn probe_renumber_unique_and_nonunique_lms_suffixes_32s(
14662 t: *mut SaSint,
14663 sa: *mut SaSint,
14664 m: SaSint,
14665 f: SaSint,
14666 omp_block_start: FastSint,
14667 omp_block_size: FastSint,
14668 ) -> SaSint;
14669
14670 fn probe_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
14671 t: *mut SaSint,
14672 sa: *mut SaSint,
14673 m: SaSint,
14674 threads: SaSint,
14675 ) -> SaSint;
14676
14677 fn probe_renumber_and_gather_lms_suffixes_omp(
14678 sa: *mut SaSint,
14679 n: SaSint,
14680 m: SaSint,
14681 fs: SaSint,
14682 threads: SaSint,
14683 ) -> SaSint;
14684
14685 fn probe_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
14686 sa: *mut SaSint,
14687 n: SaSint,
14688 m: SaSint,
14689 threads: SaSint,
14690 ) -> SaSint;
14691
14692 fn probe_main_32s_entry(
14693 t: *mut SaSint,
14694 sa: *mut SaSint,
14695 n: SaSint,
14696 k: SaSint,
14697 fs: SaSint,
14698 threads: SaSint,
14699 ) -> SaSint;
14700
14701 fn probe_public_libsais_freq(
14702 t: *const u8,
14703 sa: *mut SaSint,
14704 n: SaSint,
14705 fs: SaSint,
14706 freq: *mut SaSint,
14707 ) -> SaSint;
14708
14709 fn probe_public_libsais_gsa_freq(
14710 t: *const u8,
14711 sa: *mut SaSint,
14712 n: SaSint,
14713 fs: SaSint,
14714 freq: *mut SaSint,
14715 ) -> SaSint;
14716
14717 fn probe_public_libsais_bwt_freq(
14718 t: *const u8,
14719 u: *mut u8,
14720 a: *mut SaSint,
14721 n: SaSint,
14722 fs: SaSint,
14723 freq: *mut SaSint,
14724 ) -> SaSint;
14725
14726 fn probe_public_libsais_bwt_aux_freq(
14727 t: *const u8,
14728 u: *mut u8,
14729 a: *mut SaSint,
14730 n: SaSint,
14731 fs: SaSint,
14732 freq: *mut SaSint,
14733 r: SaSint,
14734 i: *mut SaSint,
14735 ) -> SaSint;
14736
14737 fn probe_public_libsais_unbwt_freq(
14738 t: *const u8,
14739 u: *mut u8,
14740 a: *mut SaSint,
14741 n: SaSint,
14742 freq: *const SaSint,
14743 i: SaSint,
14744 ) -> SaSint;
14745
14746 fn probe_public_libsais_unbwt_aux_freq(
14747 t: *const u8,
14748 u: *mut u8,
14749 a: *mut SaSint,
14750 n: SaSint,
14751 freq: *const SaSint,
14752 r: SaSint,
14753 i: *const SaSint,
14754 ) -> SaSint;
14755 }
14756
14757 fn make_recursive_main_32s_text(repeats: usize) -> Vec<SaSint> {
14758 let motif = [9, 4, 9, 2, 9, 4, 9, 1];
14759 let mut t = Vec::with_capacity(repeats * motif.len() + 1);
14760 for _ in 0..repeats {
14761 t.extend_from_slice(&motif);
14762 }
14763 t.push(0);
14764 t
14765 }
14766
14767 fn make_large_main_32s_stress_text(len: usize, alphabet: SaSint) -> Vec<SaSint> {
14768 let mut state: u32 = 0x1357_9bdf;
14769 let mut t = Vec::with_capacity(len + 1);
14770
14771 for i in 0..len {
14772 state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
14773 let mut value = ((state >> 16) % (alphabet as u32 - 1)) as SaSint + 1;
14774
14775 if i % 17 < 8 {
14776 value = ((i / 17) as SaSint % 11) + 1;
14777 }
14778 if i % 29 < 10 {
14779 value = (((i / 29) as SaSint * 3) % 19) + 1;
14780 }
14781 if i % 64 >= 48 {
14782 value = t[i - 48];
14783 }
14784
14785 t.push(value);
14786 }
14787
14788 t.push(0);
14789 t
14790 }
14791
14792 fn assert_main_32s_entry_matches_upstream_c(
14793 t: Vec<SaSint>,
14794 k: SaSint,
14795 fs: SaSint,
14796 compare_full_sa: bool,
14797 ) {
14798 let mut t = t;
14799 let n = t.len() as SaSint;
14800 let n_usize = t.len();
14801 let threads = 1;
14802 let extra = usize::try_from(fs).expect("fs must be non-negative");
14803 let mut sa = vec![0; t.len() + extra];
14804
14805 let initial_t = t.clone();
14806 let initial_sa = sa.clone();
14807
14808 let c_result =
14809 unsafe { probe_main_32s_entry(t.as_mut_ptr(), sa.as_mut_ptr(), n, k, fs, threads) };
14810 let c_t = t.clone();
14811 let c_sa = sa.clone();
14812
14813 t.copy_from_slice(&initial_t);
14814 sa.copy_from_slice(&initial_sa);
14815
14816 let mut thread_state = alloc_thread_state(threads).expect("thread state");
14817 let rust_result =
14818 libsais_main_32s_entry(&mut t, &mut sa, n, k, fs, threads, &mut thread_state);
14819
14820 assert_eq!(rust_result, c_result);
14821 assert_slice_eq_with_first_diff("T", &t, &c_t);
14822 if compare_full_sa {
14823 assert_slice_eq_with_first_diff("SA", &sa, &c_sa);
14824 } else {
14825 assert_slice_eq_with_first_diff("SA", &sa[..n_usize], &c_sa[..n_usize]);
14826 }
14827 }
14828
14829 fn assert_main_32s_entry_matches_upstream_c_for_branch(k: SaSint) {
14830 assert_main_32s_entry_matches_upstream_c(
14831 vec![17, 3, 17, 9, 5, 9, 2, 11, 2, 7, 1, 7, 0],
14832 k,
14833 0,
14834 true,
14835 );
14836 }
14837
14838 fn assert_slice_eq_with_first_diff(label: &str, left: &[SaSint], right: &[SaSint]) {
14839 assert_eq!(left.len(), right.len(), "{label} length mismatch");
14840 if let Some((idx, (l, r))) = left
14841 .iter()
14842 .zip(right.iter())
14843 .enumerate()
14844 .find(|(_, (l, r))| l != r)
14845 {
14846 panic!("{label} first diff at index {idx}: rust={l}, c={r}");
14847 }
14848 }
14849
14850 #[test]
14851 fn align_up_matches_power_of_two_alignment() {
14852 assert_eq!(align_up(0, 4096), 0);
14853 assert_eq!(align_up(1, 4096), 4096);
14854 assert_eq!(align_up(4095, 4096), 4096);
14855 assert_eq!(align_up(4096, 4096), 4096);
14856 assert_eq!(align_up(4097, 4096), 8192);
14857 assert_eq!(align_up(65, 64), 128);
14858 }
14859
14860 #[test]
14861 fn shared_mut_array_projects_mutable_spans_from_one_backing_buffer() {
14862 let mut backing = vec![1, 2, 3, 4, 5, 6];
14863 let len;
14864 {
14865 let mut shared = SharedMutArray::new(&mut backing);
14866 shared.slice_mut(1, 3).copy_from_slice(&[20, 30, 40]);
14867 shared.slice_mut(4, 2).copy_from_slice(&[50, 60]);
14868 len = shared.len();
14869 }
14870 assert_eq!(backing, vec![1, 20, 30, 40, 50, 60]);
14871 assert_eq!(len, 6);
14872 }
14873
14874 #[test]
14875 fn create_ctx_main_matches_single_thread_layout() {
14876 let ctx = create_ctx_main(1).expect("context");
14877 assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
14878 assert_eq!(ctx.threads, 1);
14879 assert!(ctx.thread_state.is_none());
14880 }
14881
14882 #[test]
14883 fn create_ctx_main_allocates_thread_state_for_multi_threaded_mode() {
14884 let ctx = create_ctx_main(3).expect("context");
14885 let states = ctx.thread_state.expect("thread state");
14886 assert_eq!(states.len(), 3);
14887 assert!(states
14888 .iter()
14889 .all(|state| state.buckets.len() == 4 * ALPHABET_SIZE));
14890 assert!(states
14891 .iter()
14892 .all(|state| state.cache.len() == LIBSAIS_PER_THREAD_CACHE_SIZE));
14893 }
14894
14895 #[test]
14896 fn create_ctx_wraps_single_thread_main_context() {
14897 let ctx = create_ctx().expect("context");
14898 assert_eq!(ctx.threads, 1);
14899 assert_eq!(ctx.buckets.len(), 8 * ALPHABET_SIZE);
14900 assert!(ctx.thread_state.is_none());
14901 }
14902
14903 #[test]
14904 fn free_ctx_accepts_context_value() {
14905 let ctx = create_ctx().expect("context");
14906 free_ctx(ctx);
14907 }
14908
14909 fn brute_force_suffix_array_u8(t: &[u8]) -> Vec<SaSint> {
14910 let mut sa: Vec<SaSint> = (0..t.len())
14911 .map(|index| SaSint::try_from(index).expect("index must fit SaSint"))
14912 .collect();
14913 sa.sort_by(|&lhs, &rhs| {
14914 t[usize::try_from(lhs).expect("non-negative")..]
14915 .cmp(&t[usize::try_from(rhs).expect("non-negative")..])
14916 });
14917 sa
14918 }
14919
14920 fn brute_force_plcp_u8(t: &[u8], sa: &[SaSint]) -> Vec<SaSint> {
14921 let mut rank = vec![0usize; t.len()];
14922 for (i, &suffix) in sa.iter().enumerate() {
14923 rank[usize::try_from(suffix).expect("suffix index must be non-negative")] = i;
14924 }
14925
14926 let mut plcp = vec![0; t.len()];
14927 for i in 0..t.len() {
14928 let r = rank[i];
14929 let prev = if r == 0 {
14930 t.len()
14931 } else {
14932 usize::try_from(sa[r - 1]).expect("suffix index must be non-negative")
14933 };
14934 if prev == t.len() {
14935 plcp[i] = 0;
14936 continue;
14937 }
14938
14939 let mut l = 0usize;
14940 while i + l < t.len() && prev + l < t.len() && t[i + l] == t[prev + l] {
14941 l += 1;
14942 }
14943 plcp[i] = l as SaSint;
14944 }
14945 plcp
14946 }
14947
14948 fn brute_force_lcp_from_sa_u8(t: &[u8], sa: &[SaSint]) -> Vec<SaSint> {
14949 let mut lcp = vec![0; sa.len()];
14950 for i in 0..sa.len() {
14951 let lhs = usize::try_from(sa[i]).expect("suffix index must be non-negative");
14952 let rhs = if i == 0 {
14953 sa.len()
14954 } else {
14955 usize::try_from(sa[i - 1]).expect("suffix index must be non-negative")
14956 };
14957 if rhs == sa.len() {
14958 lcp[i] = 0;
14959 continue;
14960 }
14961
14962 let mut l = 0usize;
14963 while lhs + l < t.len() && rhs + l < t.len() && t[lhs + l] == t[rhs + l] {
14964 l += 1;
14965 }
14966 lcp[i] = l as SaSint;
14967 }
14968 lcp
14969 }
14970
14971 #[test]
14972 fn libsais_matches_bruteforce_suffix_array_for_small_text() {
14973 let t = b"banana";
14974 let mut sa = vec![0; t.len()];
14975 let mut freq = vec![0; ALPHABET_SIZE];
14976
14977 let result = libsais(t, &mut sa, 0, Some(&mut freq));
14978
14979 assert_eq!(result, 0);
14980 assert_eq!(sa, brute_force_suffix_array_u8(t));
14981 assert_eq!(freq[b'a' as usize], 3);
14982 assert_eq!(freq[b'b' as usize], 1);
14983 assert_eq!(freq[b'n' as usize], 2);
14984 }
14985
14986 #[test]
14987 fn public_libsais_frequency_outputs_match_upstream_c() {
14988 let text = b"banana";
14989 let gsa_text = b"ban\0ana\0";
14990 let mut rust_sa = vec![0; text.len()];
14991 let mut c_sa = vec![0; text.len()];
14992 let mut rust_freq = vec![-1; ALPHABET_SIZE];
14993 let mut c_freq = vec![-1; ALPHABET_SIZE];
14994
14995 let rust_rc = libsais(text, &mut rust_sa, 0, Some(&mut rust_freq));
14996 let c_rc = unsafe {
14997 probe_public_libsais_freq(
14998 text.as_ptr(),
14999 c_sa.as_mut_ptr(),
15000 text.len() as SaSint,
15001 0,
15002 c_freq.as_mut_ptr(),
15003 )
15004 };
15005 assert_eq!(rust_rc, c_rc);
15006 assert_eq!(rust_sa, c_sa);
15007 assert_eq!(rust_freq, c_freq);
15008
15009 let mut rust_gsa = vec![0; gsa_text.len()];
15010 let mut c_gsa = vec![0; gsa_text.len()];
15011 rust_freq.fill(-1);
15012 c_freq.fill(-1);
15013 let rust_rc = libsais_gsa(gsa_text, &mut rust_gsa, 0, Some(&mut rust_freq));
15014 let c_rc = unsafe {
15015 probe_public_libsais_gsa_freq(
15016 gsa_text.as_ptr(),
15017 c_gsa.as_mut_ptr(),
15018 gsa_text.len() as SaSint,
15019 0,
15020 c_freq.as_mut_ptr(),
15021 )
15022 };
15023 assert_eq!(rust_rc, c_rc);
15024 assert_eq!(rust_gsa, c_gsa);
15025 assert_eq!(rust_freq, c_freq);
15026
15027 let mut rust_u = vec![0; text.len()];
15028 let mut rust_a = vec![0; text.len()];
15029 let mut c_u = vec![0; text.len()];
15030 let mut c_a = vec![0; text.len()];
15031 rust_freq.fill(-1);
15032 c_freq.fill(-1);
15033 let rust_rc = libsais_bwt(text, &mut rust_u, &mut rust_a, 0, Some(&mut rust_freq));
15034 let c_rc = unsafe {
15035 probe_public_libsais_bwt_freq(
15036 text.as_ptr(),
15037 c_u.as_mut_ptr(),
15038 c_a.as_mut_ptr(),
15039 text.len() as SaSint,
15040 0,
15041 c_freq.as_mut_ptr(),
15042 )
15043 };
15044 assert_eq!(rust_rc, c_rc);
15045 assert_eq!(rust_u, c_u);
15046 assert_eq!(rust_freq, c_freq);
15047
15048 let r = 4;
15049 let mut rust_i = vec![0; (text.len() - 1) / r as usize + 1];
15050 let mut c_i = vec![0; rust_i.len()];
15051 rust_freq.fill(-1);
15052 c_freq.fill(-1);
15053 let rust_rc = libsais_bwt_aux(
15054 text,
15055 &mut rust_u,
15056 &mut rust_a,
15057 0,
15058 Some(&mut rust_freq),
15059 r,
15060 &mut rust_i,
15061 );
15062 let c_rc = unsafe {
15063 probe_public_libsais_bwt_aux_freq(
15064 text.as_ptr(),
15065 c_u.as_mut_ptr(),
15066 c_a.as_mut_ptr(),
15067 text.len() as SaSint,
15068 0,
15069 c_freq.as_mut_ptr(),
15070 r,
15071 c_i.as_mut_ptr(),
15072 )
15073 };
15074 assert_eq!(rust_rc, c_rc);
15075 assert_eq!(rust_u, c_u);
15076 assert_eq!(rust_i, c_i);
15077 assert_eq!(rust_freq, c_freq);
15078 }
15079
15080 #[test]
15081 fn public_libsais_unbwt_with_frequency_matches_upstream_c() {
15082 let text = b"abracadabra";
15083 let mut freq = vec![0; ALPHABET_SIZE];
15084 let mut bwt = vec![0; text.len()];
15085 let mut work = vec![0; text.len()];
15086 let primary = libsais_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
15087 assert!(primary >= 0);
15088
15089 let mut rust_u = vec![0; text.len()];
15090 let mut rust_a = vec![0; text.len() + 1];
15091 let mut c_u = vec![0; text.len()];
15092 let mut c_a = vec![0; text.len() + 1];
15093 let rust_rc = libsais_unbwt(&bwt, &mut rust_u, &mut rust_a, Some(&freq), primary);
15094 let c_rc = unsafe {
15095 probe_public_libsais_unbwt_freq(
15096 bwt.as_ptr(),
15097 c_u.as_mut_ptr(),
15098 c_a.as_mut_ptr(),
15099 bwt.len() as SaSint,
15100 freq.as_ptr(),
15101 primary,
15102 )
15103 };
15104 assert_eq!(rust_rc, c_rc);
15105 assert_eq!(rust_u, c_u);
15106 assert_eq!(rust_u, text);
15107
15108 let r = 4;
15109 let mut aux = vec![0; (text.len() - 1) / r as usize + 1];
15110 let bwt_rc = libsais_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), r, &mut aux);
15111 assert_eq!(bwt_rc, 0);
15112
15113 rust_u.fill(0);
15114 rust_a.fill(0);
15115 c_u.fill(0);
15116 c_a.fill(0);
15117 let rust_rc = libsais_unbwt_aux(&bwt, &mut rust_u, &mut rust_a, Some(&freq), r, &aux);
15118 let c_rc = unsafe {
15119 probe_public_libsais_unbwt_aux_freq(
15120 bwt.as_ptr(),
15121 c_u.as_mut_ptr(),
15122 c_a.as_mut_ptr(),
15123 bwt.len() as SaSint,
15124 freq.as_ptr(),
15125 r,
15126 aux.as_ptr(),
15127 )
15128 };
15129 assert_eq!(rust_rc, c_rc);
15130 assert_eq!(rust_u, c_u);
15131 assert_eq!(rust_u, text);
15132 }
15133
15134 #[test]
15135 fn libsais_omp_frequency_wrappers_match_direct_calls() {
15136 let text = b"banana";
15137 let gsa_text = b"ban\0ana\0";
15138
15139 let mut direct_sa = vec![0; text.len()];
15140 let mut omp_sa = vec![0; text.len()];
15141 let mut direct_freq = vec![-1; ALPHABET_SIZE];
15142 let mut omp_freq = vec![-1; ALPHABET_SIZE];
15143 assert_eq!(libsais(text, &mut direct_sa, 0, Some(&mut direct_freq)), 0);
15144 assert_eq!(libsais_omp(text, &mut omp_sa, 0, Some(&mut omp_freq), 2), 0);
15145 assert_eq!(omp_sa, direct_sa);
15146 assert_eq!(omp_freq, direct_freq);
15147
15148 let mut direct_gsa = vec![0; gsa_text.len()];
15149 let mut omp_gsa = vec![0; gsa_text.len()];
15150 direct_freq.fill(-1);
15151 omp_freq.fill(-1);
15152 assert_eq!(
15153 libsais_gsa(gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
15154 0
15155 );
15156 assert_eq!(
15157 libsais_gsa_omp(gsa_text, &mut omp_gsa, 0, Some(&mut omp_freq), 2),
15158 0
15159 );
15160 assert_eq!(omp_gsa, direct_gsa);
15161 assert_eq!(omp_freq, direct_freq);
15162
15163 let mut direct_bwt = vec![0; text.len()];
15164 let mut direct_work = vec![0; text.len()];
15165 let mut omp_bwt = vec![0; text.len()];
15166 let mut omp_work = vec![0; text.len()];
15167 direct_freq.fill(-1);
15168 omp_freq.fill(-1);
15169 assert_eq!(
15170 libsais_bwt(
15171 text,
15172 &mut direct_bwt,
15173 &mut direct_work,
15174 0,
15175 Some(&mut direct_freq)
15176 ),
15177 libsais_bwt_omp(text, &mut omp_bwt, &mut omp_work, 0, Some(&mut omp_freq), 2)
15178 );
15179 assert_eq!(omp_bwt, direct_bwt);
15180 assert_eq!(omp_freq, direct_freq);
15181
15182 let mut direct_aux = vec![0; 2];
15183 let mut omp_aux = vec![0; 2];
15184 direct_freq.fill(-1);
15185 omp_freq.fill(-1);
15186 assert_eq!(
15187 libsais_bwt_aux(
15188 text,
15189 &mut direct_bwt,
15190 &mut direct_work,
15191 0,
15192 Some(&mut direct_freq),
15193 4,
15194 &mut direct_aux
15195 ),
15196 libsais_bwt_aux_omp(
15197 text,
15198 &mut omp_bwt,
15199 &mut omp_work,
15200 0,
15201 Some(&mut omp_freq),
15202 4,
15203 &mut omp_aux,
15204 2
15205 )
15206 );
15207 assert_eq!(omp_bwt, direct_bwt);
15208 assert_eq!(omp_aux, direct_aux);
15209 assert_eq!(omp_freq, direct_freq);
15210 }
15211
15212 #[test]
15213 #[ignore = "large real-data regression; requires local minibwa yeast fixture"]
15214 fn public_libsais_omp_handles_minibwa_yeast_two_strand_index_input() {
15215 let l2b_path =
15216 "/data/henriksson/github/claude/minibwa/.tmp/compare-yeast-now/ref.split.rust.l2b";
15217 let fasta_path =
15218 "/data/henriksson/github/claude/minibwa/.tmp/large-real/yeast/ref.sanitized.fa";
15219 let forward = if let Ok(bytes) = std::fs::read(l2b_path) {
15220 assert!(bytes.len() >= 64, "short l2b fixture: {l2b_path}");
15221 assert_eq!(&bytes[..4], b"L2B\x01", "bad l2b magic in {l2b_path}");
15222 let n_ctg = u64::from_le_bytes(bytes[8..16].try_into().unwrap()) as usize;
15223 let tot_len = u64::from_le_bytes(bytes[16..24].try_into().unwrap()) as usize;
15224 let n_ambi = u64::from_le_bytes(bytes[24..32].try_into().unwrap()) as usize;
15225 let n_mask = u64::from_le_bytes(bytes[32..40].try_into().unwrap()) as usize;
15226 let n_pac = u64::from_le_bytes(bytes[56..64].try_into().unwrap()) as usize;
15227 let pac_start = 64 + 8 * n_ctg + 16 * n_ambi + 16 * n_mask;
15228 assert!(
15229 bytes.len() >= pac_start + 8 * n_pac,
15230 "truncated l2b pac in {l2b_path}"
15231 );
15232 let mut pac = Vec::with_capacity(n_pac);
15233 for chunk in bytes[pac_start..pac_start + 8 * n_pac].chunks_exact(8) {
15234 pac.push(u64::from_le_bytes(chunk.try_into().unwrap()));
15235 }
15236 (0..tot_len)
15237 .map(|i| ((pac[i >> 5] >> ((i & 31) << 1)) & 3) as u8)
15238 .collect::<Vec<_>>()
15239 } else if let Ok(fasta) = std::fs::read_to_string(fasta_path) {
15240 let mut rng = 11u64;
15241 let mut forward = Vec::new();
15242 for line in fasta.lines() {
15243 if line.starts_with('>') {
15244 continue;
15245 }
15246 forward.extend(line.bytes().map(|b| {
15247 let mut c = match b {
15248 b'A' | b'a' => 0,
15249 b'C' | b'c' => 1,
15250 b'G' | b'g' => 2,
15251 b'T' | b't' | b'U' | b'u' => 3,
15252 _ => {
15253 rng = rng.wrapping_add(0x9e3779b97f4a7c15);
15254 let mut z = rng;
15255 z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
15256 z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
15257 4 | ((z ^ (z >> 31)) & 3) as u8
15258 }
15259 };
15260 if b < b'A' || b > b'Z' {
15261 c |= 1 << 3;
15262 }
15263 c & 3
15264 }));
15265 }
15266 forward
15267 } else {
15268 eprintln!("skipping missing fixtures: {l2b_path} and {fasta_path}");
15269 return;
15270 };
15271 assert!(
15272 forward.len() > 12_000_000,
15273 "fixture should exercise the minibwa yeast index workload"
15274 );
15275
15276 let mut text = Vec::with_capacity(forward.len() * 2);
15277 text.extend_from_slice(&forward);
15278 text.extend(forward.iter().rev().map(|&c| 3 - c));
15279
15280 const FS: SaSint = 10_000;
15281 let mut sa = vec![0; text.len() + FS as usize + 1];
15282 assert_eq!(libsais_omp(&text, &mut sa[1..], FS, None, 4), 0);
15283 if let Some((i, &value)) = sa[1..1 + text.len()]
15284 .iter()
15285 .enumerate()
15286 .find(|&(_, &value)| value < 0 || value as usize >= text.len())
15287 {
15288 panic!("invalid suffix-array entry at {i}: {value}");
15289 }
15290 }
15291
15292 #[test]
15293 #[ignore = "large real-data regression; requires local minibwa yeast fixture"]
15294 fn public_libsais_omp_matches_plain_on_minibwa_yeast_two_strand_index_input() {
15295 let l2b_path =
15296 "/data/henriksson/github/claude/minibwa/.tmp/compare-yeast-now/ref.split.rust.l2b";
15297 let fasta_path =
15298 "/data/henriksson/github/claude/minibwa/.tmp/large-real/yeast/ref.sanitized.fa";
15299 let forward = if let Ok(bytes) = std::fs::read(l2b_path) {
15300 assert!(bytes.len() >= 64, "short l2b fixture: {l2b_path}");
15301 assert_eq!(&bytes[..4], b"L2B\x01", "bad l2b magic in {l2b_path}");
15302 let n_ctg = u64::from_le_bytes(bytes[8..16].try_into().unwrap()) as usize;
15303 let tot_len = u64::from_le_bytes(bytes[16..24].try_into().unwrap()) as usize;
15304 let n_ambi = u64::from_le_bytes(bytes[24..32].try_into().unwrap()) as usize;
15305 let n_mask = u64::from_le_bytes(bytes[32..40].try_into().unwrap()) as usize;
15306 let n_pac = u64::from_le_bytes(bytes[56..64].try_into().unwrap()) as usize;
15307 let pac_start = 64 + 8 * n_ctg + 16 * n_ambi + 16 * n_mask;
15308 assert!(
15309 bytes.len() >= pac_start + 8 * n_pac,
15310 "truncated l2b pac in {l2b_path}"
15311 );
15312 let mut pac = Vec::with_capacity(n_pac);
15313 for chunk in bytes[pac_start..pac_start + 8 * n_pac].chunks_exact(8) {
15314 pac.push(u64::from_le_bytes(chunk.try_into().unwrap()));
15315 }
15316 (0..tot_len)
15317 .map(|i| ((pac[i >> 5] >> ((i & 31) << 1)) & 3) as u8)
15318 .collect::<Vec<_>>()
15319 } else if let Ok(fasta) = std::fs::read_to_string(fasta_path) {
15320 let mut rng = 11u64;
15321 let mut forward = Vec::new();
15322 for line in fasta.lines() {
15323 if line.starts_with('>') {
15324 continue;
15325 }
15326 forward.extend(line.bytes().map(|b| {
15327 let mut c = match b {
15328 b'A' | b'a' => 0,
15329 b'C' | b'c' => 1,
15330 b'G' | b'g' => 2,
15331 b'T' | b't' | b'U' | b'u' => 3,
15332 _ => {
15333 rng = rng.wrapping_add(0x9e3779b97f4a7c15);
15334 let mut z = rng;
15335 z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
15336 z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
15337 4 | ((z ^ (z >> 31)) & 3) as u8
15338 }
15339 };
15340 if b < b'A' || b > b'Z' {
15341 c |= 1 << 3;
15342 }
15343 c & 3
15344 }));
15345 }
15346 forward
15347 } else {
15348 eprintln!("skipping missing fixtures: {l2b_path} and {fasta_path}");
15349 return;
15350 };
15351 assert!(
15352 forward.len() > 12_000_000,
15353 "fixture should exercise the minibwa yeast index workload"
15354 );
15355
15356 let mut text = Vec::with_capacity(forward.len() * 2);
15357 text.extend_from_slice(&forward);
15358 text.extend(forward.iter().rev().map(|&c| 3 - c));
15359
15360 const FS: SaSint = 10_000;
15361 let mut plain_sa = vec![0; text.len() + FS as usize + 1];
15362 let mut omp_sa = vec![0; text.len() + FS as usize + 1];
15363 assert_eq!(libsais(&text, &mut plain_sa[1..], FS, None), 0);
15364 assert_eq!(libsais_omp(&text, &mut omp_sa[1..], FS, None, 4), 0);
15365 plain_sa[0] = text.len() as SaSint;
15366 omp_sa[0] = text.len() as SaSint;
15367 if let Some(i) = plain_sa[..=text.len()]
15368 .iter()
15369 .zip(&omp_sa[..=text.len()])
15370 .position(|(plain, omp)| plain != omp)
15371 {
15372 panic!(
15373 "first suffix-array diff at {i}: plain={} omp={}",
15374 plain_sa[i], omp_sa[i]
15375 );
15376 }
15377 }
15378
15379 #[test]
15380 fn libsais_unbwt_omp_frequency_wrappers_match_direct_calls() {
15381 let text = b"abracadabra";
15382 let mut freq = vec![0; ALPHABET_SIZE];
15383 let mut bwt = vec![0; text.len()];
15384 let mut work = vec![0; text.len()];
15385 let primary = libsais_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
15386 assert!(primary >= 0);
15387
15388 let mut direct = vec![0; text.len()];
15389 let mut direct_work = vec![0; text.len() + 1];
15390 let mut omp = vec![0; text.len()];
15391 let mut omp_work = vec![0; text.len() + 1];
15392 assert_eq!(
15393 libsais_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
15394 libsais_unbwt_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), primary, 2)
15395 );
15396 assert_eq!(omp, direct);
15397 assert_eq!(omp, text);
15398
15399 let mut aux = vec![0; (text.len() - 1) / 4 + 1];
15400 assert_eq!(
15401 libsais_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
15402 0
15403 );
15404 direct.fill(0);
15405 direct_work.fill(0);
15406 omp.fill(0);
15407 omp_work.fill(0);
15408 assert_eq!(
15409 libsais_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
15410 libsais_unbwt_aux_omp(&bwt, &mut omp, &mut omp_work, Some(&freq), 4, &aux, 2)
15411 );
15412 assert_eq!(omp, direct);
15413 assert_eq!(omp, text);
15414 }
15415
15416 #[test]
15417 fn libsais_ctx_matches_plain_entry_point_for_small_text() {
15418 let t = b"mississippi";
15419 let mut sa_plain = vec![0; t.len()];
15420 let mut sa_ctx = vec![0; t.len()];
15421 let plain = libsais(t, &mut sa_plain, 0, None);
15422
15423 let mut ctx = create_ctx().expect("context");
15424 let with_ctx = libsais_ctx(&mut ctx, t, &mut sa_ctx, 0, None);
15425
15426 assert_eq!(plain, 0);
15427 assert_eq!(with_ctx, 0);
15428 assert_eq!(sa_ctx, sa_plain);
15429 }
15430
15431 #[test]
15432 fn libsais_ctx_frequency_wrappers_match_direct_calls() {
15433 let text = b"banana";
15434 let gsa_text = b"ban\0ana\0";
15435 let mut ctx = create_ctx().expect("context");
15436
15437 let mut direct_sa = vec![0; text.len()];
15438 let mut ctx_sa = vec![0; text.len()];
15439 let mut direct_freq = vec![-1; ALPHABET_SIZE];
15440 let mut ctx_freq = vec![-1; ALPHABET_SIZE];
15441 assert_eq!(libsais(text, &mut direct_sa, 0, Some(&mut direct_freq)), 0);
15442 assert_eq!(
15443 libsais_ctx(&mut ctx, text, &mut ctx_sa, 0, Some(&mut ctx_freq)),
15444 0
15445 );
15446 assert_eq!(ctx_sa, direct_sa);
15447 assert_eq!(ctx_freq, direct_freq);
15448
15449 let mut direct_gsa = vec![0; gsa_text.len()];
15450 let mut ctx_gsa = vec![0; gsa_text.len()];
15451 direct_freq.fill(-1);
15452 ctx_freq.fill(-1);
15453 assert_eq!(
15454 libsais_gsa(gsa_text, &mut direct_gsa, 0, Some(&mut direct_freq)),
15455 0
15456 );
15457 assert_eq!(
15458 libsais_gsa_ctx(&mut ctx, gsa_text, &mut ctx_gsa, 0, Some(&mut ctx_freq)),
15459 0
15460 );
15461 assert_eq!(ctx_gsa, direct_gsa);
15462 assert_eq!(ctx_freq, direct_freq);
15463
15464 let mut direct_bwt = vec![0; text.len()];
15465 let mut direct_work = vec![0; text.len()];
15466 let mut ctx_bwt = vec![0; text.len()];
15467 let mut ctx_work = vec![0; text.len()];
15468 direct_freq.fill(-1);
15469 ctx_freq.fill(-1);
15470 assert_eq!(
15471 libsais_bwt(
15472 text,
15473 &mut direct_bwt,
15474 &mut direct_work,
15475 0,
15476 Some(&mut direct_freq)
15477 ),
15478 libsais_bwt_ctx(
15479 &mut ctx,
15480 text,
15481 &mut ctx_bwt,
15482 &mut ctx_work,
15483 0,
15484 Some(&mut ctx_freq)
15485 )
15486 );
15487 assert_eq!(ctx_bwt, direct_bwt);
15488 assert_eq!(ctx_freq, direct_freq);
15489
15490 let mut direct_aux = vec![0; 2];
15491 let mut ctx_aux = vec![0; 2];
15492 direct_freq.fill(-1);
15493 ctx_freq.fill(-1);
15494 assert_eq!(
15495 libsais_bwt_aux(
15496 text,
15497 &mut direct_bwt,
15498 &mut direct_work,
15499 0,
15500 Some(&mut direct_freq),
15501 4,
15502 &mut direct_aux
15503 ),
15504 libsais_bwt_aux_ctx(
15505 &mut ctx,
15506 text,
15507 &mut ctx_bwt,
15508 &mut ctx_work,
15509 0,
15510 Some(&mut ctx_freq),
15511 4,
15512 &mut ctx_aux
15513 )
15514 );
15515 assert_eq!(ctx_bwt, direct_bwt);
15516 assert_eq!(ctx_aux, direct_aux);
15517 assert_eq!(ctx_freq, direct_freq);
15518 }
15519
15520 #[test]
15521 fn libsais_unbwt_ctx_frequency_wrappers_match_direct_calls() {
15522 let text = b"abracadabra";
15523 let mut freq = vec![0; ALPHABET_SIZE];
15524 let mut bwt = vec![0; text.len()];
15525 let mut work = vec![0; text.len()];
15526 let primary = libsais_bwt(text, &mut bwt, &mut work, 0, Some(&mut freq));
15527 assert!(primary >= 0);
15528
15529 let mut ctx = unbwt_create_ctx().expect("unbwt context");
15530 let mut direct = vec![0; text.len()];
15531 let mut direct_work = vec![0; text.len() + 1];
15532 let mut via_ctx = vec![0; text.len()];
15533 let mut ctx_work = vec![0; text.len() + 1];
15534 assert_eq!(
15535 libsais_unbwt(&bwt, &mut direct, &mut direct_work, Some(&freq), primary),
15536 libsais_unbwt_ctx(
15537 &mut ctx,
15538 &bwt,
15539 &mut via_ctx,
15540 &mut ctx_work,
15541 Some(&freq),
15542 primary
15543 )
15544 );
15545 assert_eq!(via_ctx, direct);
15546 assert_eq!(via_ctx, text);
15547
15548 let mut aux = vec![0; (text.len() - 1) / 4 + 1];
15549 assert_eq!(
15550 libsais_bwt_aux(text, &mut bwt, &mut work, 0, Some(&mut freq), 4, &mut aux),
15551 0
15552 );
15553 direct.fill(0);
15554 direct_work.fill(0);
15555 via_ctx.fill(0);
15556 ctx_work.fill(0);
15557 assert_eq!(
15558 libsais_unbwt_aux(&bwt, &mut direct, &mut direct_work, Some(&freq), 4, &aux),
15559 libsais_unbwt_aux_ctx(
15560 &mut ctx,
15561 &bwt,
15562 &mut via_ctx,
15563 &mut ctx_work,
15564 Some(&freq),
15565 4,
15566 &aux
15567 )
15568 );
15569 assert_eq!(via_ctx, direct);
15570 assert_eq!(via_ctx, text);
15571 }
15572
15573 #[test]
15574 fn libsais_int_matches_bruteforce_suffix_array_for_small_integer_text() {
15575 let mut t = vec![2, 1, 3, 1, 0];
15576 let expected = {
15577 let mut sa: Vec<SaSint> = (0..t.len())
15578 .map(|index| SaSint::try_from(index).expect("index must fit SaSint"))
15579 .collect();
15580 sa.sort_by(|&lhs, &rhs| {
15581 t[usize::try_from(lhs).expect("non-negative")..]
15582 .cmp(&t[usize::try_from(rhs).expect("non-negative")..])
15583 });
15584 sa
15585 };
15586 let mut sa = vec![0; t.len()];
15587
15588 let result = libsais_int(&mut t, &mut sa, 4, 0);
15589
15590 assert_eq!(result, 0);
15591 assert_eq!(sa, expected);
15592 }
15593
15594 #[test]
15595 fn libsais_plcp_matches_bruteforce_for_small_text() {
15596 let t = b"banana";
15597 let sa = brute_force_suffix_array_u8(t);
15598 let expected = brute_force_plcp_u8(t, &sa);
15599 let mut plcp = vec![0; t.len()];
15600
15601 let result = libsais_plcp(t, &sa, &mut plcp);
15602
15603 assert_eq!(result, 0);
15604 assert_eq!(plcp, expected);
15605 }
15606
15607 #[test]
15608 fn libsais_plcp_gsa_stops_at_separator() {
15609 let t = b"ab\0b\0";
15610 let sa = brute_force_suffix_array_u8(t);
15611 let mut plcp = vec![0; t.len()];
15612
15613 let result = libsais_plcp_gsa(t, &sa, &mut plcp);
15614
15615 assert_eq!(result, 0);
15616 assert_eq!(plcp[2], 0);
15617 assert_eq!(plcp[4], 0);
15618 }
15619
15620 #[test]
15621 fn libsais_lcp_matches_bruteforce_for_small_text() {
15622 let t = b"banana";
15623 let sa = brute_force_suffix_array_u8(t);
15624 let plcp = brute_force_plcp_u8(t, &sa);
15625 let expected = brute_force_lcp_from_sa_u8(t, &sa);
15626 let mut lcp = vec![0; t.len()];
15627
15628 let result = libsais_lcp(&plcp, &sa, &mut lcp);
15629
15630 assert_eq!(result, 0);
15631 assert_eq!(lcp, expected);
15632 }
15633
15634 #[test]
15635 fn libsais_ctx_rejects_invalid_public_arguments() {
15636 let text = b"banana";
15637 let mut ctx = create_ctx().expect("context");
15638 let mut short_sa = vec![0; text.len() - 1];
15639 let mut full_sa = vec![0; text.len()];
15640 let mut short_freq = vec![0; ALPHABET_SIZE - 1];
15641 let mut short_u = vec![0; text.len() - 1];
15642 let mut full_u = vec![0; text.len()];
15643 let mut short_a = vec![0; text.len() - 1];
15644 let mut full_a = vec![0; text.len()];
15645 let mut aux = vec![0; 2];
15646
15647 assert_eq!(libsais_ctx(&mut ctx, text, &mut short_sa, 0, None), -1);
15648 assert_eq!(
15649 libsais_ctx(&mut ctx, text, &mut full_sa, 0, Some(&mut short_freq)),
15650 -1
15651 );
15652 assert_eq!(
15653 libsais_gsa_ctx(&mut ctx, b"banana", &mut full_sa, 0, None),
15654 -1
15655 );
15656 assert_eq!(
15657 libsais_gsa_ctx(&mut ctx, b"banana\0", &mut short_sa, 0, None),
15658 -1
15659 );
15660 assert_eq!(
15661 libsais_bwt_ctx(&mut ctx, text, &mut short_u, &mut full_a, 0, None),
15662 -1
15663 );
15664 assert_eq!(
15665 libsais_bwt_ctx(&mut ctx, text, &mut full_u, &mut short_a, 0, None),
15666 -1
15667 );
15668 assert_eq!(
15669 libsais_bwt_ctx(
15670 &mut ctx,
15671 text,
15672 &mut full_u,
15673 &mut full_a,
15674 0,
15675 Some(&mut short_freq)
15676 ),
15677 -1
15678 );
15679 assert_eq!(
15680 libsais_bwt_aux_ctx(
15681 &mut ctx,
15682 text,
15683 &mut full_u,
15684 &mut full_a,
15685 0,
15686 None,
15687 0,
15688 &mut aux
15689 ),
15690 -1
15691 );
15692 assert_eq!(
15693 libsais_bwt_aux_ctx(
15694 &mut ctx,
15695 text,
15696 &mut full_u,
15697 &mut full_a,
15698 0,
15699 None,
15700 3,
15701 &mut aux
15702 ),
15703 -1
15704 );
15705 assert_eq!(
15706 libsais_bwt_aux_ctx(
15707 &mut ctx,
15708 text,
15709 &mut full_u,
15710 &mut full_a,
15711 0,
15712 None,
15713 4,
15714 &mut []
15715 ),
15716 -1
15717 );
15718
15719 let mut missing_thread_state_ctx = Context {
15720 buckets: vec![0; 8 * ALPHABET_SIZE],
15721 thread_state: None,
15722 threads: 2,
15723 };
15724 assert_eq!(
15725 libsais_ctx(&mut missing_thread_state_ctx, text, &mut full_sa, 0, None),
15726 -2
15727 );
15728
15729 let mut zero_thread_ctx = Context {
15730 buckets: vec![0; 8 * ALPHABET_SIZE],
15731 thread_state: None,
15732 threads: 0,
15733 };
15734 assert_eq!(
15735 libsais_ctx(&mut zero_thread_ctx, text, &mut full_sa, 0, None),
15736 -2
15737 );
15738
15739 let mut short_thread_state_ctx = create_ctx_main(2).expect("context");
15740 short_thread_state_ctx
15741 .thread_state
15742 .as_mut()
15743 .expect("thread state")
15744 .truncate(1);
15745 assert_eq!(
15746 libsais_ctx(&mut short_thread_state_ctx, text, &mut full_sa, 0, None),
15747 -2
15748 );
15749 }
15750
15751 #[test]
15752 fn libsais_unbwt_ctx_rejects_invalid_public_arguments() {
15753 let text = b"banana";
15754 let mut bwt = vec![0; text.len()];
15755 let mut work = vec![0; text.len()];
15756 let primary = libsais_bwt(text, &mut bwt, &mut work, 0, None);
15757 let mut ctx = unbwt_create_ctx().expect("context");
15758
15759 let mut short_u = vec![0; text.len() - 1];
15760 let mut full_u = vec![0; text.len()];
15761 let mut short_a = vec![0; text.len() - 1];
15762 let mut full_a = vec![0; text.len()];
15763 let short_freq = vec![0; ALPHABET_SIZE - 1];
15764 let good_aux = vec![primary, 4];
15765
15766 assert_eq!(
15767 libsais_unbwt_ctx(&mut ctx, &bwt, &mut short_u, &mut full_a, None, primary),
15768 -1
15769 );
15770 assert_eq!(
15771 libsais_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut short_a, None, primary),
15772 -1
15773 );
15774 assert_eq!(
15775 libsais_unbwt_ctx(
15776 &mut ctx,
15777 &bwt,
15778 &mut full_u,
15779 &mut full_a,
15780 Some(&short_freq),
15781 primary
15782 ),
15783 -1
15784 );
15785 assert_eq!(
15786 libsais_unbwt_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 0),
15787 -1
15788 );
15789 assert_eq!(
15790 libsais_unbwt_aux_ctx(&mut ctx, &bwt, &mut full_u, &mut full_a, None, 3, &good_aux),
15791 -1
15792 );
15793 assert_eq!(
15794 libsais_unbwt_aux_ctx(
15795 &mut ctx,
15796 &bwt,
15797 &mut full_u,
15798 &mut full_a,
15799 None,
15800 4,
15801 &[primary]
15802 ),
15803 -1
15804 );
15805
15806 let mut malformed_ctx = UnbwtContext {
15807 bucket2: Vec::new(),
15808 fastbits: Vec::new(),
15809 buckets: None,
15810 threads: 1,
15811 };
15812 assert_eq!(
15813 libsais_unbwt_ctx(
15814 &mut malformed_ctx,
15815 &bwt,
15816 &mut full_u,
15817 &mut full_a,
15818 None,
15819 primary
15820 ),
15821 -2
15822 );
15823
15824 let mut missing_parallel_buckets_ctx = UnbwtContext {
15825 bucket2: vec![0; ALPHABET_SIZE * ALPHABET_SIZE],
15826 fastbits: vec![0; 1 + (1 << UNBWT_FASTBITS)],
15827 buckets: None,
15828 threads: 2,
15829 };
15830 assert_eq!(
15831 libsais_unbwt_ctx(
15832 &mut missing_parallel_buckets_ctx,
15833 &bwt,
15834 &mut full_u,
15835 &mut full_a,
15836 None,
15837 primary
15838 ),
15839 -2
15840 );
15841 }
15842
15843 #[test]
15844 fn unbwt_create_ctx_main_allocates_expected_buffers() {
15845 let ctx = unbwt_create_ctx_main(3).expect("context");
15846 assert_eq!(ctx.bucket2.len(), ALPHABET_SIZE * ALPHABET_SIZE);
15847 assert_eq!(ctx.fastbits.len(), 1 + (1 << UNBWT_FASTBITS));
15848 assert_eq!(
15849 ctx.buckets.as_ref().expect("parallel buckets").len(),
15850 3 * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)
15851 );
15852 assert_eq!(ctx.threads, 3);
15853 }
15854
15855 #[test]
15856 fn unbwt_compute_histogram_counts_bytes() {
15857 let t = b"banana";
15858 let mut count = vec![0u32; ALPHABET_SIZE];
15859 unbwt_compute_histogram(t, t.len() as FastSint, &mut count);
15860 assert_eq!(count[b'a' as usize], 3);
15861 assert_eq!(count[b'b' as usize], 1);
15862 assert_eq!(count[b'n' as usize], 2);
15863 }
15864
15865 #[test]
15866 fn unbwt_transpose_bucket2_swaps_matrix_entries() {
15867 let mut bucket2 = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
15868 bucket2[(2 << 8) + 1] = 7;
15869 bucket2[(1 << 8) + 2] = 9;
15870 unbwt_transpose_bucket2(&mut bucket2);
15871 assert_eq!(bucket2[(1 << 8) + 2], 7);
15872 assert_eq!(bucket2[(2 << 8) + 1], 9);
15873 }
15874
15875 #[test]
15876 fn unbwt_init_single_builds_monotone_fastbits_and_writes_psi() {
15877 let t = b"annb\x00aa";
15878 let mut p = vec![0u32; t.len() + 1];
15879 let mut bucket2 = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
15880 let mut fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
15881 let i = vec![4u32];
15882
15883 unbwt_init_single(
15884 t,
15885 &mut p,
15886 t.len() as SaSint,
15887 None,
15888 &i,
15889 &mut bucket2,
15890 &mut fastbits,
15891 );
15892
15893 assert!(fastbits
15894 .iter()
15895 .all(|&value| usize::from(value) < ALPHABET_SIZE * ALPHABET_SIZE));
15896 assert!(fastbits.iter().any(|&value| value != 0));
15897 assert!(p.iter().any(|&value| value != 0));
15898 }
15899
15900 #[test]
15901 fn unbwt_init_parallel_currently_matches_single_initializer() {
15902 let t = b"annb\x00aa";
15903 let mut p_single = vec![0u32; t.len() + 1];
15904 let mut p_parallel = vec![0u32; t.len() + 1];
15905 let mut bucket2_single = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
15906 let mut bucket2_parallel = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
15907 let mut fastbits_single = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
15908 let mut fastbits_parallel = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
15909 let i = vec![4u32];
15910 let mut scratch = vec![0u32; 2 * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)];
15911
15912 unbwt_init_single(
15913 t,
15914 &mut p_single,
15915 t.len() as SaSint,
15916 None,
15917 &i,
15918 &mut bucket2_single,
15919 &mut fastbits_single,
15920 );
15921 unbwt_init_parallel(
15922 t,
15923 &mut p_parallel,
15924 t.len() as SaSint,
15925 None,
15926 &i,
15927 &mut bucket2_parallel,
15928 &mut fastbits_parallel,
15929 Some(&mut scratch),
15930 2,
15931 );
15932
15933 assert_eq!(p_parallel, p_single);
15934 assert_eq!(bucket2_parallel, bucket2_single);
15935 assert_eq!(fastbits_parallel, fastbits_single);
15936 }
15937
15938 #[test]
15939 fn unbwt_init_parallel_uses_block_partition_for_large_inputs() {
15940 let n = 70_003usize;
15941 let t: Vec<u8> = (0..n)
15942 .map(|i| i.wrapping_mul(37).wrapping_add(i >> 3) as u8)
15943 .collect();
15944 let i = [12_345u32];
15945
15946 let mut single_p = vec![0u32; n + 1];
15947 let mut threaded_p = vec![0u32; n + 1];
15948 let mut single_bucket2 = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
15949 let mut threaded_bucket2 = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
15950 let mut single_fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
15951 let mut threaded_fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
15952 let mut buckets = vec![0u32; 4 * (ALPHABET_SIZE + ALPHABET_SIZE * ALPHABET_SIZE)];
15953
15954 unbwt_init_single(
15955 &t,
15956 &mut single_p,
15957 n as SaSint,
15958 None,
15959 &i,
15960 &mut single_bucket2,
15961 &mut single_fastbits,
15962 );
15963 unbwt_init_parallel(
15964 &t,
15965 &mut threaded_p,
15966 n as SaSint,
15967 None,
15968 &i,
15969 &mut threaded_bucket2,
15970 &mut threaded_fastbits,
15971 Some(&mut buckets),
15972 4,
15973 );
15974
15975 assert_eq!(threaded_p, single_p);
15976 assert_eq!(threaded_bucket2, single_bucket2);
15977 assert_eq!(threaded_fastbits, single_fastbits);
15978 }
15979
15980 #[test]
15981 fn unbwt_decode_1_writes_big_endian_symbol_words() {
15982 let mut u = vec![0u8; 4];
15983 let p = vec![1u32, 0u32];
15984 let mut bucket2 = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
15985 bucket2[0x1234] = 0;
15986 bucket2[0x1235] = 2;
15987 let mut fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
15988 fastbits[0] = 0x1234;
15989 let mut i0 = 0usize;
15990
15991 unbwt_decode_1(&mut u, &p, &bucket2, &fastbits, 0, &mut i0, 2);
15992
15993 assert_eq!(u, vec![0x12, 0x35, 0x12, 0x35]);
15994 assert_eq!(i0, 0);
15995 }
15996
15997 #[test]
15998 fn unbwt_decode_dispatches_two_block_tail_shape() {
15999 let mut u = vec![0u8; 8];
16000 let p = vec![1u32, 0u32];
16001 let mut bucket2 = vec![0u32; ALPHABET_SIZE * ALPHABET_SIZE];
16002 bucket2[0x1234] = 0;
16003 bucket2[0x1235] = 2;
16004 let mut fastbits = vec![0u16; 1 + (1 << UNBWT_FASTBITS)];
16005 fastbits[0] = 0x1234;
16006 let i = vec![0u32, 0u32];
16007
16008 unbwt_decode(&mut u, &p, 4, 2, &i, &bucket2, &fastbits, 2, 2);
16009
16010 assert_eq!(u, vec![0x12, 0x35, 0x12, 0x35, 0x00, 0x00, 0x00, 0x00]);
16011 }
16012
16013 #[test]
16014 fn libsais_unbwt_aux_rejects_invalid_sampling_range() {
16015 let t = b"abc";
16016 let mut u = vec![0u8; t.len()];
16017 let mut a = vec![0i32; t.len()];
16018
16019 let result = libsais_unbwt_aux(t, &mut u, &mut a, None, 2, &[0, 4]);
16020
16021 assert_eq!(result, -1);
16022
16023 assert_eq!(libsais_unbwt_aux(t, &mut u, &mut a, None, 0, &[1]), -1);
16024
16025 let mut ctx = unbwt_create_ctx().expect("context");
16026 assert_eq!(
16027 libsais_unbwt_aux_ctx(&mut ctx, t, &mut u, &mut a, None, 0, &[1]),
16028 -1
16029 );
16030 assert_eq!(
16031 libsais_unbwt_aux_omp(t, &mut u, &mut a, None, 0, &[1], 2),
16032 -1
16033 );
16034 }
16035
16036 #[test]
16037 fn libsais_bwt_and_unbwt_round_trip_small_text() {
16038 let t = b"banana";
16039 let mut bwt = vec![0u8; t.len()];
16040 let mut a = vec![0i32; t.len()];
16041
16042 let primary = libsais_bwt(t, &mut bwt, &mut a, 0, None);
16043 assert!(primary > 0);
16044
16045 let mut restored = vec![0u8; t.len()];
16046 let result = libsais_unbwt(&bwt, &mut restored, &mut a, None, primary);
16047
16048 assert_eq!(result, 0);
16049 assert_eq!(restored, t);
16050 }
16051
16052 #[test]
16053 fn libsais_bwt_aux_and_unbwt_aux_round_trip_small_text() {
16054 let t = b"mississippi";
16055 let mut bwt = vec![0u8; t.len()];
16056 let mut a = vec![0i32; t.len()];
16057 let mut samples = vec![0i32; 4];
16058
16059 let result = libsais_bwt_aux(t, &mut bwt, &mut a, 0, None, 4, &mut samples);
16060 assert_eq!(result, 0);
16061
16062 let mut restored = vec![0u8; t.len()];
16063 let result = libsais_unbwt_aux(&bwt, &mut restored, &mut a, None, 4, &samples);
16064
16065 assert_eq!(result, 0);
16066 assert_eq!(restored, t);
16067 }
16068
16069 #[test]
16070 fn libsais_bwt_aux_and_unbwt_aux_omp_round_trip_small_text() {
16071 let t = b"mississippi";
16072 let mut bwt = vec![0u8; t.len()];
16073 let mut a = vec![0i32; t.len()];
16074 let mut samples = vec![0i32; 4];
16075
16076 let result = libsais_bwt_aux(t, &mut bwt, &mut a, 0, None, 4, &mut samples);
16077 assert_eq!(result, 0);
16078
16079 let mut restored = vec![0u8; t.len()];
16080 let result = libsais_unbwt_aux_omp(&bwt, &mut restored, &mut a, None, 4, &samples, 2);
16081
16082 assert_eq!(result, 0);
16083 assert_eq!(restored, t);
16084 }
16085
16086 #[test]
16087 fn real_world_round_trip_on_upstream_readme() {
16088 let t = include_bytes!("../libsais/README.md");
16089 let mut bwt = vec![0u8; t.len()];
16090 let mut a = vec![0i32; t.len()];
16091
16092 let primary = libsais_bwt(t, &mut bwt, &mut a, 0, None);
16093 assert!(primary > 0);
16094
16095 let mut restored = vec![0u8; t.len()];
16096 let result = libsais_unbwt(&bwt, &mut restored, &mut a, None, primary);
16097
16098 assert_eq!(result, 0);
16099 assert_eq!(restored, t);
16100 }
16101
16102 #[test]
16103 fn real_world_aux_omp_round_trip_on_upstream_c_source() {
16104 let t = include_bytes!("../libsais/src/libsais.c");
16105 let mut bwt = vec![0u8; t.len()];
16106 let mut a = vec![0i32; t.len()];
16107 let r = 128i32;
16108 let mut samples = vec![0i32; (t.len() - 1) / usize::try_from(r).expect("fits") + 1];
16109
16110 let result = libsais_bwt_aux(t, &mut bwt, &mut a, 0, None, r, &mut samples);
16111 assert_eq!(result, 0);
16112
16113 let mut restored = vec![0u8; t.len()];
16114 let result = libsais_unbwt_aux_omp(&bwt, &mut restored, &mut a, None, r, &samples, 2);
16115
16116 assert_eq!(result, 0);
16117 assert_eq!(restored, t);
16118 }
16119
16120 #[test]
16121 fn libsais_bwt_aux_rejects_undersized_sampling_array() {
16122 let t = b"upstream source text";
16123 let mut bwt = vec![0u8; t.len()];
16124 let mut a = vec![0i32; t.len()];
16125 let mut samples = vec![0i32; 1];
16126
16127 let result = libsais_bwt_aux(t, &mut bwt, &mut a, 0, None, 2, &mut samples);
16128
16129 assert_eq!(result, -1);
16130
16131 let result = libsais_bwt_aux(t, &mut bwt, &mut a, 0, None, 0, &mut samples);
16132
16133 assert_eq!(result, -1);
16134 }
16135
/// A sampling rate of 0 passed to `libsais_bwt_aux_omp` must yield -1
/// rather than a panic.
#[test]
fn libsais_bwt_aux_omp_rejects_invalid_sampling_rate_without_panicking() {
    let text = b"upstream source text";
    let len = text.len();
    let mut transformed = vec![0u8; len];
    let mut work = vec![0i32; len];
    let mut aux = vec![0i32; 4];

    let status = libsais_bwt_aux_omp(text, &mut transformed, &mut work, 0, None, 0, &mut aux, 2);

    assert_eq!(status, -1);
}
16147
/// Exercises the negative/zero counting helpers over the full array and over
/// the sub-range described by `(2, 3)`.
#[test]
fn count_helpers_match_c_predicates() {
    let marked = [1, -1, 0, -3, 4, 0, -9];
    let full_len = marked.len() as FastSint;

    // Whole array: three negative entries, two zero entries.
    assert_eq!(count_negative_marked_suffixes(&marked, 0, full_len), 3);
    assert_eq!(count_zero_marked_suffixes(&marked, 0, full_len), 2);

    // Sub-range call with arguments (2, 3).
    assert_eq!(count_negative_marked_suffixes(&marked, 2, 3), 1);
    assert_eq!(count_zero_marked_suffixes(&marked, 2, 3), 1);
}
16159
/// Every entry must come back XOR-ed with `SAINT_MIN` after a
/// single-threaded call.
#[test]
fn flip_suffix_markers_omp_toggles_saint_min_bits() {
    let original: [SaSint; 4] = [1, -2, 3, -4];
    let mut sa = original.to_vec();

    flip_suffix_markers_omp(&mut sa, 4, 1);

    let expected: Vec<SaSint> = original.iter().map(|&value| value ^ SAINT_MIN).collect();
    assert_eq!(sa, expected);
}
16169
/// On a 65,600-element input the four-thread call must produce the same
/// array as the single-thread call.
#[test]
fn flip_suffix_markers_omp_uses_block_partition_for_large_inputs() {
    let len = 65_600usize;
    let seed: Vec<SaSint> = (0..len).map(|i| (i as SaSint) ^ SAINT_MIN).collect();

    let mut reference = seed;
    let mut parallel = reference.clone();

    flip_suffix_markers_omp(&mut reference, len as SaSint, 1);
    flip_suffix_markers_omp(&mut parallel, len as SaSint, 4);

    assert_eq!(parallel, reference);
}
16181
/// Each cache entry's `index` must land in `sa[entry.symbol]`.
#[test]
fn place_cached_suffixes_writes_indices_to_symbol_slots() {
    let mut sa = vec![0; 8];
    let cache: Vec<ThreadCache> = [(2, 10), (5, 20), (1, 30)]
        .iter()
        .map(|&(symbol, index)| ThreadCache { symbol, index })
        .collect();

    place_cached_suffixes(&mut sa, &cache, 0, cache.len() as FastSint);

    // The cache is only borrowed immutably, so it still describes the expected placement.
    for entry in &cache {
        assert_eq!(sa[entry.symbol as usize], entry.index);
    }
}
16206
/// Entries with a negative `symbol` must be dropped: the three non-negative
/// entries are placed into `sa` and end up at the front of the cache.
#[test]
fn compact_and_place_cached_suffixes_discards_negative_symbols() {
    let entry = |symbol, index| ThreadCache { symbol, index };

    let mut sa = vec![0; 8];
    let mut cache = vec![
        entry(2, 10),
        entry(-1, 99),
        entry(5, 20),
        entry(-4, 77),
        entry(1, 30),
    ];
    let cache_len = cache.len() as FastSint;

    compact_and_place_cached_suffixes(&mut sa, &mut cache, 0, cache_len);

    let survivors = [entry(2, 10), entry(5, 20), entry(1, 30)];
    for kept in &survivors {
        assert_eq!(sa[kept.symbol as usize], kept.index);
    }
    // The surviving entries occupy the leading cache slots in their original order.
    assert_eq!(&cache[..survivors.len()], &survivors[..]);
}
16261
/// Sanity-checks `gather_lms_suffixes_32s`: a non-negative result, every SA
/// entry within `0..=n`, and the last SA entry within `1..=n - 1`.
#[test]
fn gather_lms_suffixes_32s_collects_expected_suffix_starts() {
    let text = vec![2, 1, 3, 1, 0];
    let n = text.len() as SaSint;
    let mut sa = vec![0; text.len()];

    let gathered = gather_lms_suffixes_32s(&text, &mut sa, n);

    assert!(gathered >= 0);
    assert!(sa.iter().all(|value| (0..=n).contains(value)));

    let last = sa[text.len() - 1];
    assert!((1..=n - 1).contains(&last));
}
16273
/// Sanity-checks `gather_compacted_lms_suffixes_32s` on a text containing a
/// negative symbol: a non-negative result and every SA entry within `0..=n`.
#[test]
fn gather_compacted_lms_suffixes_32s_skips_negative_marked_symbols() {
    let text = vec![2, -1, 3, 1, 0];
    let n = text.len() as SaSint;
    let mut sa = vec![0; text.len()];

    let gathered = gather_compacted_lms_suffixes_32s(&text, &mut sa, n);

    assert!(gathered >= 0);
    assert!(sa.iter().all(|value| (0..=n).contains(value)));
}
16284
/// The counts written by `count_lms_suffixes_32s_2k` must sum to the text length.
#[test]
fn count_lms_suffixes_32s_2k_counts_two_bucket_categories() {
    let text = vec![2, 1, 3, 1, 0];
    let text_len = text.len() as SaSint;
    let mut counts = vec![0; 2 * 4];

    count_lms_suffixes_32s_2k(&text, text_len, 4, &mut counts);

    let total: SaSint = counts.iter().sum();
    assert_eq!(total, text_len);
}
16292
/// The counts written by `count_lms_suffixes_32s_4k` must sum to the text length.
#[test]
fn count_lms_suffixes_32s_4k_counts_four_bucket_categories() {
    let text = vec![2, 1, 3, 1, 0];
    let text_len = text.len() as SaSint;
    let mut counts = vec![0; 4 * 4];

    count_lms_suffixes_32s_4k(&text, text_len, 4, &mut counts);

    let total: SaSint = counts.iter().sum();
    assert_eq!(total, text_len);
}
16300
/// With one symbol carrying the `SAINT_MIN` bit, the counts must still sum
/// to the text length.
#[test]
fn count_compacted_lms_suffixes_32s_2k_masks_saint_bits() {
    let text = vec![2, SAINT_MIN | 1, 3, 1, 0];
    let text_len = text.len() as SaSint;
    let mut counts = vec![0; 2 * 4];

    count_compacted_lms_suffixes_32s_2k(&text, text_len, 4, &mut counts);

    let total: SaSint = counts.iter().sum();
    assert_eq!(total, text_len);
}
16308
/// For the five-byte text the helper must report one suffix, store 1 in the
/// last SA slot, and leave bucket counts that sum to the text length.
#[test]
fn count_and_gather_lms_suffixes_8u_updates_sa_and_buckets() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let len = text.len();
    let mut sa = vec![0; len];
    let mut counts = vec![0; 4 * ALPHABET_SIZE];

    let gathered = count_and_gather_lms_suffixes_8u(
        &text,
        &mut sa,
        len as SaSint,
        &mut counts,
        0,
        len as FastSint,
    );

    assert_eq!(gathered, 1);
    assert_eq!(sa[len - 1], 1);
    let total: SaSint = counts.iter().sum();
    assert_eq!(total, len as SaSint);
}
16326
/// Table-driven check of `get_bucket_stride` for three argument triples.
#[test]
fn get_bucket_stride_prefers_aligned_sizes_when_space_allows() {
    // (first arg, second arg, third arg, expected stride)
    let cases = [(8192, 1000, 2, 1024), (256, 17, 2, 32), (8, 17, 2, 17)];

    for (a, b, c, expected) in cases {
        assert_eq!(get_bucket_stride(a, b, c), expected);
    }
}
16333
/// `count_suffixes_32s` must fill the buckets with the per-symbol histogram of the text.
#[test]
fn count_suffixes_32s_counts_symbol_histogram() {
    let text = vec![2, 1, 2, 3, 1, 0, 2];
    let mut histogram = vec![0; 4];

    count_suffixes_32s(&text, text.len() as SaSint, 4, &mut histogram);

    // One 0, two 1s, three 2s, one 3.
    assert_eq!(histogram, vec![1, 2, 3, 1]);
}
16341
/// Seeds three counts, then checks the return value, the frequency table and
/// the first two entries of the lanes at offsets `6 * ALPHABET_SIZE` and
/// `7 * ALPHABET_SIZE`.
#[test]
fn initialize_buckets_start_and_end_8u_sets_ranges_and_freq() {
    let mut buckets = vec![0; 8 * ALPHABET_SIZE];
    let seeds = [
        (buckets_index4(0, 0), 1),
        (buckets_index4(1, 1), 2),
        (buckets_index4(2, 3), 3),
    ];
    for (slot, count) in seeds {
        buckets[slot] = count;
    }
    let mut freq = vec![0; ALPHABET_SIZE];

    let used = initialize_buckets_start_and_end_8u(&mut buckets, Some(&mut freq));

    assert_eq!(used, 3);
    assert_eq!(&freq[..3], &[1, 2, 3]);

    let lane6 = &buckets[6 * ALPHABET_SIZE..];
    let lane7 = &buckets[7 * ALPHABET_SIZE..];
    assert_eq!((lane6[0], lane7[0]), (0, 1));
    assert_eq!((lane6[1], lane7[1]), (1, 3));
}
16359
/// Seeds four counts for `k = 3` and checks the two `k`-wide views written at
/// offsets `4 * k` and `5 * k`.
#[test]
fn initialize_buckets_start_and_end_32s_6k_sets_prefix_ranges() {
    let k = 3;
    let mut buckets = vec![0; 6 * k];
    let seeds = [
        (buckets_index4(0, 0), 1),
        (buckets_index4(0, 1), 2),
        (buckets_index4(1, 2), 3),
        (buckets_index4(2, 3), 4),
    ];
    for (slot, count) in seeds {
        buckets[slot] = count;
    }

    initialize_buckets_start_and_end_32s_6k(k as SaSint, &mut buckets);

    let (lower, upper) = (&buckets[4 * k..5 * k], &buckets[5 * k..6 * k]);
    assert_eq!(lower, &[0, 3, 6]);
    assert_eq!(upper, &[3, 6, 10]);
}
16372
/// Seeds four counts for `k = 3` and checks the two `k`-wide views written at
/// offsets `2 * k` and `3 * k`.
#[test]
fn initialize_buckets_start_and_end_32s_4k_sets_prefix_ranges() {
    let k = 3;
    let mut buckets = vec![0; 4 * k];
    let seeds = [
        (buckets_index2(0, 0), 1),
        (buckets_index2(0, 1), 2),
        (buckets_index2(1, 0), 3),
        (buckets_index2(2, 1), 4),
    ];
    for (slot, count) in seeds {
        buckets[slot] = count;
    }

    initialize_buckets_start_and_end_32s_4k(k as SaSint, &mut buckets);

    let (lower, upper) = (&buckets[2 * k..3 * k], &buckets[3 * k..4 * k]);
    assert_eq!(lower, &[0, 3, 6]);
    assert_eq!(upper, &[3, 6, 10]);
}
16385
/// After the call, the even slots of the six-entry bucket array must hold 3, 10 and 21.
#[test]
fn initialize_buckets_end_32s_2k_rewrites_first_lanes_to_end_positions() {
    let k = 3;
    let mut buckets = vec![1, 2, 3, 4, 5, 6];

    initialize_buckets_end_32s_2k(k as SaSint, &mut buckets);

    for (slot, expected) in [(0usize, 3), (2, 10), (4, 21)] {
        assert_eq!(buckets[slot], expected);
    }
}
16395
/// Checks both `k`-wide halves of the bucket array after the call.
#[test]
fn initialize_buckets_start_and_end_32s_2k_copies_start_positions() {
    let k = 3;
    let mut buckets = vec![3, 2, 10, 4, 21, 6];

    initialize_buckets_start_and_end_32s_2k(k as SaSint, &mut buckets);

    let (front, back) = buckets.split_at(k);
    assert_eq!(front, &[3, 10, 21]);
    assert_eq!(&back[..k], &[0, 3, 10]);
}
16404
/// Counts `[1, 2, 3]` must be rewritten in place to `[0, 1, 3]`.
#[test]
fn initialize_buckets_start_32s_1k_builds_prefix_starts() {
    let mut counts = vec![1, 2, 3];

    initialize_buckets_start_32s_1k(3, &mut counts);

    let expected = vec![0, 1, 3];
    assert_eq!(counts, expected);
}
16411
/// Counts `[1, 2, 3]` must be rewritten in place to `[1, 3, 6]`.
#[test]
fn initialize_buckets_end_32s_1k_builds_prefix_ends() {
    let mut counts = vec![1, 2, 3];

    initialize_buckets_end_32s_1k(3, &mut counts);

    let expected = vec![1, 3, 6];
    assert_eq!(counts, expected);
}
16418
/// Smoke test: with two seeded counts the returned value must be non-negative.
#[test]
fn initialize_buckets_for_lms_suffixes_radix_sort_8u_returns_total_lms_slots() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for (slot, count) in [(buckets_index4(0, 1), 1), (buckets_index4(1, 3), 2)] {
        buckets[slot] = count;
    }

    let total = initialize_buckets_for_lms_suffixes_radix_sort_8u(&text, &mut buckets, 4);

    assert!(total >= 0);
}
16428
/// Smoke test: starting from all-zero buckets, the call must leave at least
/// one non-zero entry.
#[test]
fn initialize_buckets_for_lms_suffixes_radix_sort_32s_2k_rewrites_two_lane_prefixes() {
    let text = vec![2, 1, 3, 1, 0];
    let mut buckets = vec![0; 2 * 4];

    initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(&text, 4, &mut buckets, 4);

    let touched = buckets.iter().any(|&value| value != 0);
    assert!(touched);
}
16436
/// Smoke test: with two seeded counts the returned value must be non-negative.
#[test]
fn initialize_buckets_for_lms_suffixes_radix_sort_32s_6k_returns_total_lms_slots() {
    let text = vec![2, 1, 3, 1, 0];
    let mut buckets = vec![0; 6 * 4];
    for (slot, count) in [(buckets_index4(0, 1), 1), (buckets_index4(1, 3), 2)] {
        buckets[slot] = count;
    }

    let total = initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(&text, 4, &mut buckets, 4);

    assert!(total >= 0);
}
16446
/// After initialisation the entry at `2 * k` must be 0 and the entry at
/// `3 * k` must not be smaller than it.
#[test]
fn initialize_buckets_for_radix_and_partial_sorting_32s_4k_sets_start_end_views() {
    let text = vec![2, 1, 3, 1, 0];
    let k = 4usize;
    let mut buckets = vec![0; 4 * k];
    let seeds = [
        (buckets_index2(0, 0), 1),
        (buckets_index2(0, 1), 2),
        (buckets_index2(1, 0), 3),
    ];
    for (slot, count) in seeds {
        buckets[slot] = count;
    }

    initialize_buckets_for_radix_and_partial_sorting_32s_4k(&text, k as SaSint, &mut buckets, 4);

    let (lower, upper) = (buckets[2 * k], buckets[3 * k]);
    assert_eq!(lower, 0);
    assert!(upper >= lower);
}
16459
/// Four suffixes stored in the upper half of `sa` must end up in the lower
/// half as `[1, 3, 0, 2]`.
#[test]
fn radix_sort_lms_suffixes_8u_places_suffixes_by_bucket() {
    let text = vec![1_u8, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 0, 1, 2, 3];

    let mut bucket_ends = vec![0; 2 * ALPHABET_SIZE];
    for (symbol, end) in [(0, 2), (1, 4)] {
        bucket_ends[buckets_index2(symbol, 0)] = end;
    }

    radix_sort_lms_suffixes_8u(&text, &mut sa, &mut bucket_ends, 4, 4);

    assert_eq!(&sa[..4], &[1, 3, 0, 2]);
}
16470
/// Calls the `_omp` entry point with two threads on a tiny input and checks
/// the first four SA slots.
#[test]
fn radix_sort_lms_suffixes_8u_omp_wraps_sequential_version() {
    let text = vec![9_u8, 1, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];

    // Bucket values are seeded in the lane starting at 4 * ALPHABET_SIZE.
    let lane = 4 * ALPHABET_SIZE;
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for (symbol, end) in [(0, 2), (1, 4)] {
        buckets[lane + buckets_index2(symbol, 0)] = end;
    }
    let mut states = alloc_thread_state(2).unwrap();

    radix_sort_lms_suffixes_8u_omp(&text, &mut sa, 9, 5, 0, &mut buckets, 2, &mut states);

    assert_eq!(&sa[..4], &[2, 4, 1, 3]);
}
16482
/// Runs the same large input through the 1-thread and 4-thread paths of
/// `radix_sort_lms_suffixes_8u_omp` and requires identical suffix arrays.
#[test]
fn radix_sort_lms_suffixes_8u_omp_uses_thread_state_for_large_inputs() {
    let m = 65_600usize;
    let n = 2 * m + 16;
    let first_slot = n - m + 1;
    let text: Vec<u8> = (0..n).map(|i| (i % 4) as u8).collect();
    let suffixes: Vec<SaSint> = (0..m - 1).map(|i| i as SaSint).collect();

    // Histogram of the first symbol of each suffix, stored in the lane at 4 * ALPHABET_SIZE.
    let lane = 4 * ALPHABET_SIZE;
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for &suffix in &suffixes {
        let symbol = text[suffix as usize] as usize;
        buckets[lane + buckets_index2(symbol, 0)] += 1;
    }
    // Turn the counts into inclusive running totals.
    let mut running = 0;
    for symbol in 0..ALPHABET_SIZE {
        let slot = lane + buckets_index2(symbol, 0);
        running += buckets[slot];
        buckets[slot] = running;
    }

    let mut sa_reference = vec![0; n];
    sa_reference[first_slot..first_slot + suffixes.len()].copy_from_slice(&suffixes);
    let mut sa_parallel = sa_reference.clone();
    let mut buckets_reference = buckets.clone();
    let mut buckets_parallel = buckets;
    let mut states = alloc_thread_state(4).unwrap();
    states[3].m = m as FastSint;

    radix_sort_lms_suffixes_8u_omp(
        &text,
        &mut sa_reference,
        n as SaSint,
        m as SaSint,
        0,
        &mut buckets_reference,
        1,
        &mut [],
    );
    radix_sort_lms_suffixes_8u_omp(
        &text,
        &mut sa_parallel,
        n as SaSint,
        m as SaSint,
        0,
        &mut buckets_parallel,
        4,
        &mut states,
    );

    assert_eq!(sa_parallel, sa_reference);
}
16533
/// Four suffixes stored in the upper half of `sa` must end up in the lower
/// half as `[1, 3, 0, 2]`.
#[test]
fn radix_sort_lms_suffixes_32s_6k_places_suffixes_by_bucket() {
    let text = vec![1, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 0, 1, 2, 3];
    let mut bucket_ends = vec![2, 4];

    radix_sort_lms_suffixes_32s_6k(&text, &mut sa, &mut bucket_ends, 4, 4);

    let sorted = &sa[..4];
    assert_eq!(sorted, &[1, 3, 0, 2]);
}
16542
/// Same scenario as the 6k variant, with the bucket values stored at every
/// second slot of the bucket array.
#[test]
fn radix_sort_lms_suffixes_32s_2k_places_suffixes_by_bucket() {
    let text = vec![1, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 0, 1, 2, 3];
    let mut bucket_ends = vec![2, 0, 4, 0];

    radix_sort_lms_suffixes_32s_2k(&text, &mut sa, &mut bucket_ends, 4, 4);

    let sorted = &sa[..4];
    assert_eq!(sorted, &[1, 3, 0, 2]);
}
16551
/// Calls the 6k `_omp` entry point with two threads on a tiny input and
/// checks the first four SA slots.
#[test]
fn radix_sort_lms_suffixes_32s_6k_omp_wraps_sequential_version() {
    let text = vec![9, 1, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];
    let mut bucket_ends = vec![2, 4];
    let mut states = alloc_thread_state(2).unwrap();

    radix_sort_lms_suffixes_32s_6k_omp(&text, &mut sa, 9, 5, &mut bucket_ends, 2, &mut states);

    let sorted = &sa[..4];
    assert_eq!(sorted, &[2, 4, 1, 3]);
}
16569
/// Calls the 2k `_omp` entry point with two threads on a tiny input and
/// checks the first four SA slots.
#[test]
fn radix_sort_lms_suffixes_32s_2k_omp_wraps_sequential_version() {
    let text = vec![9, 1, 0, 1, 0];
    let mut sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];
    let mut bucket_ends = vec![2, 0, 4, 0];
    let mut states = alloc_thread_state(2).unwrap();

    radix_sort_lms_suffixes_32s_2k_omp(&text, &mut sa, 9, 5, &mut bucket_ends, 2, &mut states);

    let sorted = &sa[..4];
    assert_eq!(sorted, &[2, 4, 1, 3]);
}
16587
/// Drives the 6k and 2k `_block_omp` helpers directly with a caller-supplied
/// cache and checks the first four SA slots after each call.
#[test]
fn radix_sort_lms_suffixes_32s_block_omp_runs_cache_pipeline() {
    let text = vec![9, 1, 0, 1, 0];
    let initial_sa = vec![9, 9, 9, 9, 9, 1, 2, 3, 4];
    let mut cache = vec![ThreadCache::default(); 9];

    // 6k layout: one bucket value per symbol.
    let mut sa_6k = initial_sa.clone();
    let mut bucket_6k = vec![2, 4];
    radix_sort_lms_suffixes_32s_6k_block_omp(&text, &mut sa_6k, &mut bucket_6k, &mut cache, 5, 4, 2);
    assert_eq!(&sa_6k[..4], &[2, 4, 1, 3]);

    // 2k layout: bucket values at every second slot; the cache is reset first.
    let mut sa_2k = initial_sa;
    let mut bucket_2k = vec![2, 0, 4, 0];
    cache.fill(ThreadCache::default());
    radix_sort_lms_suffixes_32s_2k_block_omp(&text, &mut sa_2k, &mut bucket_2k, &mut cache, 5, 4, 2);
    assert_eq!(&sa_2k[..4], &[2, 4, 1, 3]);
}
16619
/// For both the 6k and 2k `_omp` variants, the 4-thread run on a large input
/// must leave the same suffix array and buckets as the 1-thread run.
#[test]
fn radix_sort_lms_suffixes_32s_omp_uses_block_pipeline_for_large_inputs() {
    let m = 65_600usize;
    let n = 2 * m + 16;
    let first_slot = n - m + 1;
    let text: Vec<SaSint> = (0..n).map(|i| (i % 4) as SaSint).collect();
    let suffixes: Vec<SaSint> = (0..m - 1).map(|i| i as SaSint).collect();

    // Per-symbol histogram of the suffixes' first symbols, then inclusive running totals.
    let mut bucket_ends = vec![0; 4];
    for &suffix in &suffixes {
        let symbol = text[suffix as usize] as usize;
        bucket_ends[symbol] += 1;
    }
    let mut running = 0;
    for end in bucket_ends.iter_mut() {
        running += *end;
        *end = running;
    }

    // Builds a zeroed SA with the suffixes copied in starting at `first_slot`.
    let seeded_sa = || {
        let mut sa = vec![0; n];
        sa[first_slot..first_slot + suffixes.len()].copy_from_slice(&suffixes);
        sa
    };
    let mut states = alloc_thread_state(4).unwrap();

    // 6k variant.
    let mut sa_reference = seeded_sa();
    let mut sa_parallel = sa_reference.clone();
    let mut bucket_reference = bucket_ends.clone();
    let mut bucket_parallel = bucket_ends.clone();

    radix_sort_lms_suffixes_32s_6k_omp(
        &text,
        &mut sa_reference,
        n as SaSint,
        m as SaSint,
        &mut bucket_reference,
        1,
        &mut [],
    );
    radix_sort_lms_suffixes_32s_6k_omp(
        &text,
        &mut sa_parallel,
        n as SaSint,
        m as SaSint,
        &mut bucket_parallel,
        4,
        &mut states,
    );
    assert_eq!(sa_parallel, sa_reference);
    assert_eq!(bucket_parallel, bucket_reference);

    // 2k variant: same totals, stored at the `buckets_index2(symbol, 0)` slots.
    let mut bucket_2k = vec![0; 8];
    for (symbol, &end) in bucket_ends.iter().enumerate() {
        bucket_2k[buckets_index2(symbol, 0)] = end;
    }
    let mut sa_reference = seeded_sa();
    let mut sa_parallel = sa_reference.clone();
    let mut bucket_reference = bucket_2k.clone();
    let mut bucket_parallel = bucket_2k;

    radix_sort_lms_suffixes_32s_2k_omp(
        &text,
        &mut sa_reference,
        n as SaSint,
        m as SaSint,
        &mut bucket_reference,
        1,
        &mut [],
    );
    radix_sort_lms_suffixes_32s_2k_omp(
        &text,
        &mut sa_parallel,
        n as SaSint,
        m as SaSint,
        &mut bucket_parallel,
        4,
        &mut states,
    );
    assert_eq!(sa_parallel, sa_reference);
    assert_eq!(bucket_parallel, bucket_reference);
}
16697
/// Smoke test: the value returned for a five-symbol text must be non-negative.
#[test]
fn radix_sort_lms_suffixes_32s_1k_collects_lms_suffixes() {
    let text = vec![2, 1, 3, 1, 0];
    let text_len = text.len();
    let mut sa = vec![0; text_len];
    let mut buckets = vec![0, 2, 4, 5];

    let collected = radix_sort_lms_suffixes_32s_1k(&text, &mut sa, text_len as SaSint, &mut buckets);

    assert!(collected >= 0);
}
16706
/// SA slots 1, 3 and 5 (the values held in the induction bucket) must be set
/// to `SAINT_MIN`.
#[test]
fn radix_sort_set_markers_32s_6k_marks_target_suffixes() {
    let mut sa = vec![0; 6];
    let induction_bucket = vec![1, 3, 5];

    radix_sort_set_markers_32s_6k(&mut sa, &induction_bucket, 0, 3);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SAINT_MIN);
    }
}
16716
/// SA slots 1, 3 and 5 (the even-position values of the induction bucket)
/// must be set to `SUFFIX_GROUP_MARKER`.
#[test]
fn radix_sort_set_markers_32s_4k_marks_target_suffixes() {
    let mut sa = vec![0; 6];
    let induction_bucket = vec![1, 0, 3, 0, 5, 0];

    radix_sort_set_markers_32s_4k(&mut sa, &induction_bucket, 0, 3);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SUFFIX_GROUP_MARKER);
    }
}
16726
/// Two-thread `_omp` call on a tiny input: SA slots 1, 3 and 5 must be set
/// to `SAINT_MIN`.
#[test]
fn radix_sort_set_markers_32s_6k_omp_wraps_sequential_version() {
    let mut sa = vec![0; 6];
    let induction_bucket = vec![1, 3, 5];

    radix_sort_set_markers_32s_6k_omp(&mut sa, 4, &induction_bucket, 2);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SAINT_MIN);
    }
}
16736
/// Two-thread `_omp` call on a tiny input: SA slots 1, 3 and 5 must be set
/// to `SUFFIX_GROUP_MARKER`.
#[test]
fn radix_sort_set_markers_32s_4k_omp_wraps_sequential_version() {
    let mut sa = vec![0; 6];
    let induction_bucket = vec![1, 0, 3, 0, 5, 0];

    radix_sort_set_markers_32s_4k_omp(&mut sa, 4, &induction_bucket, 2);

    for slot in [1usize, 3, 5] {
        assert_eq!(sa[slot], SUFFIX_GROUP_MARKER);
    }
}
16746
/// For both marker variants, the 4-thread run over 65,600 buckets must match
/// the 1-thread run.
#[test]
fn radix_sort_set_markers_32s_omp_partitions_large_inputs() {
    let k = 65_600usize;
    let k_arg = k as SaSint;

    // 6k layout: bucket `i` holds the value `i`.
    let bucket_6k: Vec<SaSint> = (0..k).map(|i| i as SaSint).collect();
    {
        let mut sa_reference = vec![0; k];
        let mut sa_parallel = vec![0; k];
        radix_sort_set_markers_32s_6k_omp(&mut sa_reference, k_arg, &bucket_6k, 1);
        radix_sort_set_markers_32s_6k_omp(&mut sa_parallel, k_arg, &bucket_6k, 4);
        assert_eq!(sa_parallel, sa_reference);
    }

    // 4k layout: the same values, stored at the `buckets_index2(i, 0)` slots.
    let mut bucket_4k = vec![0; 2 * k];
    for (symbol, &value) in bucket_6k.iter().enumerate() {
        bucket_4k[buckets_index2(symbol, 0)] = value;
    }
    {
        let mut sa_reference = vec![0; k];
        let mut sa_parallel = vec![0; k];
        radix_sort_set_markers_32s_4k_omp(&mut sa_reference, k_arg, &bucket_4k, 1);
        radix_sort_set_markers_32s_4k_omp(&mut sa_parallel, k_arg, &bucket_4k, 4);
        assert_eq!(sa_parallel, sa_reference);
    }
}
16767
/// Loose bounds check on three bucket entries after initialisation with two
/// seeded counts.
#[test]
fn initialize_buckets_for_partial_sorting_8u_sets_start_and_distinct_views() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    for (slot, count) in [(buckets_index4(0, 0), 1), (buckets_index4(0, 2), 2)] {
        buckets[slot] = count;
    }

    initialize_buckets_for_partial_sorting_8u(&text, &mut buckets, 4, 3);

    let (first, second) = (buckets[0], buckets[1]);
    assert!(first >= 4);
    assert!(second >= 0);
    assert!(buckets[4 * ALPHABET_SIZE] >= 4);
}
16779
/// Loose bounds check on the entries at offsets 0 and `4 * k` after
/// initialisation with three seeded counts.
#[test]
fn initialize_buckets_for_partial_sorting_32s_6k_rewrites_bucket_views() {
    let text = vec![2, 1, 3, 1, 0];
    let k = 4usize;
    let mut buckets = vec![0; 6 * k];
    let seeds = [
        (buckets_index4(0, 0), 1),
        (buckets_index4(0, 1), 2),
        (buckets_index4(1, 2), 3),
    ];
    for (slot, count) in seeds {
        buckets[slot] = count;
    }

    initialize_buckets_for_partial_sorting_32s_6k(&text, k as SaSint, &mut buckets, 4, 3);

    for slot in [0, 4 * k] {
        assert!(buckets[slot] >= 4);
    }
}
16792
/// Smoke test: scanning two SA entries must return a non-negative value and
/// leave a non-zero SA entry.
#[test]
fn partial_sorting_scan_left_to_right_8u_emits_induced_suffixes() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    let seeded_slot = 4 * ALPHABET_SIZE + buckets_index2(1, 0);
    buckets[seeded_slot] = 2;

    let distinct = partial_sorting_scan_left_to_right_8u(&text, &mut sa, &mut buckets, 0, 0, 2);

    assert!(distinct >= 0);
    assert!(sa.iter().any(|&entry| entry != 0));
}
16803
/// Two-thread `_omp` call on a tiny input: the returned value must be at least 1.
#[test]
fn partial_sorting_scan_left_to_right_8u_omp_wraps_sequential_version() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut sa = vec![0; 8];
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    let seeded_slot = 4 * ALPHABET_SIZE + buckets_index2(0, 0);
    buckets[seeded_slot] = 1;
    let mut states = alloc_thread_state(2).unwrap();

    let distinct = partial_sorting_scan_left_to_right_8u_omp(
        &text,
        &mut sa,
        5,
        4,
        &mut buckets,
        0,
        0,
        2,
        &mut states,
    );

    assert!(distinct >= 1);
}
16824
/// Smoke test: scanning two SA entries must return a non-negative value and
/// leave a non-zero SA entry.
#[test]
fn partial_sorting_scan_left_to_right_32s_6k_emits_induced_suffixes() {
    let text = vec![2, 1, 3, 1, 0];
    let mut sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];
    let mut buckets = vec![0; 4 * 4];
    let seeded_slot = buckets_index4(1, 0);
    buckets[seeded_slot] = 2;

    let distinct = partial_sorting_scan_left_to_right_32s_6k(&text, &mut sa, &mut buckets, 0, 0, 2);

    assert!(distinct >= 0);
    assert!(sa.iter().any(|&entry| entry != 0));
}
16835
/// Smoke test: scanning two SA entries must return a non-negative value and
/// leave a non-zero SA entry.
#[test]
fn partial_sorting_scan_left_to_right_32s_4k_emits_induced_suffixes() {
    let text = vec![2, 1, 3, 1, 0];
    let k = 4usize;
    let mut sa = vec![2 | SUFFIX_GROUP_MARKER, 4, 0, 0, 0, 0];
    let mut buckets = vec![0; 4 * k];
    let seeded_slot = 2 * k + 1;
    buckets[seeded_slot] = 2;

    let distinct =
        partial_sorting_scan_left_to_right_32s_4k(&text, &mut sa, k as SaSint, &mut buckets, 0, 0, 2);

    assert!(distinct >= 0);
    assert!(sa.iter().any(|&entry| entry != 0));
}
16855
/// Smoke test: after scanning two SA entries at least one SA entry must be non-zero.
#[test]
fn partial_sorting_scan_left_to_right_32s_1k_emits_induced_suffixes() {
    let text = vec![2, 1, 3, 1, 0];
    let mut sa = vec![2, 4, 0, 0, 0, 0];
    let mut buckets = vec![0; 4];
    buckets[1] = 2;

    partial_sorting_scan_left_to_right_32s_1k(&text, &mut sa, &mut buckets, 0, 2);

    let touched = sa.iter().any(|&entry| entry != 0);
    assert!(touched);
}
16865
/// Two-thread `_omp` call on a tiny all-zero input: the returned value must
/// be at least 1.
#[test]
fn partial_sorting_scan_left_to_right_32s_6k_omp_wraps_sequential_version() {
    let text = vec![2, 1, 3, 1, 0];
    let mut sa = vec![0; 8];
    let mut buckets = vec![0; 4 * 4];
    let mut states = alloc_thread_state(2).unwrap();

    let distinct = partial_sorting_scan_left_to_right_32s_6k_omp(
        &text,
        &mut sa,
        5,
        &mut buckets,
        0,
        0,
        2,
        &mut states,
    );

    assert!(distinct >= 1);
}
16884
/// Two-thread `_omp` call on a tiny all-zero input: the returned value must
/// be at least 1.
#[test]
fn partial_sorting_scan_left_to_right_32s_4k_omp_wraps_sequential_version() {
    let text = vec![2, 1, 3, 1, 0];
    let k = 4usize;
    let mut sa = vec![0; 8];
    let mut buckets = vec![0; 4 * k];
    let mut states = alloc_thread_state(2).unwrap();

    let distinct = partial_sorting_scan_left_to_right_32s_4k_omp(
        &text,
        &mut sa,
        5,
        k as SaSint,
        &mut buckets,
        0,
        2,
        &mut states,
    );

    assert!(distinct >= 1);
}
16904
/// Two-thread `_omp` call on a tiny all-zero input: at least one SA entry
/// must be non-zero afterwards.
#[test]
fn partial_sorting_scan_left_to_right_32s_1k_omp_wraps_sequential_version() {
    let text = vec![2, 1, 3, 1, 0];
    let mut sa = vec![0; 8];
    let mut buckets = vec![0; 4];
    let mut states = alloc_thread_state(2).unwrap();

    partial_sorting_scan_left_to_right_32s_1k_omp(&text, &mut sa, 5, &mut buckets, 2, &mut states);

    let touched = sa.iter().any(|&entry| entry != 0);
    assert!(touched);
}
16921
/// Gathering one marked SA entry must copy it into the cache and record
/// `buckets_index4(1, 1)` as its symbol.
#[test]
fn partial_sorting_scan_left_to_right_32s_6k_block_gather_records_bucket_symbols() {
    let text = vec![3, 1, 2, 0];
    let marked_suffix = 2 | SAINT_MIN;
    let mut sa = vec![marked_suffix, 0, 0, 0];
    let mut cache = vec![ThreadCache::default(); 1];

    partial_sorting_scan_left_to_right_32s_6k_block_gather(&text, &mut sa, &mut cache, 0, 1);

    let gathered = cache[0];
    assert_eq!(gathered.index, marked_suffix);
    assert_eq!(gathered.symbol, buckets_index4(1, 1) as SaSint);
}
16933
/// Gathering the positive SA entry 2 must record symbol 1 and index 1 in the
/// cache and reset the SA slot to 0.
#[test]
fn partial_sorting_scan_left_to_right_32s_1k_block_gather_zeroes_positive_entries() {
    let text = vec![3, 1, 2, 0];
    let mut sa = vec![2, 0, 0, 0];
    let mut cache = vec![ThreadCache::default(); 1];

    partial_sorting_scan_left_to_right_32s_1k_block_gather(&text, &mut sa, &mut cache, 0, 1);

    let gathered = cache[0];
    assert_eq!(gathered.symbol, 1);
    assert_eq!(gathered.index, 1);
    assert_eq!(sa[0], 0);
}
16946
/// The four-thread block helper, run on a block that starts at offset 20,000,
/// must leave the same SA and buckets as the sequential scan over that range.
#[test]
fn partial_sorting_scan_left_to_right_32s_1k_block_omp_uses_relative_cache() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;
    let text = vec![1; n];
    let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();

    let mut sa_reference = vec![0; n];
    sa_reference[block_start..block_end].copy_from_slice(&suffixes);
    let mut sa_parallel = sa_reference.clone();
    let mut bucket_reference = vec![0, 0];
    let mut bucket_parallel = bucket_reference.clone();
    let mut cache = vec![ThreadCache::default(); 4 * LIBSAIS_PER_THREAD_CACHE_SIZE];

    partial_sorting_scan_left_to_right_32s_1k(
        &text,
        &mut sa_reference,
        &mut bucket_reference,
        block_start as FastSint,
        block_size as FastSint,
    );
    partial_sorting_scan_left_to_right_32s_1k_block_omp(
        &text,
        &mut sa_parallel,
        &mut bucket_parallel,
        &mut cache,
        block_start as FastSint,
        block_size as FastSint,
        4,
    );

    assert_eq!(sa_parallel, sa_reference);
    assert_eq!(bucket_parallel, bucket_reference);
}
16982
/// The prepare step over two SA entries must report a count of at least one,
/// and at least one of the counted cache entries must have a non-negative symbol.
///
/// The returned `(position, count)` pair is destructured into locals; only
/// `count` is inspected, so no `ThreadState` needs to be allocated.
#[test]
fn partial_sorting_scan_left_to_right_8u_block_prepare_records_cache_and_counts() {
    let t = vec![2_u8, 1, 3, 1, 0];
    let sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];
    let k = 4;
    let mut buckets = vec![0; 4 * k];
    let mut cache = vec![ThreadCache::default(); 8];

    let (_position, count) = partial_sorting_scan_left_to_right_8u_block_prepare(
        &t,
        &sa,
        k as SaSint,
        &mut buckets,
        &mut cache,
        0,
        2,
    );

    assert!(count >= 1);
    assert!(cache
        .iter()
        .take(count as usize)
        .any(|entry| entry.symbol >= 0));
}
17008
/// Smoke test: placing two cached entries must write a non-zero value into
/// SA slot 0 or 1.
#[test]
fn partial_sorting_scan_left_to_right_8u_block_place_writes_induced_values() {
    let mut sa = vec![0; 8];
    let mut buckets = vec![0; 8];
    // Slot 0 holds 0 and slot 1 holds 1.
    buckets[..2].copy_from_slice(&[0, 1]);

    let cache = vec![
        ThreadCache {
            symbol: 0,
            index: 3 | SAINT_MIN,
        },
        ThreadCache {
            symbol: 1,
            index: 5,
        },
    ];

    partial_sorting_scan_left_to_right_8u_block_place(&mut sa, &mut buckets, 2, &cache, 2, 0);

    let wrote_something = sa[0] != 0 || sa[1] != 0;
    assert!(wrote_something);
}
17028
/// Two-thread block helper on a tiny input: the returned value must be non-negative.
#[test]
fn partial_sorting_scan_left_to_right_8u_block_omp_wraps_sequential_version() {
    let text = vec![2_u8, 1, 3, 1, 0];
    let mut sa = vec![2 | SAINT_MIN, 4, 0, 0, 0, 0];
    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    let mut states = alloc_thread_state(2).unwrap();

    let distinct = partial_sorting_scan_left_to_right_8u_block_omp(
        &text,
        &mut sa,
        4,
        &mut buckets,
        0,
        0,
        2,
        2,
        &mut states,
    );

    assert!(distinct >= 0);
}
17048
/// Smoke test: after the single-threaded call at least one SA entry must
/// have the `SAINT_MIN` bit clear.
#[test]
fn partial_sorting_shift_markers_8u_omp_toggles_segment_markers() {
    let mut sa = vec![1 | SAINT_MIN, 2 | SAINT_MIN, 3, 4 | SAINT_MIN, 5];
    let sa_len = sa.len() as SaSint;

    let mut buckets = vec![0; 6 * ALPHABET_SIZE];
    buckets[4 * ALPHABET_SIZE + buckets_index2(1, 0)] = 5;
    buckets[buckets_index2(0, 0)] = 0;

    partial_sorting_shift_markers_8u_omp(&mut sa, sa_len, &buckets, 1);

    let has_unmarked = sa.iter().any(|&entry| (entry & SAINT_MIN) == 0);
    assert!(has_unmarked);
}
17059
/// Smoke test: after the single-threaded call at least one SA entry must
/// have the `SAINT_MIN` bit clear.
#[test]
fn partial_sorting_shift_markers_32s_6k_omp_toggles_segment_markers() {
    let k = 3usize;
    let mut sa = vec![1 | SAINT_MIN, 2 | SAINT_MIN, 3, 4 | SAINT_MIN, 5];

    let mut buckets = vec![0; 6 * k];
    buckets[buckets_index4(1, 0)] = 5;
    buckets[4 * k + buckets_index2(0, 0)] = 0;

    partial_sorting_shift_markers_32s_6k_omp(&mut sa, k as SaSint, &buckets, 1);

    let has_unmarked = sa.iter().any(|&entry| (entry & SAINT_MIN) == 0);
    assert!(has_unmarked);
}
17070
/// Smoke test: after the call at least one SA entry must have the
/// `SUFFIX_GROUP_MARKER` bit clear.
#[test]
fn partial_sorting_shift_markers_32s_4k_toggles_group_markers() {
    let marked = |suffix: SaSint| suffix | SUFFIX_GROUP_MARKER;
    let mut sa = vec![marked(1), marked(2), 3, marked(4)];
    let sa_len = sa.len() as SaSint;

    partial_sorting_shift_markers_32s_4k(&mut sa, sa_len);

    let has_unmarked = sa.iter().any(|&entry| (entry & SUFFIX_GROUP_MARKER) == 0);
    assert!(has_unmarked);
}
17083
/// Seeds four consecutive values starting at offset `4 * k` and checks that
/// the call copies them into slots 0, 1, 4 and 5 of the bucket array.
#[test]
fn partial_sorting_shift_buckets_32s_6k_moves_temp_bucket_view_into_main_slots() {
    let k = 3usize;
    let mut buckets = vec![0; 6 * k];
    // One slice copy instead of four indexed stores (avoids the `+ 0` identity op).
    buckets[4 * k..4 * k + 4].copy_from_slice(&[10, 11, 12, 13]);

    partial_sorting_shift_buckets_32s_6k(k as SaSint, &mut buckets);

    assert_eq!(buckets[0], 10);
    assert_eq!(buckets[1], 11);
    assert_eq!(buckets[4], 12);
    assert_eq!(buckets[5], 13);
}
17098
/// Scanning the single marked entry must return 1, write `3 | SAINT_MIN`
/// into SA slot 1, and update both bucket entries checked below.
#[test]
fn partial_sorting_scan_right_to_left_8u_emits_induced_suffixes() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut sa = vec![0, 0, 4 | SAINT_MIN];
    let slot = buckets_index2(1, 1);
    let mut buckets = vec![0; 4 * ALPHABET_SIZE];
    buckets[slot] = 2;

    let distinct = partial_sorting_scan_right_to_left_8u(&text, &mut sa, &mut buckets, 0, 2, 1);

    assert_eq!(distinct, 1);
    assert_eq!(sa[1], 3 | SAINT_MIN);
    assert_eq!(buckets[slot], 1);
    assert_eq!(buckets[2 * ALPHABET_SIZE + slot], 1);
}
17113
/// The GSA scan must return 1 and leave both the SA and the seeded bucket
/// entry unchanged.
#[test]
fn partial_gsa_scan_right_to_left_8u_skips_separator_bucket() {
    let text = vec![1_u8, 0, 0];
    let initial_sa = vec![0, 2 | SAINT_MIN];
    let mut sa = initial_sa.clone();
    let slot = buckets_index2(0, 1);
    let mut buckets = vec![0; 4 * ALPHABET_SIZE];
    buckets[slot] = 2;

    let distinct = partial_gsa_scan_right_to_left_8u(&text, &mut sa, &mut buckets, 0, 1, 1);

    assert_eq!(distinct, 1);
    assert_eq!(sa, initial_sa);
    assert_eq!(buckets[slot], 2);
}
17127
/// Scanning the single marked entry must return 1, write `3 | SAINT_MIN`
/// into SA slot 1, and update the bucket entries at `slot` and `slot + 2`.
#[test]
fn partial_sorting_scan_right_to_left_32s_6k_emits_induced_suffixes() {
    let text = vec![0, 1, 2, 1, 0];
    let mut sa = vec![0, 0, 4 | SAINT_MIN];
    let slot = buckets_index4(1, 1);
    let mut buckets = vec![0; 4 * 3];
    buckets[slot] = 2;

    let distinct = partial_sorting_scan_right_to_left_32s_6k(&text, &mut sa, &mut buckets, 0, 2, 1);

    assert_eq!(distinct, 1);
    assert_eq!(sa[1], 3 | SAINT_MIN);
    assert_eq!(buckets[slot], 1);
    assert_eq!(buckets[slot + 2], 1);
}
17142
/// Drives the 1k `_omp` right-to-left scan on a tiny input and checks the
/// written suffix and the decremented bucket.
#[test]
fn partial_sorting_scan_right_to_left_32s_1k_omp_wraps_sequential_version() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffixes = vec![0, 0, 4];
    let mut buckets = vec![0, 2, 0];
    let mut thread_state = alloc_thread_state(2).unwrap();

    partial_sorting_scan_right_to_left_32s_1k_omp(
        &text,
        &mut suffixes,
        3,
        &mut buckets,
        2,
        &mut thread_state,
    );

    assert_eq!(suffixes[1], SAINT_MIN | 3);
    assert_eq!(buckets[1], 1);
}
17163
/// Gathers a one-element block and checks the index and symbol recorded in
/// the first cache entry.
#[test]
fn partial_sorting_scan_right_to_left_32s_6k_block_gather_records_symbols() {
    let text = vec![0, 1, 2, 1, 0];
    let marked = SAINT_MIN | 4;
    let suffixes = vec![0, marked, 0];
    let mut cache = vec![ThreadCache::default(); suffixes.len()];

    partial_sorting_scan_right_to_left_32s_6k_block_gather(&text, &suffixes, &mut cache, 1, 1);

    let ThreadCache { symbol, index } = cache[0];
    assert_eq!(index, marked);
    assert_eq!(symbol, buckets_index4(1, 1) as SaSint);
}
17175
/// Gathers a one-element block and checks that the SA slot is zeroed while
/// its original value and symbol land in the first cache entry.
#[test]
fn partial_sorting_scan_right_to_left_32s_4k_block_gather_zeroes_positive_entries() {
    let text = vec![0, 1, 2, 1, 0];
    let marked = SUFFIX_GROUP_MARKER | 4;
    let mut suffixes = vec![0, marked, 0];
    let mut cache = vec![ThreadCache::default(); suffixes.len()];

    partial_sorting_scan_right_to_left_32s_4k_block_gather(
        &text,
        &mut suffixes,
        &mut cache,
        1,
        1,
    );

    assert_eq!(suffixes[1], 0);
    let ThreadCache { symbol, index } = cache[0];
    assert_eq!(index, marked);
    assert_eq!(symbol, buckets_index2(1, 1) as SaSint);
}
17188
/// Gathers a one-element block and checks that the SA slot is zeroed and the
/// first cache entry holds index `3 | SAINT_MIN` with symbol 1.
#[test]
fn partial_sorting_scan_right_to_left_32s_1k_block_gather_stores_preinduced_entries() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffixes = vec![0, 4, 0];
    let mut cache = vec![ThreadCache::default(); suffixes.len()];

    partial_sorting_scan_right_to_left_32s_1k_block_gather(
        &text,
        &mut suffixes,
        &mut cache,
        1,
        1,
    );

    assert_eq!(suffixes[1], 0);
    let ThreadCache { symbol, index } = cache[0];
    assert_eq!(index, SAINT_MIN | 3);
    assert_eq!(symbol, 1);
}
17201
/// Feeds one pre-filled cache entry through the 6k block sort and checks the
/// returned value, the rewritten cache index and both touched bucket slots.
#[test]
fn partial_sorting_scan_right_to_left_32s_6k_block_sort_updates_bucket_and_marker_state() {
    let text = vec![0, 1, 2, 1, 0];
    let slot = buckets_index4(1, 1);

    let mut cache = vec![ThreadCache::default(); 3];
    cache[0] = ThreadCache {
        symbol: slot as SaSint,
        index: SAINT_MIN | 4,
    };

    let mut buckets = vec![0; 4 * 3];
    buckets[slot] = 2;

    let d = partial_sorting_scan_right_to_left_32s_6k_block_sort(
        &text,
        &mut buckets,
        0,
        &mut cache,
        1,
        1,
    );

    assert_eq!(d, 1);
    assert_eq!(cache[0].index, SAINT_MIN | 3);
    assert_eq!(buckets[slot], 1);
    assert_eq!(buckets[slot + 2], 1);
}
17225
/// Runs the 1k block `_omp` scan over a one-element block and checks the
/// written suffix and the decremented bucket.
#[test]
fn partial_sorting_scan_right_to_left_32s_1k_block_omp_places_cached_suffixes() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffixes = vec![0, 4, 0];
    let mut buckets = vec![0, 2, 0];
    let mut cache = vec![ThreadCache::default(); suffixes.len()];

    partial_sorting_scan_right_to_left_32s_1k_block_omp(
        &text,
        &mut suffixes,
        &mut buckets,
        &mut cache,
        1,
        1,
        2,
    );

    assert_eq!(suffixes[1], SAINT_MIN | 3);
    assert_eq!(buckets[1], 1);
}
17247
/// Compares the sequential 1k right-to-left scan with the block `_omp`
/// variant (called with 4, presumably the thread count) on a block that
/// starts at a non-zero offset; SA and buckets must match.
#[test]
fn partial_sorting_scan_right_to_left_32s_1k_block_omp_uses_relative_cache() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;
    let text = vec![1; n];

    // The block holds the consecutive suffixes 2, 3, ..., block_size + 1.
    let mut sa_single = vec![0; n];
    for (slot, suffix) in sa_single[block_start..block_end]
        .iter_mut()
        .zip(2..2 + block_size)
    {
        *slot = suffix as SaSint;
    }
    let mut sa_threaded = sa_single.clone();

    let mut bucket_single = vec![0, block_size as SaSint];
    let mut bucket_threaded = bucket_single.clone();
    let mut cache = vec![ThreadCache::default(); 4 * LIBSAIS_PER_THREAD_CACHE_SIZE];

    partial_sorting_scan_right_to_left_32s_1k(
        &text,
        &mut sa_single,
        &mut bucket_single,
        block_start as FastSint,
        block_size as FastSint,
    );
    partial_sorting_scan_right_to_left_32s_1k_block_omp(
        &text,
        &mut sa_threaded,
        &mut bucket_threaded,
        &mut cache,
        block_start as FastSint,
        block_size as FastSint,
        4,
    );

    assert_eq!(sa_threaded, sa_single);
    assert_eq!(bucket_threaded, bucket_single);
}
17283
/// Checks that the 4k gather returns 2 for an input with two negative entries
/// and leaves the expected values in the first two SA slots.
#[test]
fn partial_sorting_gather_lms_suffixes_32s_4k_compacts_negative_marked_entries() {
    let mut suffixes = vec![SUFFIX_GROUP_MARKER | 1, -3, SUFFIX_GROUP_MARKER | 5, -7];
    let len = suffixes.len() as FastSint;

    let gathered = partial_sorting_gather_lms_suffixes_32s_4k(&mut suffixes, 0, len);

    assert_eq!(gathered, 2);
    assert_eq!(&suffixes[..2], &[-1073741827, -1073741831]);
}
17295
/// Checks that the 1k gather returns 2 for an input with two negative entries
/// and leaves the expected values in the first two SA slots.
#[test]
fn partial_sorting_gather_lms_suffixes_32s_1k_compacts_negative_marked_entries() {
    let mut suffixes = vec![1, -3, 5, -7];
    let len = suffixes.len() as FastSint;

    let gathered = partial_sorting_gather_lms_suffixes_32s_1k(&mut suffixes, 0, len);

    assert_eq!(gathered, 2);
    assert_eq!(&suffixes[..2], &[SAINT_MAX - 2, SAINT_MAX - 6]);
}
17307
/// Runs the 4k gather through its `_omp` entry point on a four-element input
/// and checks the first two SA slots.
#[test]
fn partial_sorting_gather_lms_suffixes_32s_4k_omp_wraps_sequential_version() {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffixes = vec![SUFFIX_GROUP_MARKER | 1, -3, SUFFIX_GROUP_MARKER | 5, -7];

    partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut suffixes, 4, 2, &mut thread_state);

    assert_eq!(&suffixes[..2], &[-1073741827, -1073741831]);
}
17318
/// Runs the 1k gather through its `_omp` entry point on a four-element input
/// and checks the first two SA slots.
#[test]
fn partial_sorting_gather_lms_suffixes_32s_1k_omp_wraps_sequential_version() {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffixes = vec![1, -3, 5, -7];

    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut suffixes, 4, 2, &mut thread_state);

    assert_eq!(&suffixes[..2], &[SAINT_MAX - 2, SAINT_MAX - 6]);
}
17329
/// For both the 4k and 1k gather `_omp` entry points, compares a run with 1
/// against a run with 4 (presumably thread counts) on a 65,600-element input
/// and requires the gathered prefixes to agree.
#[test]
fn partial_sorting_gather_lms_suffixes_32s_omp_uses_block_partition() {
    let n = 65_600usize;
    let mut thread_state = alloc_thread_state(4).unwrap();

    // 4k flavour: every entry carries the group marker, every fifth is negative.
    let input_4k: Vec<SaSint> = (0..n)
        .map(|i| {
            let sign = if i % 5 == 0 { SAINT_MIN } else { 0 };
            (i as SaSint) | SUFFIX_GROUP_MARKER | sign
        })
        .collect();
    let count_4k = input_4k.iter().filter(|&&value| value < 0).count();

    let mut single_4k = input_4k.clone();
    let mut threaded_4k = input_4k;
    partial_sorting_gather_lms_suffixes_32s_4k_omp(&mut single_4k, n as SaSint, 1, &mut []);
    partial_sorting_gather_lms_suffixes_32s_4k_omp(
        &mut threaded_4k,
        n as SaSint,
        4,
        &mut thread_state,
    );
    assert_eq!(&threaded_4k[..count_4k], &single_4k[..count_4k]);

    // 1k flavour: plain indices, every seventh is negative.
    let input_1k: Vec<SaSint> = (0..n)
        .map(|i| {
            let sign = if i % 7 == 0 { SAINT_MIN } else { 0 };
            (i as SaSint) | sign
        })
        .collect();
    let count_1k = input_1k.iter().filter(|&&value| value < 0).count();

    let mut single_1k = input_1k.clone();
    let mut threaded_1k = input_1k;
    partial_sorting_gather_lms_suffixes_32s_1k_omp(&mut single_1k, n as SaSint, 1, &mut []);
    partial_sorting_gather_lms_suffixes_32s_1k_omp(
        &mut threaded_1k,
        n as SaSint,
        4,
        &mut thread_state,
    );
    assert_eq!(&threaded_1k[..count_1k], &single_1k[..count_1k]);
}
17380
/// Renumbers two entries and checks the returned name and the values written
/// to SA slots 2 and 3.
#[test]
fn renumber_lms_suffixes_8u_writes_names_into_second_half() {
    let mut suffixes = vec![SAINT_MIN | 1, 3, 0, 0];

    let name = renumber_lms_suffixes_8u(&mut suffixes, 2, 0, 0, 2);

    assert_eq!(name, 1);
    assert_eq!(&suffixes[2..], &[SAINT_MIN, SAINT_MIN | 1]);
}
17391
/// Runs the Rust renumbering and the C probe on identical buffers and
/// requires the same return value and the same resulting SA.
#[test]
fn renumber_lms_suffixes_8u_matches_upstream_c_helper() {
    let seed = vec![SAINT_MIN | 1, 3, 0, 0];
    let mut sa_rust = seed.clone();
    let mut sa_c = seed;

    let rust_name = renumber_lms_suffixes_8u(&mut sa_rust, 2, 0, 0, 2);
    // SAFETY (assumed): `sa_c` is a live, exclusively borrowed 4-element buffer;
    // the probe is expected to stay within it — confirm against its declaration.
    let c_name = unsafe { probe_renumber_lms_suffixes_8u(sa_c.as_mut_ptr(), 2, 0, 0, 2) };

    assert_eq!(rust_name, c_name);
    assert_eq!(sa_rust, sa_c);
}
17403
/// Gathers from a four-element SA and checks the returned position and the
/// value left in the last slot.
#[test]
fn gather_marked_lms_suffixes_moves_negative_marked_entries_to_tail() {
    let mut suffixes = vec![0, 0, SAINT_MIN | 1, 3];

    let position = gather_marked_lms_suffixes(&mut suffixes, 2, 4, 0, 2);

    assert_eq!(position, 3);
    assert_eq!(suffixes[3], 1);
}
17413
/// Runs the Rust gather and the C probe on identical buffers and requires
/// the same return value and the same resulting SA.
#[test]
fn gather_marked_lms_suffixes_matches_upstream_c_helper() {
    let seed = vec![0, 0, SAINT_MIN | 1, 3];
    let mut sa_rust = seed.clone();
    let mut sa_c = seed;

    let rust_l = gather_marked_lms_suffixes(&mut sa_rust, 2, 4, 0, 2);
    // SAFETY (assumed): `sa_c` is a live, exclusively borrowed 4-element buffer;
    // the probe is expected to stay within it — confirm against its declaration.
    let c_l = unsafe { probe_gather_marked_lms_suffixes(sa_c.as_mut_ptr(), 2, 4, 0, 2) };

    assert_eq!(rust_l, c_l);
    assert_eq!(sa_rust, sa_c);
}
17425
/// Renumbers two entries through the `_omp` entry point and checks the
/// returned name and SA slot 2.
#[test]
fn renumber_lms_suffixes_8u_omp_wraps_sequential_version() {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffixes = vec![SAINT_MIN | 1, 3, 0, 0];

    let name = renumber_lms_suffixes_8u_omp(&mut suffixes, 2, 2, &mut thread_state);

    assert_eq!(name, 1);
    assert_eq!(suffixes[2], SAINT_MIN);
}
17436
/// Compares the sequential renumbering with the `_omp` entry point (called
/// with 4, presumably the thread count) on 65,600 entries; the returned names
/// and the full SA must match.
#[test]
fn renumber_lms_suffixes_8u_omp_uses_block_partition_for_large_inputs() {
    let m = 65_600usize;

    // First half: odd suffix positions, every fifth one flagged negative.
    let mut input = vec![0; 2 * m];
    for (i, slot) in input[..m].iter_mut().enumerate() {
        let sign = if i % 5 == 0 { SAINT_MIN } else { 0 };
        *slot = ((2 * i + 1) as SaSint) | sign;
    }

    let mut single = input.clone();
    let mut threaded = input;
    let mut thread_state = alloc_thread_state(4).unwrap();

    let single_name = renumber_lms_suffixes_8u(&mut single, m as SaSint, 0, 0, m as FastSint);
    let threaded_name =
        renumber_lms_suffixes_8u_omp(&mut threaded, m as SaSint, 4, &mut thread_state);

    assert_eq!(threaded_name, single_name);
    assert_eq!(threaded, single);
}
17460
/// Compares the sequential gather with the `_omp` entry point (called with 4,
/// presumably the thread count) on a 131,200-element SA; the last
/// `marked_count` slots must match.
#[test]
fn gather_marked_lms_suffixes_omp_uses_block_partition_for_large_inputs() {
    let n = 131_200usize;
    let half_n = n / 2;

    // First half: suffixes 1, 4, 7, ..., every seventh flagged negative.
    // The second half keeps the -77 filler.
    let mut input = vec![-77; n];
    for (i, slot) in input[..half_n].iter_mut().enumerate() {
        let sign = if i % 7 == 0 { SAINT_MIN } else { 0 };
        *slot = ((3 * i + 1) as SaSint) | sign;
    }
    let marked_count = input[..half_n].iter().filter(|&&value| value < 0).count();
    let tail_start = n - marked_count;

    let mut single = input.clone();
    let mut threaded = input;
    let mut thread_state = alloc_thread_state(4).unwrap();

    let _ = gather_marked_lms_suffixes(&mut single, 0, n as FastSint, 0, half_n as FastSint);
    gather_marked_lms_suffixes_omp(&mut threaded, n as SaSint, 0, 0, 4, &mut thread_state);

    assert_eq!(&threaded[tail_start..], &single[tail_start..]);
}
17484
/// Runs the combined renumber-and-gather `_omp` entry point with 1 and with 4
/// (presumably thread counts) on the same large input; the returned names and
/// the full SA must match.
#[test]
fn renumber_and_gather_lms_suffixes_omp_uses_large_input_paths() {
    let m = 65_600usize;
    let n = 2 * m;

    // First half: odd suffix positions, every fifth one flagged negative.
    let mut input = vec![0; n];
    for (i, slot) in input[..m].iter_mut().enumerate() {
        let sign = if i % 5 == 0 { SAINT_MIN } else { 0 };
        *slot = ((2 * i + 1) as SaSint) | sign;
    }

    let mut single = input.clone();
    let mut threaded = input;
    let mut single_state = alloc_thread_state(1).unwrap();
    let mut threaded_state = alloc_thread_state(4).unwrap();

    let single_name = renumber_and_gather_lms_suffixes_omp(
        &mut single,
        n as SaSint,
        m as SaSint,
        0,
        1,
        &mut single_state,
    );
    let threaded_name = renumber_and_gather_lms_suffixes_omp(
        &mut threaded,
        n as SaSint,
        m as SaSint,
        0,
        4,
        &mut threaded_state,
    );

    assert_eq!(threaded_name, single_name);
    assert_eq!(threaded, single);
}
17523
/// Runs the combined renumber-and-gather on a four-element SA and checks the
/// returned name and the value left in the last slot.
#[test]
fn renumber_and_gather_lms_suffixes_omp_gathers_when_names_are_not_distinct() {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffixes = vec![SAINT_MIN | 1, 3, 0, 0];

    let name =
        renumber_and_gather_lms_suffixes_omp(&mut suffixes, 4, 2, 0, 2, &mut thread_state);

    assert_eq!(name, 1);
    assert_eq!(suffixes[3], 1);
}
17534
/// Runs the Rust renumber-and-gather and the C probe on identical buffers and
/// requires the same return value and the same resulting SA.
#[test]
fn renumber_and_gather_lms_suffixes_omp_matches_upstream_c_helper() {
    let seed = vec![SAINT_MIN | 1, 3, 0, 0];
    let mut sa_rust = seed.clone();
    let mut sa_c = seed;
    let mut thread_state = alloc_thread_state(2).unwrap();

    let rust_name =
        renumber_and_gather_lms_suffixes_omp(&mut sa_rust, 4, 2, 0, 2, &mut thread_state);
    // SAFETY (assumed): `sa_c` is a live, exclusively borrowed 4-element buffer;
    // the probe is expected to stay within it — confirm against its declaration.
    let c_name =
        unsafe { probe_renumber_and_gather_lms_suffixes_omp(sa_c.as_mut_ptr(), 4, 2, 0, 2) };

    assert_eq!(rust_name, c_name);
    assert_eq!(sa_rust, sa_c);
}
17549
/// Renumbers two negative-flagged entries and checks the returned name and
/// all four SA slots afterwards.
#[test]
fn renumber_distinct_lms_suffixes_32s_4k_masks_sources_and_writes_second_half() {
    let mut suffixes = vec![SAINT_MIN | 1, SAINT_MIN | 3, 0, 0];

    let name = renumber_distinct_lms_suffixes_32s_4k(&mut suffixes, 2, 1, 0, 2);

    assert_eq!(name, 3);
    assert_eq!(suffixes, vec![1, 3, 1, SAINT_MIN | 2]);
}
17562
/// Runs the Rust distinct renumbering and the C probe on identical buffers
/// and requires the same return value and the same resulting SA.
#[test]
fn renumber_distinct_lms_suffixes_32s_4k_matches_upstream_c_helper() {
    let seed = vec![SAINT_MIN | 1, SAINT_MIN | 3, 0, 0];
    let mut sa_rust = seed.clone();
    let mut sa_c = seed;

    let rust_name = renumber_distinct_lms_suffixes_32s_4k(&mut sa_rust, 2, 1, 0, 2);
    // SAFETY (assumed): `sa_c` is a live, exclusively borrowed 4-element buffer;
    // the probe is expected to stay within it — confirm against its declaration.
    let c_name =
        unsafe { probe_renumber_distinct_lms_suffixes_32s_4k(sa_c.as_mut_ptr(), 2, 1, 0, 2) };

    assert_eq!(rust_name, c_name);
    assert_eq!(sa_rust, sa_c);
}
17575
/// Marks distinct suffixes over SA slots 2..5 and checks the three resulting
/// values.
#[test]
fn mark_distinct_lms_suffixes_32s_propagates_previous_nonzero_marker() {
    let mut suffixes = vec![0, 0, SAINT_MIN | 5, 0, SAINT_MIN | 7];

    mark_distinct_lms_suffixes_32s(&mut suffixes, 2, 0, 3);

    assert_eq!(&suffixes[2..], &[5, 0, SAINT_MIN | 7]);
}
17586
/// Clamps SA slots 2..5: the two negative-flagged entries lose their sign bit
/// and the positive entry becomes zero.
#[test]
fn clamp_lms_suffixes_length_32s_keeps_only_negative_lengths() {
    let mut suffixes = vec![0, 0, SAINT_MIN | 5, 7, SAINT_MIN | 3];

    clamp_lms_suffixes_length_32s(&mut suffixes, 2, 0, 3);

    assert_eq!(&suffixes[2..], &[5, 0, 3]);
}
17597
/// Runs the combined 4k renumber-and-mark on a four-element SA and checks the
/// returned name and SA slots 2 and 3.
#[test]
fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp_marks_second_half_when_names_repeat() {
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut suffixes = vec![SAINT_MIN | 1, SAINT_MIN | 3, 0, 0];

    let name = renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
        &mut suffixes,
        4,
        2,
        2,
        &mut thread_state,
    );

    assert_eq!(name, 2);
    assert_eq!(&suffixes[2..], &[1, SAINT_MIN | 2]);
}
17610
/// Runs the Rust 4k renumber-and-mark and the C probe on identical buffers
/// and requires the same return value and the same resulting SA.
#[test]
fn renumber_and_mark_distinct_lms_suffixes_32s_4k_omp_matches_upstream_c_helper() {
    let seed = vec![SAINT_MIN | 1, SAINT_MIN | 3, 0, 0];
    let mut sa_rust = seed.clone();
    let mut sa_c = seed;
    let mut thread_state = alloc_thread_state(2).unwrap();

    let rust_name = renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
        &mut sa_rust,
        4,
        2,
        2,
        &mut thread_state,
    );
    // SAFETY (assumed): `sa_c` is a live, exclusively borrowed 4-element buffer;
    // the probe is expected to stay within it — confirm against its declaration.
    let c_name = unsafe {
        probe_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(sa_c.as_mut_ptr(), 4, 2, 2)
    };

    assert_eq!(rust_name, c_name);
    assert_eq!(sa_rust, sa_c);
}
17631
/// The first three SA slots hold 0, 1, 2 and the last three hold 7, 11, 13;
/// after reconstruction the first three must read 7, 11, 13.
#[test]
fn reconstruct_lms_suffixes_maps_indices_from_tail_interval() {
    let mut suffixes = vec![0, 1, 2, 7, 11, 13];

    reconstruct_lms_suffixes(&mut suffixes, 6, 3, 0, 3);

    let head = &suffixes[..3];
    assert_eq!(head, &[7, 11, 13]);
}
17640
/// Same six-element scenario as the sequential test, driven through the
/// `_omp` entry point; the first three slots must read 7, 11, 13.
#[test]
fn reconstruct_lms_suffixes_omp_wraps_sequential_version() {
    let mut suffixes = vec![0, 1, 2, 7, 11, 13];

    reconstruct_lms_suffixes_omp(&mut suffixes, 6, 3, 2);

    let head = &suffixes[..3];
    assert_eq!(head, &[7, 11, 13]);
}
17649
/// Compares the sequential reconstruction with the `_omp` entry point (called
/// with 4, presumably the thread count) on a 131,200-element SA; the full SA
/// must match.
#[test]
fn reconstruct_lms_suffixes_omp_uses_block_partition_for_large_inputs() {
    let m = 65_600usize;
    let n = 2 * m;

    // First half: m-1, m-2, ..., 0. Second half: 3, 20, 37, ... (17 * i + 3).
    let input: Vec<SaSint> = (0..m)
        .rev()
        .chain((0..m).map(|i| i * 17 + 3))
        .map(|value| value as SaSint)
        .collect();

    let mut single = input.clone();
    let mut threaded = input;
    reconstruct_lms_suffixes(&mut single, n as SaSint, m as SaSint, 0, m as FastSint);
    reconstruct_lms_suffixes_omp(&mut threaded, n as SaSint, m as SaSint, 4);

    assert_eq!(threaded, single);
}
17669
/// Runs the 1k renumber-and-mark on a three-symbol text with a zeroed SA and
/// checks the returned name and SA slot 1.
#[test]
fn renumber_and_mark_distinct_lms_suffixes_32s_1k_omp_handles_single_lms_suffix() {
    let text = vec![2, 1, 0];
    let mut suffixes = vec![0; text.len()];

    let name =
        renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(&text, &mut suffixes, 3, 1, 1);

    assert_eq!(name, 1);
    assert_eq!(suffixes[1], SAINT_MIN | 1);
}
17680
/// Runs the shared branch comparison helper with 300, which the test name
/// ties to the 6k branch.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_6k_branch() {
    let branch_arg = 300;
    assert_main_32s_entry_matches_upstream_c_for_branch(branch_arg);
}
17685
/// Runs the shared branch comparison helper with 400, which the test name
/// ties to the 4k branch.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_4k_branch() {
    let branch_arg = 400;
    assert_main_32s_entry_matches_upstream_c_for_branch(branch_arg);
}
17690
/// Runs the shared branch comparison helper with 700, which the test name
/// ties to the 2k branch.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_2k_branch() {
    let branch_arg = 700;
    assert_main_32s_entry_matches_upstream_c_for_branch(branch_arg);
}
17695
/// Runs the shared branch comparison helper with 1501, which the test name
/// ties to the 1k branch.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_1k_branch() {
    let branch_arg = 1501;
    assert_main_32s_entry_matches_upstream_c_for_branch(branch_arg);
}
17700
/// Feeds the text from `make_recursive_main_32s_text(24)` to the comparison
/// helper with the arguments the test name ties to the 6k case.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_recursive_repetitive_6k_case() {
    let text = make_recursive_main_32s_text(24);
    assert_main_32s_entry_matches_upstream_c(text, 300, 0, true);
}
17705
/// Feeds the text from `make_recursive_main_32s_text(24)` to the comparison
/// helper with the arguments the test name ties to the 1k case.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_recursive_repetitive_1k_case() {
    let text = make_recursive_main_32s_text(24);
    assert_main_32s_entry_matches_upstream_c(text, 1501, 0, true);
}
17710
/// Generates the stress text for (1024, 300) and hands it to the comparison
/// helper with a third argument of 0 and the final flag set.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_large_generated_6k_case() {
    let text = make_large_main_32s_stress_text(1024, 300);
    assert_main_32s_entry_matches_upstream_c(text, 300, 0, true);
}
17720
/// Generates the stress text for (1024, 300) and hands it to the comparison
/// helper with a third argument of 2048 (the `fs` of the test name,
/// presumably) and the final flag cleared.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_large_generated_6k_case_with_fs() {
    let text = make_large_main_32s_stress_text(1024, 300);
    assert_main_32s_entry_matches_upstream_c(text, 300, 2048, false);
}
17730
/// Generates the stress text for (1024, 400) and hands it to the comparison
/// helper with a third argument of 0 and the final flag set.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_large_generated_4k_case() {
    let text = make_large_main_32s_stress_text(1024, 400);
    assert_main_32s_entry_matches_upstream_c(text, 400, 0, true);
}
17740
/// Generates the stress text for (1024, 400) and hands it to the comparison
/// helper with a third argument of 2048 (the `fs` of the test name,
/// presumably) and the final flag cleared.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_large_generated_4k_case_with_fs() {
    let text = make_large_main_32s_stress_text(1024, 400);
    assert_main_32s_entry_matches_upstream_c(text, 400, 2048, false);
}
17750
/// Generates the stress text for (1024, 700) and hands it to the comparison
/// helper with a third argument of 0 and the final flag set.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_large_generated_2k_case() {
    let text = make_large_main_32s_stress_text(1024, 700);
    assert_main_32s_entry_matches_upstream_c(text, 700, 0, true);
}
17760
/// Generates the stress text for (1024, 700) and hands it to the comparison
/// helper with a third argument of 2048 (the `fs` of the test name,
/// presumably) and the final flag cleared.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_large_generated_2k_case_with_fs() {
    let text = make_large_main_32s_stress_text(1024, 700);
    assert_main_32s_entry_matches_upstream_c(text, 700, 2048, false);
}
17770
/// Generates the stress text for (1024, 1501) and hands it to the comparison
/// helper with a third argument of 2048 (the `fs` of the test name,
/// presumably) and the final flag cleared.
#[test]
fn libsais_main_32s_entry_matches_upstream_c_on_large_generated_1k_case_with_fs() {
    let text = make_large_main_32s_stress_text(1024, 1501);
    assert_main_32s_entry_matches_upstream_c(text, 1501, 2048, false);
}
17780
/// Places suffixes with the 4k bucket layout and checks the whole resulting
/// SA.
#[test]
fn place_lms_suffixes_interval_32s_4k_moves_suffixes_into_bucket_intervals() {
    let k = 3usize;
    let mut buckets = vec![0; 4 * k];
    // Slots addressed through `buckets_index2(c, 1)` for c = 0, 1, 2.
    buckets[buckets_index2(0, 1)] = 0;
    buckets[buckets_index2(1, 1)] = 2;
    buckets[buckets_index2(2, 1)] = 3;
    // Two slots starting at offset 3 * k.
    let tail_base = 3 * k;
    buckets[tail_base] = 2;
    buckets[tail_base + 1] = 5;

    let mut suffixes = vec![10, 11, 12, 13, 14];
    place_lms_suffixes_interval_32s_4k(&mut suffixes, 5, k as SaSint, 5, &buckets);

    assert_eq!(suffixes, vec![0, 0, 0, 0, 14]);
}
17796
/// Places suffixes with the 2k bucket layout and checks the whole resulting
/// SA.
#[test]
fn place_lms_suffixes_interval_32s_2k_moves_suffixes_into_bucket_intervals() {
    let mut buckets = vec![0; 2 * 3];
    // Symbol 0.
    buckets[buckets_index2(0, 0)] = 2;
    buckets[buckets_index2(0, 1)] = 0;
    // Symbol 1.
    buckets[buckets_index2(1, 0)] = 5;
    buckets[buckets_index2(1, 1)] = 2;
    // Symbol 2.
    buckets[buckets_index2(2, 0)] = 5;
    buckets[buckets_index2(2, 1)] = 3;

    let mut suffixes = vec![10, 11, 12, 13, 14];
    place_lms_suffixes_interval_32s_2k(&mut suffixes, 5, 3, 5, &buckets);

    assert_eq!(suffixes, vec![0, 0, 0, 0, 14]);
}
17812
/// Places four suffixes using one bucket value per symbol and checks the
/// whole resulting SA.
#[test]
fn place_lms_suffixes_interval_32s_1k_places_suffixes_by_symbol_bucket() {
    let text = vec![0, 1, 1, 2, 2];
    let buckets = vec![0, 2, 5];
    let mut suffixes = vec![1, 2, 3, 4, 99];

    place_lms_suffixes_interval_32s_1k(&text, &mut suffixes, 3, 4, &buckets);

    assert_eq!(suffixes, vec![1, 2, 0, 3, 4]);
}
17823
/// Scans a single SA entry left to right in BWT mode and checks SA slot 0 and
/// induction bucket 0.
#[test]
fn final_bwt_scan_left_to_right_8u_rewrites_sa_and_induces_suffixes() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffixes = vec![1, 0, 0];
    let mut induction = vec![0, 1, 3];

    final_bwt_scan_left_to_right_8u(&text, &mut suffixes, &mut induction, 0, 1);

    assert_eq!(suffixes[0], 0);
    assert_eq!(induction[0], 1);
}
17835
/// Scans a single SA entry left to right in BWT-aux mode and checks the first
/// slot of the output sampling array.
#[test]
fn final_bwt_aux_scan_left_to_right_8u_updates_sampling_array() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffixes = vec![1, 0, 0];
    let mut induction = vec![0, 1, 3];
    let mut samples = vec![0; 2];

    final_bwt_aux_scan_left_to_right_8u(
        &text,
        &mut suffixes,
        0,
        &mut samples,
        &mut induction,
        0,
        1,
    );

    assert_eq!(samples[0], 1);
}
17855
/// Scans a single SA entry left to right (8-bit text) and checks SA slot 0
/// and induction bucket 0.
#[test]
fn final_sorting_scan_left_to_right_8u_clears_marker_and_places_suffix() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffixes = vec![1, 0, 0];
    let mut induction = vec![0, 1, 3];

    final_sorting_scan_left_to_right_8u(&text, &mut suffixes, &mut induction, 0, 1);

    assert_eq!(suffixes[0], 0);
    assert_eq!(induction[0], 1);
}
17867
/// Scans a single SA entry left to right (32-bit text) and checks SA slot 0
/// and induction bucket 0.
#[test]
fn final_sorting_scan_left_to_right_32s_clears_marker_and_places_suffix() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffixes = vec![1, 0, 0];
    let mut induction = vec![0, 1, 3];

    final_sorting_scan_left_to_right_32s(&text, &mut suffixes, &mut induction, 0, 1);

    assert_eq!(suffixes[0], 0);
    assert_eq!(induction[0], 1);
}
17879
/// Prepares a two-element block and checks the returned count, the rewritten
/// SA entries, the per-symbol counts and the first two cache entries.
#[test]
fn final_bwt_scan_left_to_right_8u_block_prepare_records_cache_and_counts() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffixes = vec![1, 2, 0];
    // Buckets start at 99, so the asserted value of 1 shows they were overwritten.
    let mut buckets = vec![99; ALPHABET_SIZE];
    let mut cache = vec![ThreadCache::default(); 4];

    let count = final_bwt_scan_left_to_right_8u_block_prepare(
        &text,
        &mut suffixes,
        ALPHABET_SIZE as SaSint,
        &mut buckets,
        &mut cache,
        0,
        2,
    );

    assert_eq!(count, 2);
    assert_eq!(suffixes[0] & SAINT_MAX, 0);
    assert_eq!(suffixes[1], SAINT_MIN | 1);
    assert_eq!(&buckets[..2], &[1, 1]);
    // Cache entry i is expected to hold symbol i and (sign-masked) index i.
    for (i, entry) in cache.iter().take(2).enumerate() {
        assert_eq!(entry.symbol, i as SaSint);
        assert_eq!(entry.index & SAINT_MAX, i as SaSint);
    }
}
17907
/// Runs the 32-bit left-to-right block `_omp` scan over two entries and
/// checks the sign-masked SA values and the advanced induction buckets.
#[test]
fn final_sorting_scan_left_to_right_32s_block_omp_places_cached_suffixes() {
    let text = vec![0, 1, 2, 1, 0];
    let mut suffixes = vec![1, 2, 0, 0];
    let mut induction = vec![0, 1, 3];
    let mut cache = vec![ThreadCache::default(); LIBSAIS_PER_THREAD_CACHE_SIZE];

    final_sorting_scan_left_to_right_32s_block_omp(
        &text,
        &mut suffixes,
        &mut induction,
        &mut cache,
        0,
        2,
        2,
    );

    assert_eq!(suffixes[0] & SAINT_MAX, 0);
    assert_eq!(suffixes[1] & SAINT_MAX, 1);
    assert_eq!(&induction[..2], &[1, 2]);
}
17930
/// Calls the 8-bit left-to-right `_omp` scan once with 1 and an empty thread
/// state, and once with 2 and an allocated thread state; both runs must
/// produce the same SA and induction buckets.
#[test]
fn final_sorting_scan_left_to_right_8u_omp_wraps_sequential_behavior() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let len = text.len() as FastSint;
    let initial_sa = vec![0; text.len()];
    let initial_bucket = vec![0, 1, 3];

    // Reference run.
    let mut expected_sa = initial_sa.clone();
    let mut expected_bucket = initial_bucket.clone();
    final_sorting_scan_left_to_right_8u_omp(
        &text,
        &mut expected_sa,
        len,
        ALPHABET_SIZE as SaSint,
        &mut expected_bucket,
        1,
        &mut [],
    );

    // Run under test.
    let mut thread_state = alloc_thread_state(2).unwrap();
    let mut actual_sa = initial_sa;
    let mut actual_bucket = initial_bucket;
    final_sorting_scan_left_to_right_8u_omp(
        &text,
        &mut actual_sa,
        len,
        ALPHABET_SIZE as SaSint,
        &mut actual_bucket,
        2,
        &mut thread_state,
    );

    assert_eq!(actual_sa, expected_sa);
    assert_eq!(actual_bucket, expected_bucket);
}
17964
/// Compares the sequential 8-bit left-to-right final scan with the block
/// `_omp` variant (called with 4, presumably the thread count) on a 16,384
/// entry block at offset 20,000; SA and buckets must match.
#[test]
fn final_sorting_scan_left_to_right_8u_block_omp_uses_thread_buckets() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;
    let text = vec![1_u8; n];

    // The block holds the consecutive suffixes 2, 3, ..., block_size + 1.
    let mut expected_sa = vec![0; n];
    for (slot, suffix) in expected_sa[block_start..block_end]
        .iter_mut()
        .zip(2..2 + block_size)
    {
        *slot = suffix as SaSint;
    }
    let mut threaded_sa = expected_sa.clone();

    let mut expected_bucket = vec![0; ALPHABET_SIZE];
    let mut threaded_bucket = expected_bucket.clone();
    let mut thread_state = alloc_thread_state(4).unwrap();

    final_sorting_scan_left_to_right_8u(
        &text,
        &mut expected_sa,
        &mut expected_bucket,
        block_start as FastSint,
        block_size as FastSint,
    );
    final_sorting_scan_left_to_right_8u_block_omp(
        &text,
        &mut threaded_sa,
        ALPHABET_SIZE as SaSint,
        &mut threaded_bucket,
        block_start as FastSint,
        block_size as FastSint,
        4,
        &mut thread_state,
    );

    assert_eq!(threaded_sa, expected_sa);
    assert_eq!(threaded_bucket, expected_bucket);
}
18001
/// Compares the sequential 8-bit left-to-right BWT scan with the block `_omp`
/// variant (called with 4, presumably the thread count) on a 16,384 entry
/// block at offset 20,000; SA and buckets must match.
#[test]
fn final_bwt_left_to_right_8u_block_omp_uses_thread_buckets() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;
    let text = vec![1_u8; n];

    // The block holds the consecutive suffixes 2, 3, ..., block_size + 1.
    let mut expected_sa = vec![0; n];
    for (slot, suffix) in expected_sa[block_start..block_end]
        .iter_mut()
        .zip(2..2 + block_size)
    {
        *slot = suffix as SaSint;
    }
    let mut threaded_sa = expected_sa.clone();

    let mut expected_bucket = vec![0; ALPHABET_SIZE];
    let mut threaded_bucket = expected_bucket.clone();
    let mut thread_state = alloc_thread_state(4).unwrap();

    final_bwt_scan_left_to_right_8u(
        &text,
        &mut expected_sa,
        &mut expected_bucket,
        block_start as FastSint,
        block_size as FastSint,
    );
    final_bwt_scan_left_to_right_8u_block_omp(
        &text,
        &mut threaded_sa,
        ALPHABET_SIZE as SaSint,
        &mut threaded_bucket,
        block_start as FastSint,
        block_size as FastSint,
        4,
        &mut thread_state,
    );

    assert_eq!(threaded_sa, expected_sa);
    assert_eq!(threaded_bucket, expected_bucket);
}
18038
/// Compares the sequential 8-bit left-to-right BWT-aux scan with the block
/// `_omp` variant (called with 4, presumably the thread count) on a 16,384
/// entry block at offset 20,000; SA, sampling array and buckets must match.
#[test]
fn final_bwt_aux_left_to_right_8u_block_omp_uses_thread_buckets() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;
    let text = vec![1_u8; n];

    // The block holds the consecutive suffixes 2, 3, ..., block_size + 1.
    let mut expected_sa = vec![0; n];
    for (slot, suffix) in expected_sa[block_start..block_end]
        .iter_mut()
        .zip(2..2 + block_size)
    {
        *slot = suffix as SaSint;
    }
    let mut threaded_sa = expected_sa.clone();

    let mut expected_i = vec![0; n];
    let mut threaded_i = vec![0; n];
    let mut expected_bucket = vec![0; ALPHABET_SIZE];
    let mut threaded_bucket = expected_bucket.clone();
    let mut thread_state = alloc_thread_state(4).unwrap();

    final_bwt_aux_scan_left_to_right_8u(
        &text,
        &mut expected_sa,
        0,
        &mut expected_i,
        &mut expected_bucket,
        block_start as FastSint,
        block_size as FastSint,
    );
    final_bwt_aux_scan_left_to_right_8u_block_omp(
        &text,
        &mut threaded_sa,
        ALPHABET_SIZE as SaSint,
        0,
        &mut threaded_i,
        &mut threaded_bucket,
        block_start as FastSint,
        block_size as FastSint,
        4,
        &mut thread_state,
    );

    assert_eq!(threaded_sa, expected_sa);
    assert_eq!(threaded_i, expected_i);
    assert_eq!(threaded_bucket, expected_bucket);
}
18082
/// Scans two SA entries right to left in BWT mode and checks the returned
/// index, SA slot 1 and induction bucket 1.
#[test]
fn final_bwt_scan_right_to_left_8u_returns_zero_index_and_induces_suffixes() {
    let text = vec![0_u8, 1, 2, 1, 0];
    let mut suffixes = vec![0, 2, 0];
    let mut induction = vec![1, 2, 3];

    let index = final_bwt_scan_right_to_left_8u(&text, &mut suffixes, &mut induction, 0, 2);

    assert_eq!(index, 0);
    assert_eq!(suffixes[1], 1);
    assert_eq!(induction[1], 1);
}
18095
/// Runs the sequential 32-bit right-to-left final scan and the block `_omp`
/// variant on identical inputs; SA and induction buckets must match.
#[test]
fn final_sorting_scan_right_to_left_32s_block_omp_runs_block_pipeline() {
    let text = vec![0, 1, 2, 1, 0];
    let initial_sa = vec![0, 2, 0, 0];
    let initial_bucket = vec![1, 2, 3];

    // Reference result from the sequential scan.
    let mut expected_sa = initial_sa.clone();
    let mut expected_bucket = initial_bucket.clone();
    final_sorting_scan_right_to_left_32s(&text, &mut expected_sa, &mut expected_bucket, 0, 2);

    // Same input through the block variant.
    let mut actual_sa = initial_sa;
    let mut actual_bucket = initial_bucket;
    let mut cache = vec![ThreadCache::default(); LIBSAIS_PER_THREAD_CACHE_SIZE];
    final_sorting_scan_right_to_left_32s_block_omp(
        &text,
        &mut actual_sa,
        &mut actual_bucket,
        &mut cache,
        0,
        2,
        2,
    );

    assert_eq!(actual_sa, expected_sa);
    assert_eq!(actual_bucket, expected_bucket);
}
18119
/// Compares the sequential 8-bit right-to-left final scan with the block
/// `_omp` variant (called with 4, presumably the thread count) on a 16,384
/// entry block at offset 20,000; SA and buckets must match.
#[test]
fn final_sorting_scan_right_to_left_8u_block_omp_uses_thread_buckets() {
    let block_start = 20_000usize;
    let block_size = 16_384usize;
    let block_end = block_start + block_size;
    let n = block_end + 8;
    let text = vec![1_u8; n];

    // The block holds the consecutive suffixes 2, 3, ..., block_size + 1.
    let mut expected_sa = vec![0; n];
    for (slot, suffix) in expected_sa[block_start..block_end]
        .iter_mut()
        .zip(2..2 + block_size)
    {
        *slot = suffix as SaSint;
    }
    let mut threaded_sa = expected_sa.clone();

    // Bucket 1 starts at n; every other bucket starts at zero.
    let mut expected_bucket = vec![0; ALPHABET_SIZE];
    expected_bucket[1] = n as SaSint;
    let mut threaded_bucket = expected_bucket.clone();
    let mut thread_state = alloc_thread_state(4).unwrap();

    final_sorting_scan_right_to_left_8u(
        &text,
        &mut expected_sa,
        &mut expected_bucket,
        block_start as FastSint,
        block_size as FastSint,
    );
    final_sorting_scan_right_to_left_8u_block_omp(
        &text,
        &mut threaded_sa,
        ALPHABET_SIZE as SaSint,
        &mut threaded_bucket,
        block_start as FastSint,
        block_size as FastSint,
        4,
        &mut thread_state,
    );

    assert_eq!(threaded_sa, expected_sa);
    assert_eq!(threaded_bucket, expected_bucket);
}
18157
18158 #[test]
18159 fn final_bwt_right_to_left_8u_block_omp_uses_thread_buckets() {
18160 let block_start = 20_000usize;
18161 let block_size = 16_384usize;
18162 let n = block_start + block_size + 8;
18163 let t = vec![1_u8; n];
18164 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18165
18166 let mut expected_sa = vec![0; n];
18167 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18168 let mut threaded_sa = expected_sa.clone();
18169 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18170 expected_bucket[1] = n as SaSint;
18171 let mut threaded_bucket = expected_bucket.clone();
18172 let mut thread_state = alloc_thread_state(4).unwrap();
18173
18174 final_bwt_scan_right_to_left_8u(
18175 &t,
18176 &mut expected_sa,
18177 &mut expected_bucket,
18178 block_start as FastSint,
18179 block_size as FastSint,
18180 );
18181 final_bwt_scan_right_to_left_8u_block_omp(
18182 &t,
18183 &mut threaded_sa,
18184 ALPHABET_SIZE as SaSint,
18185 &mut threaded_bucket,
18186 block_start as FastSint,
18187 block_size as FastSint,
18188 4,
18189 &mut thread_state,
18190 );
18191
18192 assert_eq!(threaded_sa, expected_sa);
18193 assert_eq!(threaded_bucket, expected_bucket);
18194 }
18195
18196 #[test]
18197 fn final_bwt_aux_right_to_left_8u_block_omp_uses_thread_buckets() {
18198 let block_start = 20_000usize;
18199 let block_size = 16_384usize;
18200 let n = block_start + block_size + 8;
18201 let t = vec![1_u8; n];
18202 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18203
18204 let mut expected_sa = vec![0; n];
18205 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18206 let mut threaded_sa = expected_sa.clone();
18207 let mut expected_i = vec![0; n];
18208 let mut threaded_i = vec![0; n];
18209 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18210 expected_bucket[1] = n as SaSint;
18211 let mut threaded_bucket = expected_bucket.clone();
18212 let mut thread_state = alloc_thread_state(4).unwrap();
18213
18214 final_bwt_aux_scan_right_to_left_8u(
18215 &t,
18216 &mut expected_sa,
18217 0,
18218 &mut expected_i,
18219 &mut expected_bucket,
18220 block_start as FastSint,
18221 block_size as FastSint,
18222 );
18223 final_bwt_aux_scan_right_to_left_8u_block_omp(
18224 &t,
18225 &mut threaded_sa,
18226 ALPHABET_SIZE as SaSint,
18227 0,
18228 &mut threaded_i,
18229 &mut threaded_bucket,
18230 block_start as FastSint,
18231 block_size as FastSint,
18232 4,
18233 &mut thread_state,
18234 );
18235
18236 assert_eq!(threaded_sa, expected_sa);
18237 assert_eq!(threaded_i, expected_i);
18238 assert_eq!(threaded_bucket, expected_bucket);
18239 }
18240
18241 #[test]
18242 fn final_gsa_right_to_left_8u_block_omp_uses_thread_buckets() {
18243 let block_start = 20_000usize;
18244 let block_size = 16_384usize;
18245 let n = block_start + block_size + 8;
18246 let t = vec![1_u8; n];
18247 let suffixes: Vec<SaSint> = (2..2 + block_size).map(|i| i as SaSint).collect();
18248
18249 let mut expected_sa = vec![0; n];
18250 expected_sa[block_start..block_start + block_size].copy_from_slice(&suffixes);
18251 let mut threaded_sa = expected_sa.clone();
18252 let mut expected_bucket = vec![0; ALPHABET_SIZE];
18253 expected_bucket[1] = n as SaSint;
18254 let mut threaded_bucket = expected_bucket.clone();
18255 let mut thread_state = alloc_thread_state(4).unwrap();
18256
18257 final_gsa_scan_right_to_left_8u(
18258 &t,
18259 &mut expected_sa,
18260 &mut expected_bucket,
18261 block_start as FastSint,
18262 block_size as FastSint,
18263 );
18264 final_gsa_scan_right_to_left_8u_block_omp(
18265 &t,
18266 &mut threaded_sa,
18267 ALPHABET_SIZE as SaSint,
18268 &mut threaded_bucket,
18269 block_start as FastSint,
18270 block_size as FastSint,
18271 4,
18272 &mut thread_state,
18273 );
18274
18275 assert_eq!(threaded_sa, expected_sa);
18276 assert_eq!(threaded_bucket, expected_bucket);
18277 }
18278
18279 #[test]
18280 fn final_sorting_scan_right_to_left_8u_omp_matches_sequential_path() {
18281 let t = vec![0_u8, 1, 2, 1, 0];
18282 let mut sa = vec![0, 2, 0, 0];
18283 let mut induction_bucket = vec![1, 2, 3];
18284 let mut expected_sa = sa.clone();
18285 let mut expected_bucket = induction_bucket.clone();
18286
18287 final_sorting_scan_right_to_left_8u_omp(
18288 &t,
18289 &mut expected_sa,
18290 0,
18291 2,
18292 ALPHABET_SIZE as SaSint,
18293 &mut expected_bucket,
18294 1,
18295 &mut [],
18296 );
18297
18298 let mut thread_state = alloc_thread_state(2).unwrap();
18299 final_sorting_scan_right_to_left_8u_omp(
18300 &t,
18301 &mut sa,
18302 0,
18303 2,
18304 ALPHABET_SIZE as SaSint,
18305 &mut induction_bucket,
18306 2,
18307 &mut thread_state,
18308 );
18309
18310 assert_eq!(sa, expected_sa);
18311 assert_eq!(induction_bucket, expected_bucket);
18312 }
18313
18314 #[test]
18315 fn clear_lms_suffixes_omp_zeroes_requested_bucket_ranges() {
18316 let mut sa = vec![5, 4, 3, 2, 1, 9];
18317 let n = sa.len() as SaSint;
18318 let bucket_start = vec![1, 4, 5];
18319 let bucket_end = vec![3, 5, 5];
18320
18321 clear_lms_suffixes_omp(&mut sa, n, 3, &bucket_start, &bucket_end, 2);
18322
18323 assert_eq!(sa, vec![5, 0, 0, 2, 0, 9]);
18324 }
18325
18326 #[test]
18327 fn induce_final_order_8u_omp_non_bwt_matches_direct_final_scans() {
18328 let t = vec![0_u8, 1, 2, 1, 0];
18329 let mut sa = vec![0, 2, 0, 0, 0];
18330 let mut buckets = vec![0; 8 * ALPHABET_SIZE];
18331 buckets[6 * ALPHABET_SIZE..6 * ALPHABET_SIZE + 3].copy_from_slice(&[0, 1, 3]);
18332 buckets[7 * ALPHABET_SIZE..7 * ALPHABET_SIZE + 3].copy_from_slice(&[2, 4, 5]);
18333
18334 let mut expected_sa = sa.clone();
18335 let mut expected_left = vec![0, 1, 3];
18336 let mut expected_right = vec![2, 4, 5];
18337 final_sorting_scan_left_to_right_8u_omp(
18338 &t,
18339 &mut expected_sa,
18340 t.len() as FastSint,
18341 ALPHABET_SIZE as SaSint,
18342 &mut expected_left,
18343 1,
18344 &mut [],
18345 );
18346 final_sorting_scan_right_to_left_8u_omp(
18347 &t,
18348 &mut expected_sa,
18349 0,
18350 t.len() as FastSint,
18351 ALPHABET_SIZE as SaSint,
18352 &mut expected_right,
18353 1,
18354 &mut [],
18355 );
18356
18357 let mut thread_state = alloc_thread_state(2).unwrap();
18358 let result = induce_final_order_8u_omp(
18359 &t,
18360 &mut sa,
18361 t.len() as SaSint,
18362 ALPHABET_SIZE as SaSint,
18363 LIBSAIS_FLAGS_NONE,
18364 0,
18365 None,
18366 &mut buckets,
18367 2,
18368 &mut thread_state,
18369 );
18370
18371 assert_eq!(result, 0);
18372 assert_eq!(sa, expected_sa);
18373 assert_eq!(
18374 &buckets[6 * ALPHABET_SIZE..6 * ALPHABET_SIZE + 3],
18375 expected_left.as_slice()
18376 );
18377 assert_eq!(
18378 &buckets[7 * ALPHABET_SIZE..7 * ALPHABET_SIZE + 3],
18379 expected_right.as_slice()
18380 );
18381 }
18382
18383 #[test]
18384 fn renumber_unique_and_nonunique_lms_suffixes_32s_marks_new_unique_names() {
18385 let mut t = vec![0, 0, 0, 0];
18386 let mut sa = vec![0, 2, -1, 5];
18387
18388 let f = renumber_unique_and_nonunique_lms_suffixes_32s(&mut t, &mut sa, 2, 0, 0, 2);
18389
18390 assert_eq!(f, 1);
18391 assert_eq!(t[0], SAINT_MIN);
18392 assert_eq!(sa[2], SAINT_MIN);
18393 assert_eq!(sa[3], 4);
18394 }
18395
18396 #[test]
18397 fn renumber_unique_and_nonunique_lms_suffixes_32s_matches_upstream_c_helper() {
18398 let mut t_rust = vec![0, 0, 0, 0];
18399 let mut sa_rust = vec![0, 2, -1, 5];
18400 let mut t_c = t_rust.clone();
18401 let mut sa_c = sa_rust.clone();
18402
18403 let rust_f =
18404 renumber_unique_and_nonunique_lms_suffixes_32s(&mut t_rust, &mut sa_rust, 2, 0, 0, 2);
18405 let c_f = unsafe {
18406 probe_renumber_unique_and_nonunique_lms_suffixes_32s(
18407 t_c.as_mut_ptr(),
18408 sa_c.as_mut_ptr(),
18409 2,
18410 0,
18411 0,
18412 2,
18413 )
18414 };
18415
18416 assert_eq!(rust_f, c_f);
18417 assert_eq!(t_rust, t_c);
18418 assert_eq!(sa_rust, sa_c);
18419 }
18420
18421 #[test]
18422 fn renumber_unique_and_nonunique_lms_suffixes_32s_omp_matches_upstream_c_helper() {
18423 let mut t_rust = vec![0, 0, 0, 0];
18424 let mut sa_rust = vec![0, 2, -1, 5];
18425 let mut t_c = t_rust.clone();
18426 let mut sa_c = sa_rust.clone();
18427 let mut thread_state = alloc_thread_state(1).unwrap();
18428
18429 let rust_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
18430 &mut t_rust,
18431 &mut sa_rust,
18432 2,
18433 1,
18434 &mut thread_state,
18435 );
18436 let c_f = unsafe {
18437 probe_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
18438 t_c.as_mut_ptr(),
18439 sa_c.as_mut_ptr(),
18440 2,
18441 1,
18442 )
18443 };
18444
18445 assert_eq!(rust_f, c_f);
18446 assert_eq!(t_rust, t_c);
18447 assert_eq!(sa_rust, sa_c);
18448 }
18449
18450 #[test]
18451 fn renumber_unique_and_nonunique_lms_suffixes_32s_omp_uses_block_partition() {
18452 let m = 65_600usize;
18453 let n = 2 * m;
18454 let t = vec![0; n];
18455 let mut sa = vec![0; n];
18456 for i in 0..m {
18457 sa[i] = (2 * i) as SaSint;
18458 sa[m + i] = if i % 5 == 0 {
18459 -((i as SaSint) + 1)
18460 } else {
18461 i as SaSint + 7
18462 };
18463 }
18464
18465 let mut single_t = t.clone();
18466 let mut single_sa = sa.clone();
18467 let mut threaded_t = t;
18468 let mut threaded_sa = sa;
18469 let mut thread_state = alloc_thread_state(4).unwrap();
18470 let single_f = renumber_unique_and_nonunique_lms_suffixes_32s(
18471 &mut single_t,
18472 &mut single_sa,
18473 m as SaSint,
18474 0,
18475 0,
18476 m as FastSint,
18477 );
18478 let threaded_f = renumber_unique_and_nonunique_lms_suffixes_32s_omp(
18479 &mut threaded_t,
18480 &mut threaded_sa,
18481 m as SaSint,
18482 4,
18483 &mut thread_state,
18484 );
18485
18486 assert_eq!(threaded_f, single_f);
18487 assert_eq!(threaded_t, single_t);
18488 assert_eq!(threaded_sa, single_sa);
18489 }
18490
18491 #[test]
18492 fn compact_unique_and_nonunique_lms_suffixes_32s_splits_unique_and_nonunique_ranges() {
18493 let mut sa = vec![0, 0, 0, 0, SAINT_MIN, 4];
18494 let mut l = 2;
18495 let mut r = 6;
18496
18497 compact_unique_and_nonunique_lms_suffixes_32s(&mut sa, 2, &mut l, &mut r, 0, 2);
18498
18499 assert_eq!(l, 2);
18500 assert_eq!(r, 6);
18501 assert_eq!(sa[2], 0);
18502 assert_eq!(sa[3] & SAINT_MAX, 0);
18503 }
18504
18505 #[test]
18506 fn compact_lms_suffixes_32s_omp_runs_renumber_then_compaction() {
18507 let mut t = vec![0, 0, 0, 0];
18508 let mut sa = vec![0, 2, -1, 5, 77, 88];
18509 let mut thread_state = alloc_thread_state(2).unwrap();
18510
18511 let f = compact_lms_suffixes_32s_omp(&mut t, &mut sa, 4, 2, 2, 2, &mut thread_state);
18512
18513 assert_eq!(f, 1);
18514 assert_eq!(sa[2] & SAINT_MAX, 0);
18515 assert_eq!(sa[5], 3);
18516 }
18517
18518 #[test]
18519 fn compact_unique_and_nonunique_lms_suffixes_32s_omp_uses_block_partition() {
18520 let n = 131_200usize;
18521 let m = 65_600usize;
18522 let fs = m + 32;
18523 let half_n = n >> 1;
18524 let f = m / 5;
18525 let mut sa = vec![0; n + fs];
18526 for i in 0..half_n {
18527 sa[m + i] = if i % 5 == 0 {
18528 SAINT_MIN | i as SaSint
18529 } else {
18530 i as SaSint + 1
18531 };
18532 }
18533 for i in 0..f {
18534 sa[m - f + i] = (10_000 + i) as SaSint;
18535 }
18536
18537 let mut single = sa.clone();
18538 let mut threaded = sa;
18539 let mut single_state = alloc_thread_state(1).unwrap();
18540 let mut threaded_state = alloc_thread_state(4).unwrap();
18541 compact_unique_and_nonunique_lms_suffixes_32s_omp(
18542 &mut single,
18543 n as SaSint,
18544 m as SaSint,
18545 fs as SaSint,
18546 f as SaSint,
18547 1,
18548 &mut single_state,
18549 );
18550 compact_unique_and_nonunique_lms_suffixes_32s_omp(
18551 &mut threaded,
18552 n as SaSint,
18553 m as SaSint,
18554 fs as SaSint,
18555 f as SaSint,
18556 4,
18557 &mut threaded_state,
18558 );
18559
18560 let unique_dst = n + fs - m;
18561 assert_eq!(
18562 &threaded[unique_dst..unique_dst + f],
18563 &single[unique_dst..unique_dst + f]
18564 );
18565 }
18566
18567 #[test]
18568 fn compact_lms_suffixes_32s_omp_uses_large_input_paths() {
18569 let n = 131_200usize;
18570 let m = 65_600usize;
18571 let fs = m + 32;
18572 let t = vec![0; n];
18573 let mut sa = vec![0; n + fs];
18574 for i in 0..m {
18575 sa[i] = (2 * i) as SaSint;
18576 sa[m + i] = if i % 5 == 0 {
18577 -((i as SaSint) + 1)
18578 } else {
18579 i as SaSint + 7
18580 };
18581 }
18582
18583 let mut single_t = t.clone();
18584 let mut single_sa = sa.clone();
18585 let mut threaded_t = t;
18586 let mut threaded_sa = sa;
18587 let mut single_state = alloc_thread_state(1).unwrap();
18588 let mut threaded_state = alloc_thread_state(4).unwrap();
18589 let single_f = compact_lms_suffixes_32s_omp(
18590 &mut single_t,
18591 &mut single_sa,
18592 n as SaSint,
18593 m as SaSint,
18594 fs as SaSint,
18595 1,
18596 &mut single_state,
18597 );
18598 let threaded_f = compact_lms_suffixes_32s_omp(
18599 &mut threaded_t,
18600 &mut threaded_sa,
18601 n as SaSint,
18602 m as SaSint,
18603 fs as SaSint,
18604 4,
18605 &mut threaded_state,
18606 );
18607
18608 assert_eq!(threaded_f, single_f);
18609 assert_eq!(threaded_t, single_t);
18610 let unique_dst = n + fs - m;
18611 let unique_len = usize::try_from(threaded_f).expect("f must be non-negative");
18612 assert_eq!(
18613 &threaded_sa[unique_dst..unique_dst + unique_len],
18614 &single_sa[unique_dst..unique_dst + unique_len]
18615 );
18616 }
18617
18618 #[test]
18619 fn merge_unique_lms_suffixes_32s_noops_for_empty_block() {
18620 let mut t = vec![1, SAINT_MIN, 2, SAINT_MIN];
18621 let mut sa = vec![0, 0, 1, 3];
18622 let before_t = t.clone();
18623 let before_sa = sa.clone();
18624
18625 merge_unique_lms_suffixes_32s(&mut t, &mut sa, 4, 1, 0, 0, 0);
18626
18627 assert_eq!(t, before_t);
18628 assert_eq!(sa, before_sa);
18629 }
18630
18631 #[test]
18632 fn merge_nonunique_lms_suffixes_32s_noops_for_empty_block() {
18633 let mut sa = vec![0, 7, 0, 13, 11];
18634 let before = sa.clone();
18635
18636 merge_nonunique_lms_suffixes_32s(&mut sa, 4, 1, 0, 0, 0);
18637
18638 assert_eq!(sa, before);
18639 }
18640
18641 #[test]
18642 fn merge_compacted_lms_suffixes_32s_omp_preserves_input_text_and_fills_zero_slots() {
18643 let mut t = vec![1, 2, 3, 4];
18644 let mut sa = vec![0, 1, 2, 3, 4, 5];
18645 let before_t = t.clone();
18646 let mut thread_state = alloc_thread_state(2).unwrap();
18647
18648 merge_compacted_lms_suffixes_32s_omp(&mut t, &mut sa, 4, 1, 1, 2, &mut thread_state);
18649
18650 assert_eq!(t, before_t);
18651 assert_eq!(sa[0], 3);
18652 assert_eq!(sa[1], 1);
18653 }
18654
18655 #[test]
18656 fn merge_unique_lms_suffixes_32s_omp_uses_block_partition_for_large_inputs() {
18657 let n = 65_600usize;
18658 let m = 1_024usize;
18659 let mut t = vec![1; n];
18660 for i in (0..n).step_by(257) {
18661 t[i] = SAINT_MIN | ((i % 251) as SaSint);
18662 }
18663 let f = t.iter().filter(|&&value| value < 0).count();
18664 let mut sa = vec![-1; n];
18665 let src = n - m - 1;
18666 for i in 0..f {
18667 sa[src + i] = i as SaSint;
18668 }
18669
18670 let mut single_t = t.clone();
18671 let mut single_sa = sa.clone();
18672 let mut threaded_t = t;
18673 let mut threaded_sa = sa;
18674 let mut thread_state = alloc_thread_state(4).unwrap();
18675 merge_unique_lms_suffixes_32s_omp(
18676 &mut single_t,
18677 &mut single_sa,
18678 n as SaSint,
18679 m as SaSint,
18680 1,
18681 &mut [],
18682 );
18683 merge_unique_lms_suffixes_32s_omp(
18684 &mut threaded_t,
18685 &mut threaded_sa,
18686 n as SaSint,
18687 m as SaSint,
18688 4,
18689 &mut thread_state,
18690 );
18691
18692 assert_eq!(threaded_t, single_t);
18693 assert_eq!(threaded_sa, single_sa);
18694 }
18695
18696 #[test]
18697 fn merge_nonunique_lms_suffixes_32s_omp_uses_block_partition_for_large_inputs() {
18698 let n = 131_200usize;
18699 let m = 65_600usize;
18700 let f = 7usize;
18701 let mut sa = vec![1; n];
18702 let zero_count = (0..m).filter(|i| i % 17 == 0).count();
18703 for i in (0..m).step_by(17) {
18704 sa[i] = 0;
18705 }
18706 let src = n - m - 1 + f;
18707 for i in 0..zero_count {
18708 sa[src + i] = 10_000 + i as SaSint;
18709 }
18710
18711 let mut single = sa.clone();
18712 let mut threaded = sa;
18713 let mut thread_state = alloc_thread_state(4).unwrap();
18714 merge_nonunique_lms_suffixes_32s_omp(
18715 &mut single,
18716 n as SaSint,
18717 m as SaSint,
18718 f as SaSint,
18719 1,
18720 &mut [],
18721 );
18722 merge_nonunique_lms_suffixes_32s_omp(
18723 &mut threaded,
18724 n as SaSint,
18725 m as SaSint,
18726 f as SaSint,
18727 4,
18728 &mut thread_state,
18729 );
18730
18731 assert_eq!(threaded, single);
18732 }
18733
18734 #[test]
18735 fn merge_compacted_lms_suffixes_32s_omp_uses_block_partition_for_large_inputs() {
18736 let n = 131_200usize;
18737 let m = 65_600usize;
18738 let mut t = vec![1; n];
18739 for i in (0..n).step_by(257) {
18740 t[i] = SAINT_MIN | ((i % 251) as SaSint);
18741 }
18742 let f = t.iter().filter(|&&value| value < 0).count();
18743
18744 let mut sa = vec![1; n];
18745 let zero_count = (0..m).filter(|i| i % 17 == 0).count();
18746 for i in (0..m).step_by(17) {
18747 sa[i] = 0;
18748 }
18749 let unique_src = n - m - 1;
18750 for i in 0..f {
18751 sa[unique_src + i] = i as SaSint;
18752 }
18753 for i in 0..zero_count {
18754 sa[unique_src + f + i] = 10_000 + i as SaSint;
18755 }
18756
18757 let mut single_t = t.clone();
18758 let mut single_sa = sa.clone();
18759 let mut threaded_t = t;
18760 let mut threaded_sa = sa;
18761 let mut single_state = alloc_thread_state(1).unwrap();
18762 let mut threaded_state = alloc_thread_state(4).unwrap();
18763 merge_compacted_lms_suffixes_32s_omp(
18764 &mut single_t,
18765 &mut single_sa,
18766 n as SaSint,
18767 m as SaSint,
18768 f as SaSint,
18769 1,
18770 &mut single_state,
18771 );
18772 merge_compacted_lms_suffixes_32s_omp(
18773 &mut threaded_t,
18774 &mut threaded_sa,
18775 n as SaSint,
18776 m as SaSint,
18777 f as SaSint,
18778 4,
18779 &mut threaded_state,
18780 );
18781
18782 assert_eq!(threaded_t, single_t);
18783 assert_eq!(threaded_sa, single_sa);
18784 }
18785
18786 #[test]
18787 fn bwt_copy_8u_copies_low_bytes_from_suffix_array_storage() {
18788 let a = vec![65, 255, 256, -1];
18789 let mut u = vec![0_u8; 4];
18790
18791 bwt_copy_8u(&mut u, &a, 4);
18792
18793 assert_eq!(u, vec![65, 255, 0, 255]);
18794 }
18795
18796 #[test]
18797 fn bwt_copy_8u_omp_matches_sequential_copy() {
18798 let a = vec![1, 2, 3, 4, 5];
18799 let mut u = vec![0_u8; 5];
18800
18801 bwt_copy_8u_omp(&mut u, &a, 5, 4);
18802
18803 assert_eq!(u, vec![1, 2, 3, 4, 5]);
18804 }
18805
18806 #[test]
18807 fn bwt_copy_8u_omp_uses_block_partition_for_large_inputs() {
18808 let n = 65_600usize;
18809 let a: Vec<SaSint> = (0..n).map(|i| (i * 17) as SaSint).collect();
18810 let mut threaded = vec![0; n];
18811 let mut sequential = vec![0; n];
18812
18813 bwt_copy_8u_omp(&mut threaded, &a, n as SaSint, 4);
18814 bwt_copy_8u(&mut sequential, &a, n as SaSint);
18815
18816 assert_eq!(threaded, sequential);
18817 }
18818
18819 #[test]
18820 fn plcp_lcp_omp_wrappers_match_single_thread_on_large_inputs() {
18821 let n = 65_600usize;
18822 let text: Vec<u8> = (0..n).map(|i| (1 + (i % 251)) as u8).collect();
18823 let sa: Vec<SaSint> = (0..n as SaSint).collect();
18824
18825 let mut plcp_single = vec![0; n];
18826 let mut plcp_threaded = vec![0; n];
18827 compute_phi_omp(&sa, &mut plcp_single, n as SaSint, 1);
18828 compute_phi_omp(&sa, &mut plcp_threaded, n as SaSint, 4);
18829 assert_eq!(plcp_threaded, plcp_single);
18830
18831 compute_plcp_omp(&text, &mut plcp_single, n as SaSint, 1);
18832 compute_plcp_omp(&text, &mut plcp_threaded, n as SaSint, 4);
18833 assert_eq!(plcp_threaded, plcp_single);
18834
18835 let mut lcp_single = vec![0; n];
18836 let mut lcp_threaded = vec![0; n];
18837 compute_lcp_omp(&plcp_single, &sa, &mut lcp_single, n as SaSint, 1);
18838 compute_lcp_omp(&plcp_threaded, &sa, &mut lcp_threaded, n as SaSint, 4);
18839 assert_eq!(lcp_threaded, lcp_single);
18840 }
18841
18842 #[test]
18843 fn count_and_gather_lms_suffixes_8u_omp_preserves_sequential_wrapper_behavior() {
18844 let t = vec![2_u8, 1, 3, 1, 0];
18845 let mut sa = vec![0; t.len()];
18846 let mut buckets = vec![0; 4 * ALPHABET_SIZE];
18847 let mut thread_state = alloc_thread_state(2).unwrap();
18848 let m = count_and_gather_lms_suffixes_8u_omp(
18849 &t,
18850 &mut sa,
18851 t.len() as SaSint,
18852 &mut buckets,
18853 2,
18854 &mut thread_state,
18855 );
18856 assert_eq!(m, 1);
18857 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18858 }
18859
18860 #[test]
18861 fn count_and_gather_lms_suffixes_8u_omp_uses_block_partition_for_large_inputs() {
18862 let n = 65_600usize;
18863 let text: Vec<u8> = (0..n)
18864 .map(|i| 1 + ((i * 37 + i / 17) % 251) as u8)
18865 .collect();
18866
18867 let mut sa_threaded = vec![-99; n];
18868 let mut sa_scalar = vec![-99; n];
18869 let mut buckets_threaded = vec![0; 4 * ALPHABET_SIZE];
18870 let mut buckets_scalar = vec![0; 4 * ALPHABET_SIZE];
18871 let mut thread_state = alloc_thread_state(4).unwrap();
18872
18873 let m_threaded = count_and_gather_lms_suffixes_8u_omp(
18874 &text,
18875 &mut sa_threaded,
18876 n as SaSint,
18877 &mut buckets_threaded,
18878 4,
18879 &mut thread_state,
18880 );
18881 let m_scalar = count_and_gather_lms_suffixes_8u(
18882 &text,
18883 &mut sa_scalar,
18884 n as SaSint,
18885 &mut buckets_scalar,
18886 0,
18887 n as FastSint,
18888 );
18889
18890 assert_eq!(m_threaded, m_scalar);
18891 assert_eq!(
18892 &sa_threaded[n - m_threaded as usize..],
18893 &sa_scalar[n - m_scalar as usize..]
18894 );
18895 assert_eq!(buckets_threaded, buckets_scalar);
18896 }
18897
18898 #[test]
18899 fn gather_lms_suffixes_8u_omp_uses_thread_state_for_large_inputs() {
18900 let n = 65_600usize;
18901 let text: Vec<u8> = (0..n)
18902 .map(|i| 1 + ((i * 37 + i / 17) % 251) as u8)
18903 .collect();
18904 let mut thread_state = alloc_thread_state(4).unwrap();
18905 let mut count_sa = vec![-99; n];
18906 let mut buckets = vec![0; 4 * ALPHABET_SIZE];
18907 let m = count_and_gather_lms_suffixes_8u_omp(
18908 &text,
18909 &mut count_sa,
18910 n as SaSint,
18911 &mut buckets,
18912 4,
18913 &mut thread_state,
18914 );
18915
18916 let mut threaded = vec![-99; n];
18917 let mut scalar = vec![-99; n];
18918 gather_lms_suffixes_8u_omp(&text, &mut threaded, n as SaSint, 4, &mut thread_state);
18919 gather_lms_suffixes_8u(
18920 &text,
18921 &mut scalar,
18922 n as SaSint,
18923 n as FastSint - 1,
18924 0,
18925 n as FastSint,
18926 );
18927
18928 assert_eq!(&threaded[n - m as usize..], &scalar[n - m as usize..]);
18929 }
18930
18931 #[test]
18932 fn count_and_gather_lms_suffixes_32s_4k_updates_counts_and_suffixes() {
18933 let t = vec![2, 1, 3, 1, 0];
18934 let mut sa = vec![0; t.len()];
18935 let mut buckets = vec![0; 4 * 4];
18936 let m = count_and_gather_lms_suffixes_32s_4k(
18937 &t,
18938 &mut sa,
18939 t.len() as SaSint,
18940 4,
18941 &mut buckets,
18942 0,
18943 t.len() as FastSint,
18944 );
18945 assert!(m >= 0);
18946 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18947 }
18948
18949 #[test]
18950 fn count_and_gather_lms_suffixes_32s_2k_updates_counts_and_suffixes() {
18951 let t = vec![2, 1, 3, 1, 0];
18952 let mut sa = vec![0; t.len()];
18953 let mut buckets = vec![0; 2 * 4];
18954 let m = count_and_gather_lms_suffixes_32s_2k(
18955 &t,
18956 &mut sa,
18957 t.len() as SaSint,
18958 4,
18959 &mut buckets,
18960 0,
18961 t.len() as FastSint,
18962 );
18963 assert!(m >= 0);
18964 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18965 }
18966
18967 #[test]
18968 fn count_and_gather_compacted_lms_suffixes_32s_2k_updates_counts_and_suffixes() {
18969 let t = vec![2, SAINT_MIN | 1, 3, 1, 0];
18970 let mut sa = vec![0; t.len()];
18971 let mut buckets = vec![0; 2 * 4];
18972 let m = count_and_gather_compacted_lms_suffixes_32s_2k(
18973 &t,
18974 &mut sa,
18975 t.len() as SaSint,
18976 4,
18977 &mut buckets,
18978 0,
18979 t.len() as FastSint,
18980 );
18981 assert!(m >= 0);
18982 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
18983 }
18984
18985 #[test]
18986 fn count_and_gather_lms_suffixes_32s_4k_nofs_omp_wraps_sequential_version() {
18987 let t = vec![2, 1, 3, 1, 0];
18988 let mut sa = vec![0; t.len()];
18989 let mut buckets = vec![0; 4 * 4];
18990 let m = count_and_gather_lms_suffixes_32s_4k_nofs_omp(
18991 &t,
18992 &mut sa,
18993 t.len() as SaSint,
18994 4,
18995 &mut buckets,
18996 2,
18997 );
18998 assert!(m >= 0);
18999 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
19000 }
19001
19002 #[test]
19003 fn count_and_gather_lms_suffixes_32s_2k_nofs_omp_wraps_sequential_version() {
19004 let t = vec![2, 1, 3, 1, 0];
19005 let mut sa = vec![0; t.len()];
19006 let mut buckets = vec![0; 2 * 4];
19007 let m = count_and_gather_lms_suffixes_32s_2k_nofs_omp(
19008 &t,
19009 &mut sa,
19010 t.len() as SaSint,
19011 4,
19012 &mut buckets,
19013 2,
19014 );
19015 assert!(m >= 0);
19016 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
19017 }
19018
19019 #[test]
19020 fn count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp_wraps_sequential_version() {
19021 let t = vec![2, SAINT_MIN | 1, 3, 1, 0];
19022 let mut sa = vec![0; t.len()];
19023 let mut buckets = vec![0; 2 * 4];
19024 let m = count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
19025 &t,
19026 &mut sa,
19027 t.len() as SaSint,
19028 4,
19029 &mut buckets,
19030 2,
19031 );
19032 assert!(m >= 0);
19033 assert_eq!(buckets.iter().sum::<SaSint>(), t.len() as SaSint);
19034 }
19035
19036 #[test]
19037 fn count_and_gather_lms_suffixes_32s_nofs_omp_uses_large_input_paths() {
19038 let n = 65_600usize;
19039 let k = 257usize;
19040 let text: Vec<SaSint> = (0..n)
19041 .map(|i| 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint)
19042 .collect();
19043
19044 let mut sa_threaded = vec![-99; n];
19045 let mut sa_scalar = vec![-99; n];
19046 let mut buckets_threaded = vec![0; 4 * k];
19047 let mut buckets_scalar = vec![0; 4 * k];
19048 let m_threaded = count_and_gather_lms_suffixes_32s_4k_nofs_omp(
19049 &text,
19050 &mut sa_threaded,
19051 n as SaSint,
19052 k as SaSint,
19053 &mut buckets_threaded,
19054 4,
19055 );
19056 let m_scalar = count_and_gather_lms_suffixes_32s_4k(
19057 &text,
19058 &mut sa_scalar,
19059 n as SaSint,
19060 k as SaSint,
19061 &mut buckets_scalar,
19062 0,
19063 n as FastSint,
19064 );
19065 assert_eq!(m_threaded, m_scalar);
19066 assert_eq!(
19067 &sa_threaded[n - m_threaded as usize..],
19068 &sa_scalar[n - m_scalar as usize..]
19069 );
19070 assert_eq!(buckets_threaded, buckets_scalar);
19071
19072 let mut sa_threaded = vec![-99; n];
19073 let mut sa_scalar = vec![-99; n];
19074 let mut buckets_threaded = vec![0; 2 * k];
19075 let mut buckets_scalar = vec![0; 2 * k];
19076 let m_threaded = count_and_gather_lms_suffixes_32s_2k_nofs_omp(
19077 &text,
19078 &mut sa_threaded,
19079 n as SaSint,
19080 k as SaSint,
19081 &mut buckets_threaded,
19082 4,
19083 );
19084 let m_scalar = count_and_gather_lms_suffixes_32s_2k(
19085 &text,
19086 &mut sa_scalar,
19087 n as SaSint,
19088 k as SaSint,
19089 &mut buckets_scalar,
19090 0,
19091 n as FastSint,
19092 );
19093 assert_eq!(m_threaded, m_scalar);
19094 assert_eq!(
19095 &sa_threaded[n - m_threaded as usize..],
19096 &sa_scalar[n - m_scalar as usize..]
19097 );
19098 assert_eq!(buckets_threaded, buckets_scalar);
19099 }
19100
19101 #[test]
19102 fn count_and_gather_lms_suffixes_32s_fs_omp_uses_large_input_paths() {
19103 let n = 65_600usize;
19104 let k = 257usize;
19105 let text: Vec<SaSint> = (0..n)
19106 .map(|i| 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint)
19107 .collect();
19108 let mut thread_state = alloc_thread_state(4).unwrap();
19109
19110 let mut sa_threaded = vec![-99; n];
19111 let mut sa_scalar = vec![-99; n];
19112 let mut buckets_threaded = vec![0; 4 * k];
19113 let mut buckets_scalar = vec![0; 4 * k];
19114 let m_threaded = count_and_gather_lms_suffixes_32s_4k_fs_omp(
19115 &text,
19116 &mut sa_threaded,
19117 n as SaSint,
19118 k as SaSint,
19119 &mut buckets_threaded,
19120 0,
19121 4,
19122 &mut thread_state,
19123 );
19124 let m_scalar = count_and_gather_lms_suffixes_32s_4k(
19125 &text,
19126 &mut sa_scalar,
19127 n as SaSint,
19128 k as SaSint,
19129 &mut buckets_scalar,
19130 0,
19131 n as FastSint,
19132 );
19133 assert_eq!(m_threaded, m_scalar);
19134 assert_eq!(
19135 &sa_threaded[n - m_threaded as usize..],
19136 &sa_scalar[n - m_scalar as usize..]
19137 );
19138 assert_eq!(buckets_threaded, buckets_scalar);
19139
19140 let mut sa_threaded = vec![-99; n];
19141 let mut sa_scalar = vec![-99; n];
19142 let mut buckets_threaded = vec![0; 2 * k];
19143 let mut buckets_scalar = vec![0; 2 * k];
19144 let m_threaded = count_and_gather_lms_suffixes_32s_2k_fs_omp(
19145 &text,
19146 &mut sa_threaded,
19147 n as SaSint,
19148 k as SaSint,
19149 &mut buckets_threaded,
19150 0,
19151 4,
19152 &mut thread_state,
19153 );
19154 let m_scalar = count_and_gather_lms_suffixes_32s_2k(
19155 &text,
19156 &mut sa_scalar,
19157 n as SaSint,
19158 k as SaSint,
19159 &mut buckets_scalar,
19160 0,
19161 n as FastSint,
19162 );
19163 assert_eq!(m_threaded, m_scalar);
19164 assert_eq!(
19165 &sa_threaded[n - m_threaded as usize..],
19166 &sa_scalar[n - m_scalar as usize..]
19167 );
19168 assert_eq!(buckets_threaded, buckets_scalar);
19169 }
19170
19171 #[test]
19172 fn count_and_gather_compacted_lms_suffixes_32s_nofs_omp_uses_large_input_path() {
19173 let n = 65_600usize;
19174 let k = 257usize;
19175 let text: Vec<SaSint> = (0..n)
19176 .map(|i| {
19177 let value = 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint;
19178 if i % 19 == 0 {
19179 value | SAINT_MIN
19180 } else {
19181 value
19182 }
19183 })
19184 .collect();
19185
19186 let mut sa_threaded = vec![-99; n];
19187 let mut sa_split = vec![-99; n];
19188 let mut buckets_threaded = vec![0; 2 * k];
19189 let mut buckets_split = vec![0; 2 * k];
19190 let m_threaded = count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
19191 &text,
19192 &mut sa_threaded,
19193 n as SaSint,
19194 k as SaSint,
19195 &mut buckets_threaded,
19196 4,
19197 );
19198 count_compacted_lms_suffixes_32s_2k(&text, n as SaSint, k as SaSint, &mut buckets_split);
19199 let m_split = gather_compacted_lms_suffixes_32s(&text, &mut sa_split, n as SaSint);
19200
19201 assert_eq!(m_threaded, m_split);
19202 assert_eq!(
19203 &sa_threaded[n - m_threaded as usize..],
19204 &sa_split[n - m_split as usize..]
19205 );
19206 assert_eq!(buckets_threaded, buckets_split);
19207 }
19208
19209 #[test]
19210 fn count_and_gather_compacted_lms_suffixes_32s_fs_omp_uses_large_input_path() {
19211 let n = 65_600usize;
19212 let k = 257usize;
19213 let text: Vec<SaSint> = (0..n)
19214 .map(|i| {
19215 let value = 1 + ((i * 37 + i / 17) % (k - 1)) as SaSint;
19216 if i % 19 == 0 {
19217 value | SAINT_MIN
19218 } else {
19219 value
19220 }
19221 })
19222 .collect();
19223
19224 let mut sa_threaded = vec![-99; 2 * n];
19225 let mut sa_scalar = vec![-99; n];
19226 let mut buckets_threaded = vec![0; 2 * k];
19227 let mut buckets_scalar = vec![0; 2 * k];
19228 let mut thread_state = alloc_thread_state(4).unwrap();
19229 count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(
19230 &text,
19231 &mut sa_threaded,
19232 n as SaSint,
19233 k as SaSint,
19234 &mut buckets_threaded,
19235 0,
19236 4,
19237 &mut thread_state,
19238 );
19239 let m_scalar = count_and_gather_compacted_lms_suffixes_32s_2k(
19240 &text,
19241 &mut sa_scalar,
19242 n as SaSint,
19243 k as SaSint,
19244 &mut buckets_scalar,
19245 0,
19246 n as FastSint,
19247 );
19248
19249 assert_eq!(
19250 &sa_threaded[n - m_scalar as usize..n],
19251 &sa_scalar[n - m_scalar as usize..]
19252 );
19253 assert_eq!(buckets_threaded, buckets_scalar);
19254 }
19255
19256 #[test]
19257 fn accumulate_counts_helpers_match_prefix_bucket_addition() {
19258 let mut bucket00 = vec![4, 5, 6];
19259 let bucket01 = vec![1, 2, 3];
19260 let bucket02 = vec![7, 8, 9];
19261 let bucket03 = vec![10, 11, 12];
19262 let bucket04 = vec![13, 14, 15];
19263 let bucket05 = vec![16, 17, 18];
19264 let bucket06 = vec![19, 20, 21];
19265 let bucket07 = vec![22, 23, 24];
19266 let bucket08 = vec![25, 26, 27];
19267
19268 accumulate_counts_s32_2(&mut bucket00, &bucket01);
19269 assert_eq!(bucket00, vec![5, 7, 9]);
19270
19271 accumulate_counts_s32_3(&mut bucket00, &bucket01, &bucket02);
19272 assert_eq!(bucket00, vec![13, 17, 21]);
19273
19274 accumulate_counts_s32_4(&mut bucket00, &bucket01, &bucket02, &bucket03);
19275 assert_eq!(bucket00, vec![31, 38, 45]);
19276
19277 accumulate_counts_s32_5(&mut bucket00, &bucket01, &bucket02, &bucket03, &bucket04);
19278 assert_eq!(bucket00, vec![62, 73, 84]);
19279
19280 accumulate_counts_s32_6(
19281 &mut bucket00,
19282 &bucket01,
19283 &bucket02,
19284 &bucket03,
19285 &bucket04,
19286 &bucket05,
19287 );
19288 assert_eq!(bucket00, vec![109, 125, 141]);
19289
19290 accumulate_counts_s32_7(
19291 &mut bucket00,
19292 &bucket01,
19293 &bucket02,
19294 &bucket03,
19295 &bucket04,
19296 &bucket05,
19297 &bucket06,
19298 );
19299 assert_eq!(bucket00, vec![175, 197, 219]);
19300
19301 accumulate_counts_s32_8(
19302 &mut bucket00,
19303 &bucket01,
19304 &bucket02,
19305 &bucket03,
19306 &bucket04,
19307 &bucket05,
19308 &bucket06,
19309 &bucket07,
19310 );
19311 assert_eq!(bucket00, vec![263, 292, 321]);
19312
19313 accumulate_counts_s32_9(
19314 &mut bucket00,
19315 &bucket01,
19316 &bucket02,
19317 &bucket03,
19318 &bucket04,
19319 &bucket05,
19320 &bucket06,
19321 &bucket07,
19322 &bucket08,
19323 );
19324 assert_eq!(bucket00, vec![376, 413, 450]);
19325 }
19326
19327 #[test]
19328 fn accumulate_counts_s32_matches_c_dispatch_for_small_bucket_counts() {
19329 let mut buckets = vec![1, 2, 3, 4, 5, 6, 7, 8];
19330 accumulate_counts_s32(&mut buckets, 2, 2, 4);
19331 assert_eq!(buckets, vec![1, 2, 3, 4, 5, 6, 16, 20]);
19332 }
19333
19334 #[test]
19335 fn accumulate_counts_s32_matches_c_dispatch_for_nine_buckets() {
19336 let mut buckets = vec![
19337 1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90,
19338 ];
19339 accumulate_counts_s32(&mut buckets, 2, 2, 9);
19340 assert_eq!(
19341 buckets,
19342 vec![1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 45, 450]
19343 );
19344 }
19345
19346 #[test]
19347 fn accumulate_counts_s32_matches_c_chunked_nine_then_tail_behavior() {
19348 let mut buckets = (1..=11).collect::<Vec<SaSint>>();
19349 accumulate_counts_s32(&mut buckets, 1, 1, 11);
19350 assert_eq!(buckets, vec![1, 2, 3, 4, 5, 6, 7, 8, 45, 10, 66]);
19351 }
19352}