caps_sa/lib.rs
1//! Cache-friendly, parallel, sample-sort-based suffix array construction.
2//!
3//! This crate is a Rust port of [CaPS-SA] (Khan et al., WABI 2023), a parallel
4//! and cache-friendly suffix-array constructor based on sample sort with
5//! LCP-enhanced comparison.
6//!
7//! The crate is generic over the symbol type (`u8`, `u16`, …; any `Ord + Copy`)
8//! and the index type (`u32`, `u64`; via the [`Index`] trait). It produces a
9//! standard lexicographic suffix array. Callers who need a *generalized* suffix
10//! array (multiple strings, sentinel-terminated) should rewrite their text with
11//! distinct sentinels before invoking — the resulting standard SA over the
12//! transformed text is then the generalized SA they want.
13//!
14//! Phase 1 of the port provides the **in-memory** algorithm; the external-memory
15//! variant (disk-spilling buckets) is layered on top in a later phase.
16//!
17//! [CaPS-SA]: https://github.com/jamshed/CaPS-SA
18
19mod ext_bucket;
20mod ext_mem;
21mod lcp;
22mod limits;
23mod sample_sort;
24
25pub use ext_mem::{
26 ExtMemOpts, build_ext_mem, build_ext_mem_for_filter, build_ext_mem_for_filter_with,
27 build_ext_mem_for_positions, build_ext_mem_for_positions_with, build_ext_mem_with,
28 build_in_memory_sample_sort, build_in_memory_sample_sort_for_positions,
29 build_in_memory_sample_sort_for_positions_with, build_in_memory_sample_sort_with,
30};
31pub use lcp::{LcpDispatch, Symbol, lcp, lcp_scalar, lcp_u8, suffix_cmp};
32pub use limits::{LimitProvider, PlainText, SegmentedText};
33pub use sample_sort::{
34 Opts, build_in_memory, build_in_memory_for_positions, build_in_memory_for_positions_with,
35 build_in_memory_for_positions_with_opts, build_in_memory_with, build_in_memory_with_opts,
36};
37
38/// Trait implemented by integer types usable as suffix array indices.
39///
40/// Provided for `u32`, `u64`, and `usize`. Callers pick the narrowest type
41/// large enough to address their text.
42pub trait Index:
43 Copy
44 + Eq
45 + Ord
46 + Send
47 + Sync
48 + std::fmt::Debug
49 + std::ops::Add<Output = Self>
50 + std::ops::Sub<Output = Self>
51{
52 /// Convert from `usize`. Panics if the value does not fit.
53 fn from_usize(v: usize) -> Self;
54 /// Convert to `usize`. Lossless for `u32`/`u64`/`usize` on 64-bit targets.
55 fn to_usize(self) -> usize;
56 /// The zero value.
57 fn zero() -> Self;
58}
59
60macro_rules! impl_index {
61 ($t:ty) => {
62 impl Index for $t {
63 #[inline]
64 fn from_usize(v: usize) -> Self {
65 v as $t
66 }
67 #[inline]
68 fn to_usize(self) -> usize {
69 self as usize
70 }
71 #[inline]
72 fn zero() -> Self {
73 0
74 }
75 }
76 };
77}
78
79impl_index!(u32);
80impl_index!(u64);
81impl_index!(usize);