Skip to main content

caps_sa/
lib.rs

1//! Cache-friendly, parallel, sample-sort-based suffix array construction.
2//!
3//! This crate is a Rust port of [CaPS-SA] (Khan et al., WABI 2023), a parallel
4//! and cache-friendly suffix-array constructor based on sample sort with
5//! LCP-enhanced comparison.
6//!
7//! The crate is generic over the symbol type (`u8`, `u16`, …; any `Ord + Copy`)
8//! and the index type (`u32`, `u64`; via the [`Index`] trait). It produces a
9//! standard lexicographic suffix array. Callers who need a *generalized* suffix
10//! array (multiple strings, sentinel-terminated) should rewrite their text with
11//! distinct sentinels before invoking — the resulting standard SA over the
12//! transformed text is then the generalized SA they want.
13//!
14//! Phase 1 of the port provides the **in-memory** algorithm; the external-memory
15//! variant (disk-spilling buckets) is layered on top in a later phase.
16//!
17//! [CaPS-SA]: https://github.com/jamshed/CaPS-SA
18
19mod ext_bucket;
20mod ext_mem;
21mod lcp;
22mod limits;
23mod sample_sort;
24
25pub use ext_mem::{
26    ExtMemOpts, build_ext_mem, build_ext_mem_for_positions, build_ext_mem_for_positions_with,
27    build_ext_mem_with, build_in_memory_sample_sort, build_in_memory_sample_sort_for_positions,
28    build_in_memory_sample_sort_for_positions_with, build_in_memory_sample_sort_with,
29};
30pub use lcp::{LcpDispatch, Symbol, lcp, lcp_scalar, lcp_u8, suffix_cmp};
31pub use limits::{LimitProvider, PlainText, SegmentedText};
32pub use sample_sort::{
33    Opts, build_in_memory, build_in_memory_for_positions, build_in_memory_for_positions_with,
34    build_in_memory_for_positions_with_opts, build_in_memory_with, build_in_memory_with_opts,
35};
36
37/// Trait implemented by integer types usable as suffix array indices.
38///
39/// Provided for `u32`, `u64`, and `usize`. Callers pick the narrowest type
40/// large enough to address their text.
41pub trait Index:
42    Copy
43    + Eq
44    + Ord
45    + Send
46    + Sync
47    + std::fmt::Debug
48    + std::ops::Add<Output = Self>
49    + std::ops::Sub<Output = Self>
50{
51    /// Convert from `usize`. Panics if the value does not fit.
52    fn from_usize(v: usize) -> Self;
53    /// Convert to `usize`. Lossless for `u32`/`u64`/`usize` on 64-bit targets.
54    fn to_usize(self) -> usize;
55    /// The zero value.
56    fn zero() -> Self;
57}
58
59macro_rules! impl_index {
60    ($t:ty) => {
61        impl Index for $t {
62            #[inline]
63            fn from_usize(v: usize) -> Self {
64                v as $t
65            }
66            #[inline]
67            fn to_usize(self) -> usize {
68                self as usize
69            }
70            #[inline]
71            fn zero() -> Self {
72                0
73            }
74        }
75    };
76}
77
78impl_index!(u32);
79impl_index!(u64);
80impl_index!(usize);