1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
//! The pSACAK suffix sorting algorithm for huge in-memory data on multicore machines. //! //! The algorithm is described in this [paper](https://doi.org/10.1109/TC.2018.2842050). //! //! # Examples //! //! Create new suffix array. //! //! ```rust //! use psacak::psacak; //! //! let text = b"mississippi"; //! let suf = psacak(text); //! //! assert_eq!(suf, vec![10, 7, 4, 1, 0, 9, 8, 6, 3, 5, 2]); //! ``` //! //! Compute the suffix array in a slice. //! //! ```rust //! use psacak::psacak_inplace; //! //! let text = b"mississippi"; //! let mut suf = vec![0; text.len()]; //! psacak_inplace(text, &mut suf[..]); //! //! assert_eq!(suf, vec![10, 7, 4, 1, 0, 9, 8, 6, 3, 5, 2]); //! ``` //! //! Note that the position of sentinel is not presented in the suffix array calculated by pSACAK. //! //! To obtain a suffix array which included the position of sentinel, you can try this: //! //! ```rust //! use psacak::psacak_inplace; //! //! let text = b"mississippi"; //! let mut suf = vec![0; text.len() + 1]; //! suf[0] = text.len() as u32; //! psacak_inplace(text, &mut suf[1..]); //! //! assert_eq!(suf, vec![11, 10, 7, 4, 1, 0, 9, 8, 6, 3, 5, 2]); //! ``` #[cfg(test)] #[macro_use] extern crate quickcheck_macros; #[macro_use] extern crate cfg_if; mod common; mod naming; mod pipeline; mod psacak32; mod psacak8; mod types; use psacak8::psacak8; /// The pSACAK suffix sorting algorithm. /// /// It would panic if the length of text has exceeded the limitation of 32-bit integer. pub fn psacak(text: &[u8]) -> Vec<u32> { assert!(text.len() < std::u32::MAX as usize); let mut suf = vec![0; text.len()]; psacak8(text, &mut suf[..]); suf } /// In-place version of the pSACAK suffix sorting algorithm. /// /// If any of the following conditions is met, it would panic: /// * The length of text has exceeded the limitation of 32-bit integer. /// * The length of text is greater than the length of suffix array. /// * Memory layout of the suffix array slice is not aligned. pub fn psacak_inplace(text: &[u8], suf: &mut [u32]) { assert!(text.len() <= suf.len()); assert!(text.len() < std::u32::MAX as usize); let suf = &mut suf[..text.len()]; psacak8(text, suf); }