saxx/
lib.rs

1extern crate libc;
2
3
4use std::collections::HashMap;
5use std::result;
6use std::vec::Vec;
7pub use libc::c_int;
8
9
10/// Raw bindings to C functions `esaxx_*()`.
11extern "C" {
12    pub fn esaxx_c8i8  (t: *const  u8, sa: *mut  i8, l: *mut  i8, r: *mut  i8, d: *mut  i8, n:  i8, k:  i8, m: *mut  i8) -> c_int;
13    pub fn esaxx_c8i16 (t: *const  u8, sa: *mut i16, l: *mut i16, r: *mut i16, d: *mut i16, n: i16, k: i16, m: *mut i16) -> c_int;
14    pub fn esaxx_c8i32 (t: *const  u8, sa: *mut i32, l: *mut i32, r: *mut i32, d: *mut i32, n: i32, k: i32, m: *mut i32) -> c_int;
15    pub fn esaxx_c8i64 (t: *const  u8, sa: *mut i64, l: *mut i64, r: *mut i64, d: *mut i64, n: i64, k: i64, m: *mut i64) -> c_int;
16    pub fn esaxx_c16i8 (t: *const u16, sa: *mut  i8, l: *mut  i8, r: *mut  i8, d: *mut  i8, n:  i8, k:  i8, m: *mut  i8) -> c_int;
17    pub fn esaxx_c16i16(t: *const u16, sa: *mut i16, l: *mut i16, r: *mut i16, d: *mut i16, n: i16, k: i16, m: *mut i16) -> c_int;
18    pub fn esaxx_c16i32(t: *const u16, sa: *mut i32, l: *mut i32, r: *mut i32, d: *mut i32, n: i32, k: i32, m: *mut i32) -> c_int;
19    pub fn esaxx_c16i64(t: *const u16, sa: *mut i64, l: *mut i64, r: *mut i64, d: *mut i64, n: i64, k: i64, m: *mut i64) -> c_int;
20    pub fn esaxx_c32i8 (t: *const u32, sa: *mut  i8, l: *mut  i8, r: *mut  i8, d: *mut  i8, n:  i8, k:  i8, m: *mut  i8) -> c_int;
21    pub fn esaxx_c32i16(t: *const u32, sa: *mut i16, l: *mut i16, r: *mut i16, d: *mut i16, n: i16, k: i16, m: *mut i16) -> c_int;
22    pub fn esaxx_c32i32(t: *const u32, sa: *mut i32, l: *mut i32, r: *mut i32, d: *mut i32, n: i32, k: i32, m: *mut i32) -> c_int;
23    pub fn esaxx_c32i64(t: *const u32, sa: *mut i64, l: *mut i64, r: *mut i64, d: *mut i64, n: i64, k: i64, m: *mut i64) -> c_int;
24    pub fn esaxx_c64i8 (t: *const u64, sa: *mut  i8, l: *mut  i8, r: *mut  i8, d: *mut  i8, n:  i8, k:  i8, m: *mut  i8) -> c_int;
25    pub fn esaxx_c64i16(t: *const u64, sa: *mut i16, l: *mut i16, r: *mut i16, d: *mut i16, n: i16, k: i16, m: *mut i16) -> c_int;
26    pub fn esaxx_c64i32(t: *const u64, sa: *mut i32, l: *mut i32, r: *mut i32, d: *mut i32, n: i32, k: i32, m: *mut i32) -> c_int;
27    pub fn esaxx_c64i64(t: *const u64, sa: *mut i64, l: *mut i64, r: *mut i64, d: *mut i64, n: i64, k: i64, m: *mut i64) -> c_int;
28}
29
30
31/// An error type for representing error values returned from the underlying C++ function `esaxx()`.
32///
33/// A value of this type can be either -1 or -2.
34pub type Error = i8;
35
36/// A specialized `Result` type with the error type `saxx::Error`.
37pub type Result<T> = result::Result<T, Error>;
38
39/// Representation of a computed enhanced suffix array.
40#[derive(PartialEq, Eq, Clone, Debug)]
41pub struct Esa<T> {
42    pub sa: Vec<T>,
43    pub l: Vec<T>,
44    pub r: Vec<T>,
45    pub d: Vec<T>,
46    pub m: T,
47}
48
49
50impl<Index> Esa<Index> {
51    pub fn from_slice<T>(t: &[T]) -> Result<Esa<Index>>
52        where [T]: Esaxx<Index>
53    {
54        Esaxx::<Index>::esaxx(t)
55    }
56}
57
58
59/// A type that can be passed to `esaxx()`.
60pub trait Esaxx<Index> {
61    fn esaxx(t: &Self) -> Result<Esa<Index>>;
62}
63
64
65macro_rules! impl_esaxx_for_slice {
66    ( $char: ty, $index: ty, $esaxx_fn: expr ) => {
67        impl Esaxx<$index> for [$char] {
68            fn esaxx(t: &[$char]) -> Result<Esa<$index>> {
69                let length = t.len();
70                let max_char: $char = *t.iter().max().unwrap_or(&0);
71
72                assert!(length <= <$index>::max_value() as usize);
73                assert!((max_char as u64) < <$index>::max_value() as u64);
74
75                // These conversions are safe because of the above assertions
76                let n: $index = length as $index;
77                let k: $index = max_char as $index + 1;
78
79                let mut sa: Vec<$index> = Vec::new();
80                let mut  l: Vec<$index> = Vec::new();
81                let mut  r: Vec<$index> = Vec::new();
82                let mut  d: Vec<$index> = Vec::new();
83                sa.resize(n as usize, 0);
84                l.resize(n as usize, 0);
85                r.resize(n as usize, 0);
86                d.resize(n as usize, 0);
87
88                let mut  m: $index = 0;
89
90                let ret = unsafe {
91                    $esaxx_fn(t.as_ptr(),
92                              sa.as_mut_ptr(),
93                              l.as_mut_ptr(),
94                              r.as_mut_ptr(),
95                              d.as_mut_ptr(),
96                              n as $index,
97                              k as $index,
98                              &mut m as *mut $index) as Error
99                };
100
101                if ret == 0 {
102                    Ok(Esa { sa, l, r, d, m })
103                }
104                else {
105                    Err(ret)
106                }
107            }
108        }
109    };
110}
111
112impl_esaxx_for_slice!( u8,  i8, esaxx_c8i8);
113impl_esaxx_for_slice!( u8, i16, esaxx_c8i16);
114impl_esaxx_for_slice!( u8, i32, esaxx_c8i32);
115impl_esaxx_for_slice!( u8, i64, esaxx_c8i64);
116impl_esaxx_for_slice!(u16,  i8, esaxx_c16i8);
117impl_esaxx_for_slice!(u16, i16, esaxx_c16i16);
118impl_esaxx_for_slice!(u16, i32, esaxx_c16i32);
119impl_esaxx_for_slice!(u16, i64, esaxx_c16i64);
120impl_esaxx_for_slice!(u32,  i8, esaxx_c32i8);
121impl_esaxx_for_slice!(u32, i16, esaxx_c32i16);
122impl_esaxx_for_slice!(u32, i32, esaxx_c32i32);
123impl_esaxx_for_slice!(u32, i64, esaxx_c32i64);
124impl_esaxx_for_slice!(u64,  i8, esaxx_c64i8);
125impl_esaxx_for_slice!(u64, i16, esaxx_c64i16);
126impl_esaxx_for_slice!(u64, i32, esaxx_c64i32);
127impl_esaxx_for_slice!(u64, i64, esaxx_c64i64);
128
129
130pub fn compact(s: &[u8]) -> (u8, Vec<u8>) {
131    let mut map: HashMap<u8, u8> = HashMap::new();
132    let mut t: Vec<u8> = Vec::new();
133    for &chr in s.iter() {
134        let next_chr: u8 = map.len() as u8;
135        let new_chr: u8 = {
136            *map.entry(chr).or_insert(next_chr)
137        };
138        t.push(new_chr);
139    }
140    (map.len() as u8, t)
141}
142
143
144#[cfg(test)]
145mod tests {
146    use super::*;
147
148    #[test]
149    fn esaxx_compact() {
150        let input: &[u8] = b"aabbccaabbcc".as_ref();
151        let result1 = Esaxx::<i8>::esaxx(input);
152        let result2 = Esaxx::<i8>::esaxx(compact(input).1.as_slice());
153        assert_eq!(result1, result2);
154    }
155
156    #[test]
157    fn esa_from_slice() {
158        let input: &[u8] = b"mississippi".as_ref();
159        let result1 = Esaxx::<i8>::esaxx(input);
160        let result2: Result<Esa<i8>> = Esa::from_slice(input);
161        assert_eq!(result1, result2);
162    }
163
164    #[test]
165    fn compact_aabbccaabbcc() {
166        let (k, t) = compact(b"aabbccaabbcc");
167        assert_eq!(k, 3);
168        assert_eq!(t, [0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2]);
169    }
170}