sdsl/interface/wavelet_trees/
wt_huff.rs

1use crate::meta;
2use crate::{backend::sdsl_c, interface::common::Ptr};
3use anyhow::{format_err, Result};
4
5use crate::interface::common::{self, Code, Id};
6use crate::interface::wavelet_trees::layouts;
7
8/// A Huffman-shaped wavelet tree.
9///
10/// A wavelet tree is built for a vector of characters over the byte alphabet
11/// $\Sigma$. If you need a wavelet tree for a integer alphabet you should
12/// use `sdsl::wavelet_trees::WtInt`.
13/// The wavelet tree $wt$ consists of a tree of bitvectors and provides
14/// three efficient methods:
15///  - The "[]"-operator: `wt[i]` returns the i-th symbol of vector for
16///    which the wavelet tree was build for.
17/// - The rank method: `wt.rank(i,c)` returns the number of occurrences
18///   of symbol $c$ in the prefix [0..i-1] in the vector for which the
19///   wavelet tree was build for.
20/// - The select method: `wt.select(j,c)` returns the index
21///   $i\in [0..\mathrm{len}()-1]$ of the j-th occurrence of symbol $c$.
22///
23/// ## Space complexity
24/// $n H_0 + 2|\Sigma|\log n$ bits, where $n$ is the size
25/// of the vector the wavelet tree was build for.
26///
27/// # Arguments
28/// * `BitVector` - Underlying bitvector structure.
29/// * `RankSupport1` - Rank support for pattern `1` on the bitvector.
30/// * `SelectSupport1` - Select support for pattern `1` on the bitvector.
31/// * `SelectSupport0` - Select support for pattern `0` on the bitvector.
32/// * `TreeStrategy` - Layout of the tree structure in memory.
33///
34/// # References
35/// The idea of using a Huffman shaped wavelet was first mentioned on page 17
36/// of the following technical report:
37///
38/// Veli Mäkinen and Gonzalo Navarro:
39/// "Succinct Suffix Arrays based on Run-Length Encoding.",
40/// <http://swp.dcc.uchile.cl/TR/2005/TR_DCC-2005-004.pdf>
41///
42/// # Example
43///
44/// ```ignore
45/// let bv = sdsl::bit_vector! {1, 1, 0, 1};
46/// let wt = sdsl::wavelet_trees::WtHuff::<>::from_bit_vector(&bv)?;
47///
48/// let result = wt.get_int(1, 3);
49/// let expected = 5;
50/// assert_eq!(result, expected);
51/// ```
52///
53/// For further examples see [here](https://github.com/sdsl-rs/sdsl-rs/blob/master/examples/src/wavelet_trees/wt_huff.rs).
54pub struct WtHuff<
55    'a,
56    BitVector = crate::bit_vectors::BitVector,
57    RankSupport1 = crate::rank_supports::RankSupportV<'a, crate::bit_patterns::P1>,
58    SelectSupport1 = crate::select_supports::SelectSupportMcl<'a, crate::bit_patterns::P1>,
59    SelectSupport0 = crate::select_supports::SelectSupportMcl<'a, crate::bit_patterns::P0>,
60    TreeStrategy = layouts::byte_tree::ByteTree,
61> where
62    BitVector: common::Code,
63    RankSupport1: common::Code,
64    SelectSupport1: common::Code,
65    SelectSupport0: common::Code,
66    TreeStrategy: layouts::common::TreeStrategy + common::Code,
67{
68    // Dummy fields which are never used, always None. Included so that generic parameters are used.
69    _bs: Option<BitVector>,
70    _rs1: &'a Option<RankSupport1>,
71    _ss1: &'a Option<SelectSupport1>,
72    _ss0: &'a Option<SelectSupport0>,
73    _ts: Option<TreeStrategy>,
74
75    ptr: common::VoidPtr,
76    interface: Interface<TreeStrategy::Value, TreeStrategy::Size>,
77}
78
79impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
80    WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
81where
82    BitVector: common::Code + 'a,
83    RankSupport1: common::Code,
84    SelectSupport1: common::Code,
85    SelectSupport0: common::Code,
86    TreeStrategy: layouts::common::TreeStrategy + common::Code + 'a,
87{
88    /// Construct a Huffman-shaped wavelet tree from file.
89    /// # Arguments
90    /// * `path` - File path.
91    pub fn from_file(path: &std::path::PathBuf) -> Result<Self> {
92        let path = path
93            .to_str()
94            .ok_or(format_err!("Failed to convert PathBuf into str."))?;
95        let path = std::ffi::CString::new(path)?;
96
97        let id = Self::id()?;
98        let interface = Interface::new(&id)?;
99        let ptr = (interface.from_file)(path.as_ptr());
100        let wt = Self::new(interface, ptr)?;
101        Ok(wt)
102    }
103
104    /// Construct a Huffman-shaped wavelet tree from a string.
105    /// # Arguments
106    /// * `string` - Data string.
107    pub fn from_str(string: &str) -> Result<Self> {
108        let id = Self::id()?;
109        let interface = Interface::new(&id)?;
110        let c_string = std::ffi::CString::new(string)?;
111        let ptr = (interface.from_string)(c_string.as_ptr());
112        let wt = Self::new(interface, ptr)?;
113        Ok(wt)
114    }
115
116    /// Construct a Huffman-shaped wavelet tree from an integer vector.
117    /// # Arguments
118    /// * `int_vector` - Integer vector.
119    pub fn from_int_vector<const WIDTH: u8>(
120        int_vector: &crate::interface::int_vector::IntVector<WIDTH>,
121    ) -> Result<Self> {
122        let id = Self::id()?;
123        let interface = Interface::new(&id)?;
124        let ptr = (interface.from_int_vector)(*int_vector.ptr());
125        let wt = Self::new(interface, ptr)?;
126        Ok(wt)
127    }
128
129    /// Construct a Huffman-shaped wavelet tree from a bit vector.
130    /// # Arguments
131    /// * `bit_vector` - Bitvector.
132    pub fn from_bit_vector(
133        bit_vector: &crate::interface::bit_vectors::bit_vector::BitVector,
134    ) -> Result<Self> {
135        let id = Self::id()?;
136        let interface = Interface::new(&id)?;
137        let ptr = (interface.from_bit_vector)(*bit_vector.ptr());
138        let wt = Self::new(interface, ptr)?;
139        Ok(wt)
140    }
141
142    fn new(
143        interface: Interface<TreeStrategy::Value, TreeStrategy::Size>,
144        ptr: common::VoidPtr,
145    ) -> Result<Self> {
146        Ok(Self {
147            _bs: None,
148            _rs1: &None,
149            _ss1: &None,
150            _ss0: &None,
151            _ts: None,
152
153            ptr,
154            interface,
155        })
156    }
157
158    /// Returns the length of the original vector that was used in constructing the wavelet tree.
159    pub fn len(&self) -> usize {
160        (self.interface.len)(self.ptr)
161    }
162
163    /// Returns true if the wavelet tree contains no data, otherwise returns false.
164    pub fn is_empty(&self) -> bool {
165        (self.interface.is_empty)(self.ptr)
166    }
167
168    /// Get the i-th element of the original vector that was used in constructing the wavelet tree.
169    /// # Arguments
170    /// * `index` - An index in range $ [0, \mathrm{len}()) $.
171    pub fn get(&self, index: usize) -> TreeStrategy::Value {
172        (self.interface.get)(self.ptr, index)
173    }
174
175    /// Returns a count of the given symbol within the prefix $ [0, \mathrm{index}-1] $.
176    ///
177    /// The time complexity is $ \mathcal{O}(H_0) $ on average, where $ H_0 $ is the zero order entropy of the sequence.
178    /// # Arguments
179    /// * `index` - An index in range $ [0, \mathrm{len}()) $.
180    /// * `symbol` - Symbol.
181    pub fn rank(
182        &self,
183        index: TreeStrategy::Size,
184        symbol: TreeStrategy::Value,
185    ) -> TreeStrategy::Size {
186        (self.interface.rank)(self.ptr, index, symbol)
187    }
188
189    /// Returns the symbol `wt[index]` and a count of its occurrences within the prefix $ [0, \mathrm{index}-1] $.
190    ///
191    /// The time complexity is $ \mathcal{O}(H_0) $ on average, where $ H_0 $ is the zero order entropy of the sequence.
192    /// # Arguments
193    /// * `index` - An index in range $ [0, \mathrm{len}()) $.
194    pub fn inverse_select(
195        &self,
196        index: TreeStrategy::Size,
197    ) -> (TreeStrategy::Value, TreeStrategy::Size) {
198        let (rank, symbol) = (self.interface.inverse_select)(self.ptr, index).into();
199        (symbol, rank)
200    }
201
202    /// Returns the index of the i-th occurrence of the given symbol in the supported vector.
203    ///
204    /// The time complexity is $ \mathcal{O}(H_0) $ on average, where $ H_0 $ is the zero order entropy of the sequence.
205    /// Precondition: $ 1 \leq \mathrm{index} \leq \mathrm{rank}(\mathrm{len}(), \mathrm{symbol}) $.
206    /// # Arguments
207    /// * `i` - i-th symbol occurrence.
208    /// * `symbol` - Symbol.
209    pub fn select(&self, i: TreeStrategy::Size, symbol: TreeStrategy::Value) -> TreeStrategy::Size {
210        (self.interface.select)(self.ptr, i, symbol)
211    }
212
213    /// For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
214    ///
215    /// The time complexity is $ \mathcal{O}(\min{\sigma, k \log \sigma}) $
216    /// Precondition:
217    ///
218    /// # Arguments
219    /// * `start_index` - The start index (inclusive) of the interval.
220    /// * `end_index` - The end index (exclusive) of the interval.
221    pub fn interval_symbols(
222        &self,
223        start_index: TreeStrategy::Size,
224        end_index: TreeStrategy::Size,
225    ) -> IntervalSymbols<TreeStrategy::Value, TreeStrategy::Size> {
226        let result = (self.interface.interval_symbols)(self.ptr, start_index, end_index);
227        IntervalSymbols {
228            interval_alphabet_size: result.interval_alphabet_size,
229            interval_symbols: common::array_from_c_array(result.cs, result.length.into()),
230            rank_symbols_lower: common::array_from_c_array(result.rank_c_i, result.length.into()),
231            rank_symbols_upper: common::array_from_c_array(result.rank_c_j, result.length.into()),
232
233            internal_results: result,
234            interface: self.interface.clone(),
235        }
236    }
237
238    /// Returns a count of elements which are lexicographic smaller/greater than `symbol` in [i..j-1].
239    ///
240    /// This method is only available for lex ordered tree strategies.
241    ///
242    /// # Arguments
243    /// * `start_index` - The start index (inclusive) of the interval.
244    /// * `end_index` - The end index (exclusive) of the interval.
245    /// * `symbol` - Symbol.
246    pub fn lex_count(
247        &self,
248        start_index: TreeStrategy::Size,
249        end_index: TreeStrategy::Size,
250        symbol: TreeStrategy::Value,
251    ) -> LexCount {
252        assert!(
253            TreeStrategy::LEX_ORDERED,
254            "TreeStrategy is not lex ordered."
255        );
256        (self.interface.lex_count)(self.ptr, start_index, end_index, symbol)
257    }
258
259    /// Returns a count of symbols which are lexicographic smaller than `symbol` in [0..i-1].
260    ///
261    /// This method is only available for lex ordered tree strategies.
262    ///
263    /// # Arguments
264    /// * `index` - Exclusive right bound of the range.
265    /// * `symbol` - Symbol.
266    pub fn lex_smaller_count(
267        &self,
268        index: TreeStrategy::Size,
269        symbol: TreeStrategy::Value,
270    ) -> LexSmallerCount {
271        assert!(
272            TreeStrategy::LEX_ORDERED,
273            "TreeStrategy is not lex ordered."
274        );
275        (self.interface.lex_smaller_count)(self.ptr, index, symbol)
276    }
277
278    /// For a given symbol returns the next larger or equal symbol in the wavelet tree.
279    /// Returns None if a valid symbol was not found.
280    ///
281    /// # Arguments
282    /// * `symbol` - Symbol.
283    pub fn symbol_gte(&self, symbol: TreeStrategy::Value) -> Option<TreeStrategy::Value> {
284        let result = (self.interface.symbol_gte)(self.ptr, symbol);
285        if result.found {
286            Some(result.symbol)
287        } else {
288            None
289        }
290    }
291
292    /// For a given symbol returns the next lesser or equal symbol in the wavelet tree.
293    /// Returns None if a valid symbol was not found.
294    ///
295    /// # Arguments
296    /// * `symbol` - Symbol.
297    pub fn symbol_lte(&self, symbol: TreeStrategy::Value) -> Option<TreeStrategy::Value> {
298        let result = (self.interface.symbol_lte)(self.ptr, symbol);
299        if result.found {
300            Some(result.symbol)
301        } else {
302            None
303        }
304    }
305
306    /// Returns a count of the number of different symbols in the wavelet tree.
307    pub fn alphabet_size(&self) -> TreeStrategy::Size {
308        (self.interface.alphabet_size)(self.ptr)
309    }
310
311    /// Returns an iterator over the vector that was used in constructing the wavelet tree.
312    pub fn iter(&self) -> common::VectorIterator<TreeStrategy::Value, Self> {
313        common::VectorIterator::new(&self, self.len())
314    }
315}
316
317impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy> common::io::IO
318    for WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
319where
320    BitVector: common::Code,
321    RankSupport1: common::Code,
322    SelectSupport1: common::Code,
323    SelectSupport0: common::Code,
324    TreeStrategy: layouts::common::TreeStrategy + common::Code,
325{
326    fn io(&self) -> &common::io::Interface {
327        &self.interface.io
328    }
329}
330
331impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy> common::Ptr
332    for WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
333where
334    BitVector: common::Code,
335    RankSupport1: common::Code,
336    SelectSupport1: common::Code,
337    SelectSupport0: common::Code,
338    TreeStrategy: layouts::common::TreeStrategy + common::Code,
339{
340    fn ptr(&self) -> &common::VoidPtr {
341        &self.ptr
342    }
343}
344
345impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy> common::Id
346    for WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
347where
348    BitVector: common::Code + 'a,
349    RankSupport1: common::Code + 'a,
350    SelectSupport1: common::Code + 'a,
351    SelectSupport0: common::Code + 'a,
352    TreeStrategy: layouts::common::TreeStrategy + common::Code + 'a,
353{
354    fn id() -> Result<String> {
355        let meta = Box::new(meta::wavelet_trees::wt_huff::WtHuffMeta::new())
356            as Box<dyn meta::common::Meta>;
357        let parameters_c_code = Self::parameters_c_code()?;
358        let id = sdsl_c::specification::get_id(&meta.c_code(&parameters_c_code)?)?;
359        Ok(id)
360    }
361}
362
363impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy> common::Code
364    for WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
365where
366    BitVector: common::Code,
367    RankSupport1: common::Code + 'a,
368    SelectSupport1: common::Code + 'a,
369    SelectSupport0: common::Code + 'a,
370    TreeStrategy: layouts::common::TreeStrategy + common::Code,
371{
372    fn c_code() -> Result<String> {
373        let meta = Box::new(meta::wavelet_trees::wt_huff::WtHuffMeta::new())
374            as Box<dyn meta::common::Meta>;
375        let parameters_c_code = Self::parameters_c_code()?;
376        Ok(meta.c_code(&parameters_c_code)?)
377    }
378
379    fn parameters_c_code() -> Result<Vec<String>> {
380        Ok(vec![
381            BitVector::c_code()?,
382            RankSupport1::c_code()?,
383            SelectSupport1::c_code()?,
384            SelectSupport0::c_code()?,
385            TreeStrategy::c_code()?,
386        ])
387    }
388}
389
390impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
391    common::IterGet<TreeStrategy::Value>
392    for WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
393where
394    BitVector: common::Code,
395    RankSupport1: common::Code,
396    SelectSupport1: common::Code,
397    SelectSupport0: common::Code,
398    TreeStrategy: layouts::common::TreeStrategy + common::Code,
399{
400    fn iter_get(&self, index: usize) -> TreeStrategy::Value {
401        (self.interface.get)(self.ptr, index)
402    }
403}
404
405impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy> Drop
406    for WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
407where
408    BitVector: common::Code,
409    RankSupport1: common::Code,
410    SelectSupport1: common::Code,
411    SelectSupport0: common::Code,
412    TreeStrategy: layouts::common::TreeStrategy + common::Code,
413{
414    fn drop(&mut self) {
415        (self.interface.drop)(self.ptr)
416    }
417}
418
419impl<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy> Clone
420    for WtHuff<'a, BitVector, RankSupport1, SelectSupport1, SelectSupport0, TreeStrategy>
421where
422    BitVector: common::Code,
423    RankSupport1: common::Code,
424    SelectSupport1: common::Code,
425    SelectSupport0: common::Code,
426    TreeStrategy: layouts::common::TreeStrategy + common::Code,
427{
428    fn clone(&self) -> Self {
429        Self {
430            _bs: None,
431            _rs1: &None,
432            _ss1: &None,
433            _ss0: &None,
434            _ts: None,
435
436            ptr: (self.interface.clone)(self.ptr),
437            interface: self.interface.clone(),
438        }
439    }
440}
441
442#[repr(C)]
443struct SymbolGte<Value> {
444    pub found: bool,
445    pub symbol: Value,
446}
447
448#[repr(C)]
449struct SymbolLte<Value> {
450    pub found: bool,
451    pub symbol: Value,
452}
453
454#[repr(C)]
455pub struct LexCount {
456    pub rank: usize,
457    pub count_smaller_symbols: usize,
458    pub count_greater_symbols: usize,
459}
460
461#[repr(C)]
462pub struct LexSmallerCount {
463    pub rank: usize,
464    pub count_smaller_symbols: usize,
465}
466
467pub struct IntervalSymbols<'a, Value, Size> {
468    pub interval_alphabet_size: Size,
469    pub interval_symbols: &'a [Value],
470    pub rank_symbols_lower: &'a [u64],
471    pub rank_symbols_upper: &'a [u64],
472
473    internal_results: ResultIntervalSymbols<Value, Size>,
474    interface: Interface<Value, Size>,
475}
476
477impl<'a, Value, Size> Drop for IntervalSymbols<'a, Value, Size> {
478    fn drop(&mut self) {
479        (self.interface.free_result_interval_symbols)(
480            self.internal_results.cs,
481            self.internal_results.rank_c_i,
482            self.internal_results.rank_c_j,
483        )
484    }
485}
486
487#[repr(C)]
488struct ResultIntervalSymbols<Value, Size> {
489    interval_alphabet_size: Size,
490    length: Size,
491    cs: *const Value,
492    rank_c_i: *const u64,
493    rank_c_j: *const u64,
494}
495
496#[derive(Clone)]
497struct Interface<Value, Size> {
498    create: extern "C" fn() -> common::VoidPtr,
499    from_file: extern "C" fn(*const std::os::raw::c_char) -> common::VoidPtr,
500    from_string: extern "C" fn(*const std::os::raw::c_char) -> common::VoidPtr,
501    from_int_vector: extern "C" fn(common::VoidPtr) -> common::VoidPtr,
502    from_bit_vector: extern "C" fn(common::VoidPtr) -> common::VoidPtr,
503    drop: extern "C" fn(common::VoidPtr),
504    clone: extern "C" fn(common::VoidPtr) -> common::VoidPtr,
505
506    len: extern "C" fn(common::VoidPtr) -> usize,
507    is_empty: extern "C" fn(common::VoidPtr) -> bool,
508    get: extern "C" fn(common::VoidPtr, usize) -> Value,
509    rank: extern "C" fn(common::VoidPtr, Size, Value) -> Size,
510    inverse_select: extern "C" fn(common::VoidPtr, Size) -> common::Pair<Size, Value>,
511    select: extern "C" fn(common::VoidPtr, Size, Value) -> Size,
512    interval_symbols:
513        extern "C" fn(common::VoidPtr, Size, Size) -> ResultIntervalSymbols<Value, Size>,
514    free_result_interval_symbols: extern "C" fn(*const Value, *const u64, *const u64),
515    lex_count: extern "C" fn(common::VoidPtr, Size, Size, Value) -> LexCount,
516    lex_smaller_count: extern "C" fn(common::VoidPtr, Size, Value) -> LexSmallerCount,
517    symbol_gte: extern "C" fn(common::VoidPtr, Value) -> SymbolGte<Value>,
518    symbol_lte: extern "C" fn(common::VoidPtr, Value) -> SymbolLte<Value>,
519    alphabet_size: extern "C" fn(common::VoidPtr) -> Size,
520
521    pub io: common::io::Interface,
522    _lib: std::sync::Arc<sharedlib::Lib>,
523}
524
525impl<Value, Size> Interface<Value, Size> {
526    pub fn new(id: &str) -> Result<Self> {
527        let lib = sdsl_c::LIB.clone();
528        let builder = sdsl_c::FunctionBuilder::new(Some("wt_huff"), id, lib.clone());
529
530        Ok(Self {
531            create: builder.get("create")?,
532            from_file: builder.get("from_file")?,
533            from_string: builder.get("from_string")?,
534            from_int_vector: builder.get("from_int_vector")?,
535            from_bit_vector: builder.get("from_bit_vector")?,
536            drop: builder.get("destroy")?,
537            clone: builder.get("copy")?,
538
539            len: builder.get("size")?,
540            is_empty: builder.get("empty")?,
541            get: builder.get("get_element")?,
542            rank: builder.get("rank")?,
543            inverse_select: builder.get("inverse_select")?,
544            select: builder.get("select")?,
545            interval_symbols: builder.get("interval_symbols")?,
546            free_result_interval_symbols: builder.get("free_result_interval_symbols")?,
547            lex_count: builder.get("lex_count")?,
548            lex_smaller_count: builder.get("lex_smaller_count")?,
549            symbol_gte: builder.get("symbol_gte")?,
550            symbol_lte: builder.get("symbol_lte")?,
551            alphabet_size: builder.get("alphabet_size")?,
552
553            io: common::io::Interface::new(&id)?,
554            _lib: lib.clone(),
555        })
556    }
557}