logo
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// Copyright 2014-2016 Johannes Köster.
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
// This file may not be copied, modified, or distributed
// except according to those terms.

//! A data structure for a sequence of small integers with a few big integers.
//! Small ints are stored in type S (e.g. a byte), big ints are stored separately (in type B) in a BTree.
//! The implementation provides vector-like operations on the data structure (e.g. retrieve a position,
//! add an integer, etc.).
//!
//! # Example
//!
//! ```
//! use bio::data_structures::smallints::SmallInts;
//! let mut smallints: SmallInts<u8, usize> = SmallInts::new();
//! smallints.push(3);
//! smallints.push(4);
//! smallints.push(255);
//! smallints.push(305093);
//! assert_eq!(smallints.get(0).unwrap(), 3);
//! smallints.set(0, 50000);
//! let values: Vec<usize> = smallints.iter().collect();
//! assert_eq!(values, [50000, 4, 255, 305093]);
//! ```

use std::collections::BTreeMap;
use std::iter::{repeat, Enumerate};
use std::mem::size_of;
use std::slice;

use num_integer::Integer;
use num_traits::{cast, Bounded, Num, NumCast};

/// Data structure for storing a sequence of small integers with few big ones space efficiently
/// while supporting classical vector operations.
///
/// Every element occupies one slot of the small type `S`. The value `S::max_value()` is
/// reserved as a marker: a slot holding it means that the real value of the element is kept
/// in the big type `B`, keyed by the element's position.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SmallInts<S: Integer + Bounded + NumCast + Copy, B: Integer + NumCast + Copy> {
    /// One slot per element: the value itself, or `S::max_value()` for a big element.
    smallints: Vec<S>,
    /// Values of the big elements, keyed by their position in `smallints`.
    bigints: BTreeMap<usize, B>,
}

impl<S, B> Default for SmallInts<S, B>
where
    S: Integer + Bounded + NumCast + Copy,
    B: Integer + NumCast + Copy,
{
    /// Build an empty sequence.
    ///
    /// # Panics
    ///
    /// Panics if `S` does not occupy strictly fewer bytes than `B`.
    fn default() -> Self {
        let small_width = size_of::<S>();
        let big_width = size_of::<B>();
        assert!(small_width < big_width, "S has to be smaller than B");

        Self {
            smallints: Vec::new(),
            bigints: BTreeMap::new(),
        }
    }
}

impl<S: Integer + Bounded + NumCast + Copy, B: Integer + NumCast + Copy> SmallInts<S, B> {
    /// Panic unless the small type `S` occupies strictly fewer bytes than the big type `B`.
    fn assert_small_narrower_than_big() {
        assert!(
            size_of::<S>() < size_of::<B>(),
            "S has to be smaller than B"
        );
    }

    /// Create a new, empty instance.
    ///
    /// # Panics
    ///
    /// Panics if `S` does not occupy strictly fewer bytes than `B`.
    pub fn new() -> Self {
        Default::default()
    }

    /// Create a new, empty instance whose small-value storage has room for `n` elements.
    ///
    /// # Panics
    ///
    /// Panics if `S` does not occupy strictly fewer bytes than `B`.
    pub fn with_capacity(n: usize) -> Self {
        Self::assert_small_narrower_than_big();
        SmallInts {
            smallints: Vec::with_capacity(n),
            bigints: BTreeMap::new(),
        }
    }

    /// Create a new instance containing `n` times the integer `v` (and `v` is expected to be small).
    ///
    /// # Panics
    ///
    /// Panics if `S` does not occupy strictly fewer bytes than `B`, or if `v` equals
    /// `S::max_value()`, which is reserved to mark big elements.
    pub fn from_elem(v: S, n: usize) -> Self {
        Self::assert_small_narrower_than_big();
        // A slot holding the maximum of `S` is looked up among the big values, so it can
        // never be used as a plain small value, whatever its sign.
        assert!(v < S::max_value(), "v has to be smaller than maximum value");

        SmallInts {
            smallints: repeat(v).take(n).collect(),
            bigints: BTreeMap::new(),
        }
    }

    /// Return the integer at position `i`.
    ///
    /// Returns `None` if `i` is out of bounds or if the stored value cannot be
    /// represented in `B`.
    pub fn get(&self, i: usize) -> Option<B> {
        self.smallints
            .get(i)
            .and_then(|&slot| self.real_value(i, slot))
    }

    /// Append `v` to the sequence. This will determine whether `v` is big or small and store it accordingly.
    pub fn push(&mut self, v: B) {
        let maxv: S = S::max_value();
        match cast(v) {
            Some(small) if small < maxv => self.smallints.push(small),
            _ => {
                // Not representable as a small value: store the marker and keep the
                // real value under the index of the new element.
                let i = self.smallints.len();
                self.smallints.push(maxv);
                self.bigints.insert(i, v);
            }
        }
    }

    /// Set value of position `i` to `v`. This will determine whether `v` is big or small and store it accordingly.
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of bounds.
    pub fn set(&mut self, i: usize, v: B) {
        let maxv: S = S::max_value();
        match cast(v) {
            Some(small) if small < maxv => {
                let held_big = self.smallints[i] == maxv;
                self.smallints[i] = small;
                if held_big {
                    // The slot used to refer to a big value; drop it so that no stale
                    // entry lingers in the map.
                    self.bigints.remove(&i);
                }
            }
            _ => {
                self.smallints[i] = maxv;
                self.bigints.insert(i, v);
            }
        }
    }

    /// Iterate over sequence. Values will be returned in the big integer type (`B`).
    pub fn iter(&self) -> Iter<'_, S, B> {
        Iter {
            smallints: self,
            items: self.smallints.iter().enumerate(),
        }
    }

    /// Decompress into a normal vector of big integers (type `B`).
    pub fn decompress(&self) -> Vec<B> {
        self.iter().collect()
    }

    /// Length of the sequence.
    pub fn len(&self) -> usize {
        self.smallints.len()
    }

    /// Is the sequence empty?
    pub fn is_empty(&self) -> bool {
        self.smallints.is_empty()
    }

    /// Resolve the slot content `v` found at position `i` to the value it stands for:
    /// the slot itself converted to `B`, or the big value stored for `i` if the slot
    /// holds the marker `S::max_value()`.
    fn real_value(&self, i: usize, v: S) -> Option<B> {
        if v < S::max_value() {
            cast(v)
        } else {
            self.bigints.get(&i).cloned()
        }
    }
}

/// Borrowing iterator over a `SmallInts` sequence; every element is yielded as the big type `B`.
pub struct Iter<'a, S, B>
where
    B: Integer + NumCast + Copy,
    S: Integer + Bounded + NumCast + Copy,
    <B as Num>::FromStrRadixErr: 'a,
    <S as Num>::FromStrRadixErr: 'a,
{
    /// Position-tagged walk over the small slots of the sequence.
    items: Enumerate<slice::Iter<'a, S>>,
    /// The sequence being traversed, consulted to resolve each slot to its real value.
    smallints: &'a SmallInts<S, B>,
}

impl<'a, S, B> Iterator for Iter<'a, S, B>
where
    S: 'a + Integer + Bounded + NumCast + Copy,
    B: 'a + Integer + NumCast + Copy,
    <S as Num>::FromStrRadixErr: 'a,
    <B as Num>::FromStrRadixErr: 'a,
{
    type Item = B;

    /// Advance to the next slot and resolve it to the value it stands for.
    ///
    /// Yields `None` once the slots are exhausted, or if a slot cannot be resolved.
    fn next(&mut self) -> Option<B> {
        let (position, &slot) = self.items.next()?;
        self.smallints.real_value(position, slot)
    }
}

// `cfg(test)` (singular) is the predicate set by the test harness.
#[cfg(test)]
mod tests {
    use super::SmallInts;

    /// Compile-time check that `SmallInts` implements both serde traits.
    #[test]
    fn test_serde() {
        use serde::{Deserialize, Serialize};
        // `Deserialize` is generic over the lifetime of the input data; demand it for
        // every such lifetime, i.e. for an owned value.
        fn impls_serde_traits<S: Serialize + for<'de> Deserialize<'de>>() {}

        impls_serde_traits::<SmallInts<i8, isize>>();
    }
}