concise/
lib.rs

1#![allow(overflowing_literals)]
2#![feature(wrapping_int_impl)]
3
4use std::cmp;
5use std::num::Wrapping;
6
7#[derive(Debug, Clone)]
8pub struct CONCISE {
9    words: Option<Vec<Wrapping<i32>>>,
10    last: i32,
11    size: i32,
12    last_word_index: i32,
13}
14
15impl CONCISE {
16    pub fn new() -> CONCISE {
17        return CONCISE{
18            words: None,
19            last: -1,
20            size: 0,
21            last_word_index: -1,
22        }
23    }
24
25    pub fn words_view(&self) -> &[Wrapping<i32>] {
26        &self.words.as_ref().unwrap()[0..=self.last_word_index as usize]
27    }
28
29    const MAX_LITERAL_LENGTH: i32 = 31;
30    const ALL_ZEROS_LITERAL: Wrapping<i32> = Wrapping(0x80000000);
31    const ALL_ONES_LITERAL: Wrapping<i32> = Wrapping(0xFFFFFFFF);
32    const SEQUENCE_BIT: Wrapping<i32> = Wrapping(0x40000000);
33
34    pub fn append(&mut self, i: i32) {
35        if self.words.is_none() {
36            let zero_blocks = i / 31;
37            if zero_blocks == 0 {
38                self.words = Some(vec![Wrapping(0); 1]);
39                self.last_word_index = 0;
40            } else if zero_blocks == 1 {
41                self.words = Some(vec![Wrapping(0); 2]);
42                self.last_word_index = 1;
43                self.words.as_mut().unwrap()[0] = CONCISE::ALL_ZEROS_LITERAL;
44            } else {
45                self.words = Some(vec![Wrapping(0); 2]);
46                self.last_word_index = 1;
47                self.words.as_mut().unwrap()[0] = Wrapping(zero_blocks - 1);
48            }
49            self.last = i;
50            self.size = 1;
51            self.words.as_mut().unwrap()[self.last_word_index as usize] = CONCISE::ALL_ZEROS_LITERAL | Wrapping(1 << (i % 31));
52            return;
53        }
54
55        let mut bit = self.last % 31 + i - self.last;
56
57        if bit >= CONCISE::MAX_LITERAL_LENGTH {
58            let zero_blocks = bit / 31 - 1;
59            bit %= 31;
60            if zero_blocks == 0 {
61                self.ensure_capacity((self.last_word_index + 1) as usize);
62            } else {
63                self.ensure_capacity((self.last_word_index + 2) as usize);
64                self.append_fill(Wrapping(zero_blocks), Wrapping(0));
65            }
66            self.append_literal(CONCISE::ALL_ZEROS_LITERAL | Wrapping(1 << bit));
67        } else {
68            self.words.as_mut().unwrap()[self.last_word_index as usize] |= Wrapping(1 << bit);
69            if self.words.as_mut().unwrap()[self.last_word_index as usize] == CONCISE::ALL_ONES_LITERAL {
70                self.last_word_index -= 1;
71                self.append_literal(CONCISE::ALL_ONES_LITERAL);
72            }
73        }
74
75        self.last = i;
76        if self.size >= 0 {
77            self.size += 1;
78        }
79    }
80
81    fn ensure_capacity(&mut self, index: usize) {
82        let mut capacity = if self.words.is_none() { 0 } else { self.words.as_mut().unwrap().len() };
83        if capacity > index {
84            return;
85        }
86        capacity = cmp::max(capacity << 1, index + 1);
87
88        // XXX: This is probably inefficient
89        if self.words.is_none() {
90            self.words = Some(vec![Wrapping(0); capacity]);
91            return;
92        }
93        let mut new_words = vec![Wrapping(0i32); capacity];
94        for (i, word) in self.words.as_mut().unwrap().iter().enumerate() {
95            new_words[i] = *word;
96        }
97        self.words = Some(new_words);
98    }
99
100    fn append_fill(&mut self, length: Wrapping<i32>, mut fill_type: Wrapping<i32>) {
101        // XXX: Are these really necessary?
102        assert!(length > Wrapping(0));
103        assert!(self.last_word_index >= -1);
104
105        fill_type &= CONCISE::SEQUENCE_BIT;
106
107        if length == Wrapping(1) {
108            self.append_literal(if fill_type == Wrapping(0) { CONCISE::ALL_ZEROS_LITERAL } else { CONCISE::ALL_ONES_LITERAL });
109            return;
110        }
111
112        if self.last_word_index < 0 {
113            self.words.as_mut().unwrap()[self.last_word_index as usize] = fill_type | (length - Wrapping(1));
114            return;
115        }
116
117        let last_word = self.words.as_mut().unwrap()[self.last_word_index as usize];
118        if self.is_literal(last_word) {
119            if fill_type == Wrapping(0) && last_word == CONCISE::ALL_ZEROS_LITERAL {
120                self.words.as_mut().unwrap()[self.last_word_index as usize] = length;
121            } else if fill_type == CONCISE::SEQUENCE_BIT && last_word == CONCISE::ALL_ONES_LITERAL {
122                self.words.as_mut().unwrap()[self.last_word_index as usize] = CONCISE::SEQUENCE_BIT | length;
123            } else {
124                if fill_type == Wrapping(0) && self.contains_only_one_bit(self.get_literal_bits(last_word)) {
125                    self.words.as_mut().unwrap()[self.last_word_index as usize] = length | Wrapping((1 + last_word.trailing_zeros() as i32) << 25);
126                } else if fill_type == CONCISE::SEQUENCE_BIT && self.contains_only_one_bit(!last_word) {
127                    self.words.as_mut().unwrap()[self.last_word_index as usize] = CONCISE::SEQUENCE_BIT | length | Wrapping((1 + (!last_word).trailing_zeros() as i32) << 25);
128                } else {
129                    self.last_word_index += 1;
130                    self.words.as_mut().unwrap()[self.last_word_index as usize] = fill_type | (length - Wrapping(1));
131                }
132            }
133        } else {
134            if last_word & Wrapping(0xC0000000) == fill_type {
135                self.words.as_mut().unwrap()[self.last_word_index as usize] += length;
136            } else {
137                self.last_word_index += 1;
138                self.words.as_mut().unwrap()[self.last_word_index as usize] = fill_type | (length - Wrapping(1));
139            }
140        }
141    }
142
143    fn append_literal(&mut self, word: Wrapping<i32>) {
144        if self.last_word_index == 0 && word == CONCISE::ALL_ZEROS_LITERAL && self.words.as_mut().unwrap()[0] == Wrapping(0x01FFFFFF) {
145            return;
146        }
147
148        if self.last_word_index < 0 {
149            self.last_word_index = 0;
150            self.words.as_mut().unwrap()[self.last_word_index as usize] = word;
151            return;
152        }
153
154        let last_word = self.words.as_mut().unwrap()[self.last_word_index as usize];
155        if word == CONCISE::ALL_ZEROS_LITERAL {
156            if last_word == CONCISE::ALL_ZEROS_LITERAL {
157                self.words.as_mut().unwrap()[self.last_word_index as usize] = Wrapping(1);
158            } else if self.is_zero_sequence(last_word) {
159                self.words.as_mut().unwrap()[self.last_word_index as usize] += Wrapping(1);
160            } else if self.contains_only_one_bit(self.get_literal_bits(last_word)) {
161                self.words.as_mut().unwrap()[self.last_word_index as usize] = Wrapping(1 | ((1 + last_word.trailing_zeros() as i32) << 25));
162            } else {
163                self.last_word_index += 1;
164                self.words.as_mut().unwrap()[self.last_word_index as usize] = word;
165            }
166        } else if word == CONCISE::ALL_ONES_LITERAL {
167            if last_word == CONCISE::ALL_ONES_LITERAL {
168                self.words.as_mut().unwrap()[self.last_word_index as usize] = CONCISE::SEQUENCE_BIT | Wrapping(1);
169            } else if self.is_one_sequence(last_word) {
170                self.words.as_mut().unwrap()[self.last_word_index as usize] += Wrapping(1);
171            } else if self.contains_only_one_bit(!last_word) {
172                self.words.as_mut().unwrap()[self.last_word_index as usize] = CONCISE::SEQUENCE_BIT | Wrapping(1) | Wrapping((1 + (!last_word).trailing_zeros() as i32) << 25);
173            } else {
174                self.last_word_index += 1;
175                self.words.as_mut().unwrap()[self.last_word_index as usize] = word;
176            }
177        } else {
178            self.last_word_index += 1;
179            self.words.as_mut().unwrap()[self.last_word_index as usize] = word;
180        }
181    }
182
183    fn is_zero_sequence(&self, word: Wrapping<i32>) -> bool {
184        return (word & Wrapping(0xC0000000)) == Wrapping(0);
185    }
186
187    fn is_one_sequence(&self, word: Wrapping<i32>) -> bool {
188        return (word & Wrapping(0xC0000000)) == CONCISE::SEQUENCE_BIT;
189    }
190
191    fn is_literal(&self, word: Wrapping<i32>) -> bool {
192        return (word & Wrapping(0x80000000)) != Wrapping(0);
193    }
194
195    fn contains_only_one_bit(&self, literal: Wrapping<i32>) -> bool {
196        return (literal & (literal - Wrapping(1))) == Wrapping(0);
197    }
198
199    fn get_literal_bits(&self, word: Wrapping<i32>) -> Wrapping<i32> {
200        return Wrapping(0x7FFFFFFF) & word;
201    }
202}
203
204#[cfg(test)]
205mod tests {
206    use super::*;
207
208    #[test]
209    fn word_iterator_next1() {
210        let mut concise = CONCISE::new();
211        for i in 1..=5 {
212            concise.append(i);
213        }
214
215        let words = concise.words.unwrap();
216        assert_eq!(words.len(), 1);
217        assert_eq!(words[0], Wrapping(0x8000003E));
218    }
219
220    #[test]
221    fn word_iterator_next2() {
222        let mut concise = CONCISE::new();
223        for i in 0..100000 {
224            concise.append(i);
225        }
226
227        let words = concise.words.unwrap();
228        assert_eq!(words.len(), 2);
229        assert_eq!(words[0], Wrapping(0x40000C98));
230        assert_eq!(words[1], Wrapping(0x81FFFFFF));
231    }
232}