fid_rs/fid/
fid_impl.rs

1use super::{Blocks, Chunks, Fid};
2use crate::internal_data_structure::popcount_table::PopcountTable;
3use crate::internal_data_structure::raw_bit_vector::RawBitVector;
4use std::ops::Index;
5
6impl From<&str> for Fid {
7    /// Constructor from string representation of bit sequence.
8    ///
9    /// - '0' is interpreted as _0_.
10    /// - '1' is interpreted as _1_.
11    /// - '_' is just ignored.
12    ///
13    /// # Examples
14    /// ```
15    /// use fid_rs::Fid;
16    ///
17    /// let fid = Fid::from("01_11");
18    /// assert_eq!(fid[0], false);
19    /// assert_eq!(fid[1], true);
20    /// assert_eq!(fid[2], true);
21    /// assert_eq!(fid[3], true);
22    /// ```
23    ///
24    /// # Panics
25    /// When:
26    /// - `s` contains any character other than '0', '1', and '_'.
27    /// - `s` does not contain any '0' or '1'
28    fn from(s: &str) -> Self {
29        let bits: Vec<bool> = s
30            .as_bytes()
31            .iter()
32            .filter_map(|c| match c {
33                48 /* '0' */ => Some(false),
34                49 /* '1' */ => Some(true),
35                95 /* '_' */ => None,
36                _ => panic!("`s` must consist of '0' or '1'. '{}' included.", c),
37            })
38            .collect();
39        Self::from(&bits[..])
40    }
41}
42
43impl From<&[bool]> for Fid {
44    /// Constructor from slice of boolean.
45    ///
46    /// # Examples
47    /// ```
48    /// use fid_rs::Fid;
49    ///
50    /// let bits = [false, true, true, true];
51    /// let fid = Fid::from(&bits[..]);
52    /// assert_eq!(fid[0], false);
53    /// assert_eq!(fid[1], true);
54    /// assert_eq!(fid[2], true);
55    /// assert_eq!(fid[3], true);
56    /// ```
57    ///
58    /// # Panics
59    /// When:
60    /// - `bits` is empty.
61    fn from(bits: &[bool]) -> Self {
62        assert!(!bits.is_empty());
63
64        let mut byte_vec: Vec<u8> = Vec::with_capacity(bits.len() / 8 + 1);
65        let mut last_byte_len = 0u8;
66
67        for bits8 in bits.chunks(8) {
68            last_byte_len = bits8.len() as u8; // although this bits8 might not be a last byte.
69
70            let byte = (0..last_byte_len).fold(0, |byte, i| {
71                byte + if bits8[i as usize] { 1 << (7 - i) } else { 0 }
72            });
73            byte_vec.push(byte);
74        }
75
76        Fid::build(byte_vec, last_byte_len)
77    }
78}
79
80static TRUE: bool = true;
81static FALSE: bool = false;
82
83impl Index<u64> for Fid {
84    type Output = bool;
85
86    /// Returns `i`-th element of the `Fid`.
87    ///
88    /// # Panics
89    /// When _`i` >= length of the `Fid`_.
90    fn index(&self, index: u64) -> &Self::Output {
91        if self.rbv().access(index) {
92            &TRUE
93        } else {
94            &FALSE
95        }
96    }
97}
98
99impl Fid {
100    /// Build FID from byte vector.
101    fn build(byte_vec: Vec<u8>, last_byte_len: u8) -> Self {
102        let bit_len = (byte_vec.len() - 1) as u64 * 8 + last_byte_len as u64;
103        let rbv = RawBitVector::new(&byte_vec[..], 0, last_byte_len);
104        let chunks = Chunks::new(&rbv);
105        let table = PopcountTable::new(Blocks::calc_block_size(rbv.len()));
106        Self {
107            byte_vec,
108            bit_len,
109            chunks,
110            table,
111        }
112    }
113
114    /// Returns the number of _1_ in _[0, `i`]_ elements of the `Fid`.
115    ///
116    /// # Panics
117    /// When _`i` >= length of the `Fid`_.
118    ///
119    /// # Implementation detail
120    ///
121    /// ```text
122    ///  00001000 01000001 00000100 11000000 00100000 00000101 00100000 00010000 001  Raw data (N=67)
123    ///                                                           ^
124    ///                                                           i = 51
125    /// |                  7                    |                13                |  Chunk (size = (log N)^2 = 36)
126    ///                                         ^
127    ///                chunk_left            i_chunk = 1      chunk_right
128    ///
129    /// |0 |1 |1  |2 |2 |3  |3 |4 |6  |6 |6  |7 |0 |0  |0 |2 |3 |3 |4  |4 |4 |5  |5|  Block (size = log N / 2 = 3)
130    ///                                                         ^
131    ///                                                      i_block = 17
132    ///                                              block_left | block_right
133    /// ```
134    ///
135    /// 1. Find `i_chunk`. _`i_chunk` = `i` / `chunk_size`_.
136    /// 2. Get _`chunk_left` = Chunks[`i_chunk` - 1]_ only if _`i_chunk` > 0_.
137    /// 3. Get _rank from chunk_left_ if `chunk_left` exists.
138    /// 4. Get _`chunk_right` = Chunks[`i_chunk`]_.
139    /// 5. Find `i_block`. _`i_block` = (`i` - `i_chunk` * `chunk_size`) / block size_.
140    /// 6. Get _`block_left` = `chunk_right.blocks`[ `i_block` - 1]`_ only if _`i_block` > 0_.
141    /// 7. Get _rank from block_left_ if `block_left` exists.
142    /// 8. Get inner-block data _`block_bits`. `block_bits` must be of _block size_ length, fulfilled with _0_ in right bits.
143    /// 9. Calculate _rank of `block_bits`_ in _O(1)_ using a table memonizing _block size_ bit's popcount.
144    pub fn rank(&self, i: u64) -> u64 {
145        let n = self.len();
146        assert!(i < n);
147        let chunk_size = Chunks::calc_chunk_size(n);
148        let block_size = Blocks::calc_block_size(n);
149
150        // 1.
151        let i_chunk = i / chunk_size as u64;
152
153        // 3.
154        let rank_from_chunk = if i_chunk == 0 {
155            0
156        } else {
157            // 2., 3.
158            let chunk_left = self.chunks.access(i_chunk - 1);
159            chunk_left.value()
160        };
161
162        // 4.
163        let chunk_right = self.chunks.access(i_chunk);
164
165        // 5.
166        let i_block = (i - i_chunk * chunk_size as u64) / block_size as u64;
167
168        // 7.
169        let rank_from_block = if i_block == 0 {
170            0
171        } else {
172            // 6., 7.
173            let block_left = chunk_right.blocks.access(i_block - 1);
174            block_left.value()
175        };
176
177        // 8.
178        let block_right = chunk_right.blocks.access(i_block);
179        let pos_block_start = i_chunk * chunk_size as u64 + i_block * block_size as u64;
180        assert!(i - pos_block_start < block_right.length() as u64);
181        let block_right_rbv = self
182            .rbv()
183            .clone_sub(pos_block_start, block_right.length() as u64);
184        let block_right_as_u32 = block_right_rbv.as_u32();
185        let bits_to_use = i - pos_block_start + 1;
186        let block_bits = block_right_as_u32 >> (32 - bits_to_use);
187        let rank_from_table = self.table.popcount(block_bits as u64);
188
189        // 9.
190        rank_from_chunk + rank_from_block as u64 + rank_from_table as u64
191    }
192
193    /// Returns the number of _0_ in _[0, `i`]_ elements of the `Fid`.
194    ///
195    /// # Panics
196    /// When _`i` >= length of the `Fid`_.
197    pub fn rank0(&self, i: u64) -> u64 {
198        (i + 1) - self.rank(i)
199    }
200
201    /// Returns the minimum position (0-origin) `i` where _`rank(i)` == num_ of `num`-th _1_ if exists. Else returns None.
202    ///
203    /// # Panics
204    /// When _`num` > length of the `Fid`_.
205    ///
206    /// # Implementation detail
207    /// Binary search using `rank()`.
208    pub fn select(&self, num: u64) -> Option<u64> {
209        let n = self.len();
210        assert!(num <= n);
211
212        if num == 0 || num == 1 && self[0] {
213            return Some(0);
214        }
215        if self.rank(n - 1) < num {
216            return None;
217        };
218
219        let mut ng = 0;
220        let mut ok = n - 1;
221        while ok - ng > 1 {
222            let mid = (ok + ng) / 2;
223            if self.rank(mid) >= num {
224                ok = mid;
225            } else {
226                ng = mid;
227            }
228        }
229        Some(ok)
230    }
231
232    /// Returns the minimum position (0-origin) `i` where _`rank(i)` == num_ of `num`-th _0_ if exists. Else returns None.
233    ///
234    /// # Panics
235    /// When _`num` > length of the `Fid`_.
236    pub fn select0(&self, num: u64) -> Option<u64> {
237        let n = self.bit_len;
238        assert!(num <= n);
239
240        if num == 0 || num == 1 && !self[0] {
241            return Some(0);
242        }
243        if self.rank0(n - 1) < num {
244            return None;
245        };
246
247        let mut ng = 0;
248        let mut ok = n - 1;
249        while ok - ng > 1 {
250            let mid = (ok + ng) / 2;
251            if self.rank0(mid) >= num {
252                ok = mid;
253            } else {
254                ng = mid;
255            }
256        }
257        Some(ok)
258    }
259
260    /// Returns bit length of this FID.
261    pub fn len(&self) -> u64 {
262        self.bit_len
263    }
264
265    /// Returns whether the FID is empty.
266    pub fn is_empty(&self) -> bool {
267        self.bit_len == 0
268    }
269
270    fn rbv(&self) -> RawBitVector {
271        let last_byte_len_or_0 = (self.bit_len % 8) as u8;
272        RawBitVector::new(
273            &self.byte_vec[..],
274            0,
275            if last_byte_len_or_0 == 0 {
276                8
277            } else {
278                last_byte_len_or_0
279            },
280        )
281    }
282}
283
284#[cfg(test)]
285mod from_str_success_tests {
286    use crate::Fid;
287
288    macro_rules! parameterized_tests {
289        ($($name:ident: $value:expr,)*) => {
290        $(
291            #[test]
292            fn $name() {
293                let (s, expected_bits) = $value;
294                let fid = Fid::from(s);
295
296                // TODO length check
297                // assert_eq!(fid.length(), expected_bits);
298                for (i, bit) in expected_bits.iter().enumerate() {
299                    assert_eq!(fid[i as u64], *bit);
300                }
301            }
302        )*
303        }
304    }
305
306    parameterized_tests! {
307        t1: ("0", vec![false]),
308        t2: ("1", vec![true]),
309        t3: ("00", vec![false, false]),
310        t4: ("01", vec![false, true]),
311        t5: ("10", vec![true, false]),
312        t6: ("11", vec![true, true]),
313        t7: ("0101_0101__0101_1100__1000_001", vec![
314            false, true, false, true,
315            false, true, false, true,
316            false, true, false, true,
317            true, true, false, false,
318            true, false, false, false,
319            false, false, true,
320        ]),
321    }
322}
323
324#[cfg(test)]
325mod from_str_failure_tests {
326    // well-tested in BitString::new()
327}
328
329#[cfg(test)]
330mod from_slice_success_tests {
331    use crate::Fid;
332
333    macro_rules! parameterized_tests {
334        ($($name:ident: $value:expr,)*) => {
335        $(
336            #[test]
337            fn $name() {
338                let arr = $value;
339                let fid = Fid::from(&arr[..]);
340
341                // TODO length check
342                // assert_eq!(fid.length(), expected_bits);
343                for (i, bit) in arr.iter().enumerate() {
344                    assert_eq!(fid[i as u64], *bit);
345                }
346            }
347        )*
348        }
349    }
350
351    parameterized_tests! {
352        t1: [false],
353        t2: [true],
354        t3: [false, false],
355        t4: [false, true],
356        t5: [true, false],
357        t6: [true, true],
358        t7: [false; 100],
359        t8: [true; 100],
360    }
361}
362
363#[cfg(test)]
364mod from_slice_failure_tests {
365    use crate::Fid;
366
367    #[test]
368    #[should_panic]
369    fn empty() {
370        let _ = Fid::from(&[][..]);
371    }
372}
373
374#[cfg(test)]
375mod index_u64_success_tests {
376    // well-tested in fid_builder::{builder_from_length_success_tests, builder_from_bit_string_success_tests}
377}
378
379#[cfg(test)]
380mod index_u64_failure_tests {
381    use crate::Fid;
382
383    #[test]
384    #[should_panic]
385    fn over_upper_bound() {
386        let fid = Fid::from("00");
387        let _ = fid[2];
388    }
389}
390
391#[cfg(test)]
392#[allow(non_snake_case)]
393mod rank_success_tests {
394    use crate::Fid;
395
396    macro_rules! parameterized_tests {
397        ($($name:ident: $value:expr,)*) => {
398        $(
399            #[test]
400            fn $name() {
401                let (in_fid_str, in_i, expected_rank) = $value;
402                assert_eq!(
403                    Fid::from(in_fid_str).rank(in_i),
404                    expected_rank
405                );
406            }
407        )*
408        }
409    }
410
411    parameterized_tests! {
412        rank1_1: ("0", 0, 0),
413
414        rank2_1: ("00", 0, 0),
415        rank2_2: ("00", 1, 0),
416
417        rank3_1: ("01", 0, 0),
418        rank3_2: ("01", 1, 1),
419
420        rank4_1: ("10", 0, 1),
421        rank4_2: ("10", 1, 1),
422
423        rank5_1: ("11", 0, 1),
424        rank5_2: ("11", 1, 2),
425
426        rank6_1: ("10010", 0, 1),
427        rank6_2: ("10010", 1, 1),
428        rank6_3: ("10010", 2, 1),
429        rank6_4: ("10010", 3, 2),
430        rank6_5: ("10010", 4, 2),
431
432        bugfix_11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100: (
433            "11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100",
434            49, 31,
435        ),
436        bugfix_10100001_01010011_10101100_11100001_10110010_10000110_00010100_01001111_01011100_11010011_11110000_00011010_01101111_10101010_11000111_0110011: (
437            "10100001_01010011_10101100_11100001_10110010_10000110_00010100_01001111_01011100_11010011_11110000_00011010_01101111_10101010_11000111_0110011",
438            111, 55,
439        ),
440        bugfix_100_111_101_011_011_100_101_001_111_001_001_101_100_011_000_111_1___01_000_101_100_101_101_001_011_110_010_001_101_010_010_010_111_111_111_001_111_001_100_010_001_010_101_11: (
441            "100_111_101_011_011_100_101_001_111_001_001_101_100_011_000_111_1___01_000_101_100_101_101_001_011_110_010_001_101_010_010_010_111_111_111_001_111_001_100_010_001_010_101_11",
442            48, 28,
443        ),
444        bugfix_11100100_10110100_10000000_10111111_01110101_01100110_00101111_11101001_01100100_00001000_11010100_10100000_00010001_10100101_01100100_0010010: (
445            "11100100_10110100_10000000_10111111_01110101_01100110_00101111_11101001_01100100_00001000_11010100_10100000_00010001_10100101_01100100_0010010",
446            126, 56,
447        ),
448    }
449    // Tested more in tests/ (integration test)
450}
451
452#[cfg(test)]
453mod rank_failure_tests {
454    use crate::Fid;
455
456    #[test]
457    #[should_panic]
458    fn rank_over_upper_bound() {
459        let fid = Fid::from("00");
460        let _ = fid.rank(2);
461    }
462}
463
464#[cfg(test)]
465#[allow(non_snake_case)]
466mod rank0_success_tests {
467    use crate::Fid;
468
469    macro_rules! parameterized_tests {
470        ($($name:ident: $value:expr,)*) => {
471        $(
472            #[test]
473            fn $name() {
474                let (in_fid_str, in_i, expected_rank0) = $value;
475                assert_eq!(
476                    Fid::from(in_fid_str).rank0(in_i),
477                    expected_rank0
478                );
479            }
480        )*
481        }
482    }
483
484    parameterized_tests! {
485        rank0_1_1: ("0", 0, 1),
486
487        rank0_2_1: ("00", 0, 1),
488        rank0_2_2: ("00", 1, 2),
489
490        rank0_3_1: ("01", 0, 1),
491        rank0_3_2: ("01", 1, 1),
492
493        rank0_4_1: ("10", 0, 0),
494        rank0_4_2: ("10", 1, 1),
495
496        rank0_5_1: ("11", 0, 0),
497        rank0_5_2: ("11", 1, 0),
498
499        rank0_6_1: ("10010", 0, 0),
500        rank0_6_2: ("10010", 1, 1),
501        rank0_6_3: ("10010", 2, 2),
502        rank0_6_4: ("10010", 3, 2),
503        rank0_6_5: ("10010", 4, 3),
504    }
505    // Tested more in tests/ (integration test)
506}
507
508#[cfg(test)]
509mod rank0_0_failure_tests {
510    use crate::Fid;
511
512    #[test]
513    #[should_panic]
514    fn rank0_over_upper_bound() {
515        let fid = Fid::from("00");
516        let _ = fid.rank0(2);
517    }
518}
519
520#[cfg(test)]
521mod select_success_tests {
522    // Tested well in tests/ (integration test)
523}
524
525#[cfg(test)]
526mod select_failure_tests {
527    use crate::Fid;
528
529    #[test]
530    #[should_panic]
531    fn select_over_max_rank() {
532        let fid = Fid::from("00");
533        let _ = fid.select(3);
534    }
535}
536
537#[cfg(test)]
538mod select0_success_tests {
539    // Tested well in tests/ (integration test)
540}
541
542#[cfg(test)]
543mod select0_failure_tests {
544    use crate::Fid;
545
546    #[test]
547    #[should_panic]
548    fn select_over_max_rank() {
549        let fid = Fid::from("00");
550        let _ = fid.select0(3);
551    }
552}