Skip to main content

dsi_bitstream/impls/
bit_reader.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9use core::convert::Infallible;
10#[cfg(feature = "mem_dbg")]
11use mem_dbg::{MemDbg, MemSize};
12
13use crate::codes::params::{DefaultReadParams, ReadParams};
14use crate::traits::*;
15
16/// An implementation of [`BitRead`] for a [`WordRead`] with word `u64` and of
17/// [`BitSeek`] for a [`WordSeek`].
18///
19/// This implementation randomly accesses the underlying [`WordRead`] without
20/// any buffering. It is usually slower than
21/// [`BufBitReader`](crate::impls::BufBitReader).
22///
23/// The peek word is `u32`. The value returned by
24/// [`peek_bits`](crate::traits::BitRead::peek_bits) contains at least 32 bits
25/// (extended with zeros beyond end of stream), that is, a full peek word.
26///
27/// The additional type parameter `RP` is used to select the parameters for the
28/// instantaneous codes, but the casual user should be happy with the default
29/// value. See [`ReadParams`] for more details.
30///
31/// For additional flexibility, when the `std` feature is enabled, this
32/// structure implements [`std::io::Read`]. Note that because of coherence
33/// rules it is not possible to implement [`std::io::Read`] for a generic
34/// [`BitRead`].
35
36#[derive(Debug, Clone)]
37#[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
38pub struct BitReader<E: Endianness, WR, RP: ReadParams = DefaultReadParams> {
39    /// The backend from which we will read words.
40    backend: WR,
41    /// The index of the current bit.
42    bit_index: u64,
43    _marker: core::marker::PhantomData<(E, RP)>,
44}
45
46impl<E: Endianness, WR, RP: ReadParams> BitReader<E, WR, RP> {
47    /// Creates a new [`BitReader`] with the given word reader.
48    #[must_use]
49    pub const fn new(backend: WR) -> Self {
50        Self {
51            backend,
52            bit_index: 0,
53            _marker: core::marker::PhantomData,
54        }
55    }
56}
57
58impl<WR: WordRead<Word = u64> + WordSeek<Error = <WR as WordRead>::Error>, RP: ReadParams>
59    BitRead<BE> for BitReader<BE, WR, RP>
60{
61    type Error = <WR as WordRead>::Error;
62    type PeekWord = u32;
63    const PEEK_BITS: usize = 32;
64
65    #[inline]
66    fn skip_bits(&mut self, n_bits: usize) -> Result<(), Self::Error> {
67        self.bit_index += n_bits as u64;
68        Ok(())
69    }
70
71    #[inline]
72    fn read_bits(&mut self, num_bits: usize) -> Result<u64, Self::Error> {
73        #[cfg(feature = "checks")]
74        assert!(num_bits <= 64);
75
76        if num_bits == 0 {
77            return Ok(0);
78        }
79
80        self.backend.set_word_pos(self.bit_index / 64)?;
81        let in_word_offset = (self.bit_index % 64) as usize;
82
83        let res = if (in_word_offset + num_bits) <= 64 {
84            // single word access
85            let word = self.backend.read_word()?.to_be();
86            (word << in_word_offset) >> (64 - num_bits)
87        } else {
88            // double word access
89            let high_word = self.backend.read_word()?.to_be();
90            let low_word = self.backend.read_word()?.to_be();
91            let shamt1 = 64 - num_bits;
92            let shamt2 = 128 - in_word_offset - num_bits;
93            ((high_word << in_word_offset) >> shamt1) | (low_word >> shamt2)
94        };
95        self.bit_index += num_bits as u64;
96        Ok(res)
97    }
98
99    #[inline]
100    fn peek_bits(&mut self, n_bits: usize) -> Result<u32, Self::Error> {
101        if n_bits == 0 {
102            return Ok(0);
103        }
104
105        #[cfg(feature = "checks")]
106        assert!(n_bits <= 32);
107
108        self.backend.set_word_pos(self.bit_index / 64)?;
109        let in_word_offset = (self.bit_index % 64) as usize;
110
111        let res = if (in_word_offset + n_bits) <= 64 {
112            // single word access
113            let word = self.backend.read_word()?.to_be();
114            (word << in_word_offset) >> (64 - n_bits)
115        } else {
116            // double word access
117            let high_word = self.backend.read_word()?.to_be();
118            let low_word = self.backend.read_word()?.to_be();
119            let shamt1 = 64 - n_bits;
120            let shamt2 = 128 - in_word_offset - n_bits;
121            ((high_word << in_word_offset) >> shamt1) | (low_word >> shamt2)
122        };
123        Ok(res as u32)
124    }
125
126    #[inline]
127    fn read_unary(&mut self) -> Result<u64, Self::Error> {
128        self.backend.set_word_pos(self.bit_index / 64)?;
129        let in_word_offset = self.bit_index % 64;
130        let mut bits_in_word = 64 - in_word_offset;
131        let mut total = 0;
132
133        let mut word = self.backend.read_word()?.to_be();
134        word <<= in_word_offset;
135        loop {
136            let zeros = word.leading_zeros() as u64;
137            // the unary code fits in the word
138            if zeros < bits_in_word {
139                self.bit_index += total + zeros + 1;
140                return Ok(total + zeros);
141            }
142            total += bits_in_word;
143            bits_in_word = 64;
144            word = self.backend.read_word()?.to_be();
145        }
146    }
147
148    #[inline(always)]
149    fn skip_bits_after_peek(&mut self, n: usize) {
150        self.bit_index += n as u64;
151    }
152}
153
154impl<E: Endianness, WR: WordSeek, RP: ReadParams> BitSeek for BitReader<E, WR, RP> {
155    type Error = Infallible;
156
157    fn bit_pos(&mut self) -> Result<u64, Self::Error> {
158        Ok(self.bit_index)
159    }
160
161    fn set_bit_pos(&mut self, bit_index: u64) -> Result<(), Self::Error> {
162        self.bit_index = bit_index;
163        Ok(())
164    }
165}
166
167impl<WR: WordRead<Word = u64> + WordSeek<Error = <WR as WordRead>::Error>, RP: ReadParams>
168    BitRead<LE> for BitReader<LE, WR, RP>
169{
170    type Error = <WR as WordRead>::Error;
171    type PeekWord = u32;
172    const PEEK_BITS: usize = 32;
173
174    #[inline]
175    fn skip_bits(&mut self, n_bits: usize) -> Result<(), Self::Error> {
176        self.bit_index += n_bits as u64;
177        Ok(())
178    }
179
180    #[inline]
181    fn read_bits(&mut self, num_bits: usize) -> Result<u64, Self::Error> {
182        #[cfg(feature = "checks")]
183        assert!(num_bits <= 64);
184
185        if num_bits == 0 {
186            return Ok(0);
187        }
188
189        self.backend.set_word_pos(self.bit_index / 64)?;
190        let in_word_offset = (self.bit_index % 64) as usize;
191
192        let res = if (in_word_offset + num_bits) <= 64 {
193            // single word access
194            let word = self.backend.read_word()?.to_le();
195            let shamt = 64 - num_bits;
196            (word << (shamt - in_word_offset)) >> shamt
197        } else {
198            // double word access
199            let low_word = self.backend.read_word()?.to_le();
200            let high_word = self.backend.read_word()?.to_le();
201            let shamt1 = 128 - in_word_offset - num_bits;
202            let shamt2 = 64 - num_bits;
203            ((high_word << shamt1) >> shamt2) | (low_word >> in_word_offset)
204        };
205        self.bit_index += num_bits as u64;
206        Ok(res)
207    }
208
209    #[inline]
210    fn peek_bits(&mut self, n_bits: usize) -> Result<u32, Self::Error> {
211        if n_bits == 0 {
212            return Ok(0);
213        }
214
215        #[cfg(feature = "checks")]
216        assert!(n_bits <= 32);
217
218        self.backend.set_word_pos(self.bit_index / 64)?;
219        let in_word_offset = (self.bit_index % 64) as usize;
220
221        let res = if (in_word_offset + n_bits) <= 64 {
222            // single word access
223            let word = self.backend.read_word()?.to_le();
224            let shamt = 64 - n_bits;
225            (word << (shamt - in_word_offset)) >> shamt
226        } else {
227            // double word access
228            let low_word = self.backend.read_word()?.to_le();
229            let high_word = self.backend.read_word()?.to_le();
230            let shamt1 = 128 - in_word_offset - n_bits;
231            let shamt2 = 64 - n_bits;
232            ((high_word << shamt1) >> shamt2) | (low_word >> in_word_offset)
233        };
234        Ok(res as u32)
235    }
236
237    #[inline]
238    fn read_unary(&mut self) -> Result<u64, Self::Error> {
239        self.backend.set_word_pos(self.bit_index / 64)?;
240        let in_word_offset = self.bit_index % 64;
241        let mut bits_in_word = 64 - in_word_offset;
242        let mut total = 0;
243
244        let mut word = self.backend.read_word()?.to_le();
245        word >>= in_word_offset;
246        loop {
247            let zeros = word.trailing_zeros() as u64;
248            // the unary code fits in the word
249            if zeros < bits_in_word {
250                self.bit_index += total + zeros + 1;
251                return Ok(total + zeros);
252            }
253            total += bits_in_word;
254            bits_in_word = 64;
255            word = self.backend.read_word()?.to_le();
256        }
257    }
258
259    #[inline(always)]
260    fn skip_bits_after_peek(&mut self, n: usize) {
261        self.bit_index += n as u64;
262    }
263}
264
265#[cfg(feature = "std")]
266impl<WR: WordRead<Word = u64> + WordSeek<Error = <WR as WordRead>::Error>, RP: ReadParams>
267    std::io::Read for BitReader<LE, WR, RP>
268{
269    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
270        let mut iter = buf.chunks_exact_mut(8);
271
272        for chunk in &mut iter {
273            let word = self
274                .read_bits(64)
275                .map_err(|_| std::io::ErrorKind::UnexpectedEof)?;
276            chunk.copy_from_slice(&word.to_le_bytes());
277        }
278
279        let rem = iter.into_remainder();
280        if !rem.is_empty() {
281            let word = self
282                .read_bits(rem.len() * 8)
283                .map_err(|_| std::io::ErrorKind::UnexpectedEof)?;
284            rem.copy_from_slice(&word.to_le_bytes()[..rem.len()]);
285        }
286
287        Ok(buf.len())
288    }
289}
290
291#[cfg(feature = "std")]
292impl<WR: WordRead<Word = u64> + WordSeek<Error = <WR as WordRead>::Error>, RP: ReadParams>
293    std::io::Read for BitReader<BE, WR, RP>
294{
295    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
296        let mut iter = buf.chunks_exact_mut(8);
297
298        for chunk in &mut iter {
299            let word = self
300                .read_bits(64)
301                .map_err(|_| std::io::ErrorKind::UnexpectedEof)?;
302            chunk.copy_from_slice(&word.to_be_bytes());
303        }
304
305        let rem = iter.into_remainder();
306        if !rem.is_empty() {
307            let word = self
308                .read_bits(rem.len() * 8)
309                .map_err(|_| std::io::ErrorKind::UnexpectedEof)?;
310            rem.copy_from_slice(&word.to_be_bytes()[8 - rem.len()..]);
311        }
312
313        Ok(buf.len())
314    }
315}