Skip to main content

dsi_bitstream/impls/
word_adapter.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9use crate::traits::*;
10#[cfg(feature = "mem_dbg")]
11use mem_dbg::{MemDbg, MemSize};
12use std::io::{Read, Seek, SeekFrom, Write};
13
14/// An adapter from [`Read`], [`Write`], and [`Seek`], to [`WordRead`],
15/// [`WordWrite`], and [`WordSeek`], respectively.
16///
17/// Instances of this struct can be created using [`WordAdapter::new`]. They
18/// turn every standard (possibly seekable) source or destination of bytes (such
19/// as [`std::fs::File`], [`std::io::BufReader`], sockets, etc.) into a source
20/// or destination of words.
21///
22/// Due to the necessity of managing files whose length is not a multiple of the
23/// word length, [`read_word`](WordAdapter::read_word) will return a partially
24/// read word extended with zeros at the end of such files.
25///
26/// To provide a sensible value after such a read,
27/// [`word_pos`](WordAdapter::word_pos) will always return the position
28/// of the underlying [`Seek`] rounded up to the next multiple of the byte size of `W`.
29/// This approach, however, requires that if you adapt a [`Seek`],
30/// its current position must be a multiple of the byte size of `W`, or the
31/// results of [`word_pos`](WordAdapter::word_pos)
32/// will be shifted by the rounding.
33#[derive(Debug, Clone)]
34#[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
35pub struct WordAdapter<W: Word, B> {
36    backend: B,
37    _marker: core::marker::PhantomData<W>,
38}
39
40impl<W: Word, B> WordAdapter<W, B> {
41    /// Creates a new [`WordAdapter`].
42    #[must_use]
43    pub const fn new(backend: B) -> Self {
44        Self {
45            backend,
46            _marker: core::marker::PhantomData,
47        }
48    }
49
50    /// Consumes this adapter and returns the underlying backend.
51    #[must_use]
52    pub fn into_inner(self) -> B {
53        self.backend
54    }
55}
56
57impl<W: Word, B: Read> WordRead for WordAdapter<W, B>
58where
59    W::Bytes: Default + AsMut<[u8]>,
60{
61    type Error = std::io::Error;
62    type Word = W;
63
64    #[inline(always)]
65    fn read_word(&mut self) -> Result<W, Self::Error> {
66        let mut res: W::Bytes = Default::default();
67        self.backend
68            .read_exact(res.as_mut())
69            .map_err(|e|
70                match e.kind() {
71                std::io::ErrorKind::UnexpectedEof => {
72                    std::io::Error::new(
73                        e.kind(),
74                        format!(concat!(
75                            "Unexpected end of file. ",
76                            "This might happen because the file length is not a multiple of the word size used for reading ({0} bytes). ",
77                            "In this case, please pad with zeros at the end of the file so that the file length is a multiple of {0}. ",
78                            "The inner std::io::Error was {1:?}"), (W::BITS as usize / 8), e),
79                    )
80                }
81                _ => e,
82            })?;
83        Ok(W::from_ne_bytes(res))
84    }
85}
86
87impl<W: Word, B: Write> WordWrite for WordAdapter<W, B>
88where
89    W::Bytes: AsRef<[u8]>,
90{
91    type Error = std::io::Error;
92    type Word = W;
93
94    #[inline(always)]
95    fn write_word(&mut self, word: W) -> Result<(), std::io::Error> {
96        self.backend.write_all(word.to_ne_bytes().as_ref())?;
97        Ok(())
98    }
99
100    fn flush(&mut self) -> Result<(), Self::Error> {
101        self.backend.flush()
102    }
103}
104
105impl<W: Word, B: Seek> WordSeek for WordAdapter<W, B> {
106    type Error = std::io::Error;
107
108    #[inline(always)]
109    fn word_pos(&mut self) -> Result<u64, std::io::Error> {
110        Ok(self
111            .backend
112            .stream_position()?
113            .div_ceil((W::BITS as usize / 8) as u64))
114    }
115
116    #[inline(always)]
117    fn set_word_pos(&mut self, word_index: u64) -> Result<(), std::io::Error> {
118        self.backend
119            .seek(SeekFrom::Start(word_index * (W::BITS as usize / 8) as u64))?;
120        Ok(())
121    }
122}
123
#[cfg(test)]
mod tests {
    use crate::prelude::*;

    /// A file path in the system temporary directory that is removed when
    /// the guard goes out of scope, even if the test fails.
    struct TempFile(std::path::PathBuf);

    impl TempFile {
        /// Builds a per-process path so that concurrent runs of the test
        /// suite do not overwrite each other's files.
        fn new(name: &str) -> Self {
            let file_name = format!("{name}_{}", std::process::id());
            Self(std::env::temp_dir().join(file_name))
        }

        fn path(&self) -> &std::path::Path {
            &self.0
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best effort: the file may not exist if the test failed early.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    /// Writes words to a file through a [`WordAdapter`] and reads them back.
    #[test]
    fn test_word_adapter() -> std::io::Result<()> {
        let data: Vec<u32> = vec![
            0xa6032421, 0xc9d01b28, 0x168b4ecd, 0xc5ccbed9, 0xfd007100, 0x08469d41, 0x989fd8c2,
            0x954d351a, 0x3225ec9f, 0xbca253f9, 0x915aad84, 0x274c0de1, 0x4bfc6982, 0x59a47341,
            0x4e32a33a, 0x9e0d2208,
        ];
        let file = TempFile::new("test_file_adapter");
        {
            let mut writer = <WordAdapter<u32, _>>::new(std::fs::File::create(file.path())?);
            for &value in &data {
                writer.write_word(value)?;
            }
            writer.flush()?;
        }
        {
            let mut reader = <WordAdapter<u32, _>>::new(std::fs::File::open(file.path())?);
            for &value in &data {
                assert_eq!(value, reader.read_word()?);
            }
        }
        Ok(())
    }

    /// Writes γ codes using 64-bit words and reads them back using 32-bit
    /// words, in both endiannesses.
    #[test]
    fn test_word_adapter_codes() -> std::io::Result<()> {
        let data: Vec<u8> = vec![
            0x5f, 0x68, 0xdb, 0xca, 0x79, 0x17, 0xf3, 0x37, 0x2c, 0x46, 0x63, 0xf7, 0xf3, 0x28,
            0xa4, 0x8d, 0x29, 0x3b, 0xb6, 0xd5, 0xc7, 0xe2, 0x22, 0x3f, 0x6e, 0xb5, 0xf2, 0xda,
            0x13, 0x1d, 0x37, 0x18, 0x5b, 0xf8, 0x45, 0x59, 0x33, 0x38, 0xaf, 0xc4, 0x8a, 0x1d,
            0x78, 0x81, 0xc8, 0xc3, 0xdb, 0xab, 0x23, 0xe1, 0x13, 0xb0, 0x04, 0xd7, 0x3c, 0x21,
            0x0e, 0xba, 0x5d, 0xfc, 0xac, 0x4f, 0x04, 0x2d,
        ];
        let file = TempFile::new("test_file_adapter_codes");
        {
            let mut writer = <BufBitWriter<BE, _>>::new(<WordAdapter<u64, _>>::new(
                std::fs::File::create(file.path())?,
            ));
            for &value in &data {
                writer.write_gamma(u64::from(value))?;
            }
            // Flush explicitly: a flush performed on drop cannot report errors.
            writer.flush()?;
        }
        {
            let mut reader = <BufBitReader<BE, _>>::new(<WordAdapter<u32, _>>::new(
                std::fs::File::open(file.path())?,
            ));
            for &value in &data {
                assert_eq!(u64::from(value), reader.read_gamma()?);
            }
        }
        {
            let mut writer = <BufBitWriter<LE, _>>::new(<WordAdapter<u64, _>>::new(
                std::fs::File::create(file.path())?,
            ));
            for &value in &data {
                writer.write_gamma(u64::from(value))?;
            }
            // Flush explicitly: a flush performed on drop cannot report errors.
            writer.flush()?;
        }
        {
            let mut reader = <BufBitReader<LE, _>>::new(<WordAdapter<u32, _>>::new(
                std::fs::File::open(file.path())?,
            ));
            for &value in &data {
                assert_eq!(u64::from(value), reader.read_gamma()?);
            }
        }
        Ok(())
    }
}