hdt/containers/
sequence.rs

1use super::vbyte::encode_vbyte;
2use crate::containers::vbyte::read_vbyte;
3use bytesize::ByteSize;
4#[cfg(feature = "cache")]
5use serde::{self, Deserialize, Serialize};
6use std::fmt;
7use std::io::{BufRead, Write};
8use std::mem::size_of;
9
/// Number of bits in one `usize` word, the storage unit of [`Sequence::data`].
const USIZE_BITS: usize = size_of::<usize>() * 8;
11
12pub type Result<T> = core::result::Result<T, Error>;
13
/// Sequence of unsigned integers that all occupy the same number of bits.
///
/// Entries are packed back to back, so a single entry may straddle the boundary between two
/// words of `data`. The HDT spec calls this an "array"; only the Log64 variant is supported.
#[cfg_attr(feature = "cache", derive(Deserialize, Serialize))]
pub struct Sequence {
    /// How many integers the sequence holds.
    pub entries: usize,
    /// Width of every integer in bits.
    pub bits_per_entry: usize,
    /// The packed entries, stored as machine words.
    pub data: Vec<usize>,
}
26
/// Sequence type tag as stored in the first byte of a serialized sequence.
enum SequenceType {
    /// The only variant that `TryFrom<u8>` accepts.
    Log64 = 1,
    // Listed for completeness; never constructed because reading rejects this tag.
    #[allow(dead_code)]
    UInt32 = 2,
    // Listed for completeness; never constructed because reading rejects this tag.
    #[allow(dead_code)]
    UInt64 = 3,
}
34
35impl TryFrom<u8> for SequenceType {
36    type Error = Error;
37
38    fn try_from(value: u8) -> Result<Self> {
39        match value {
40            1 => Ok(SequenceType::Log64),
41            _ => Err(Error::UnsupportedSequenceType(value)),
42        }
43    }
44}
45
46/// The error type for the sequence read function.
47#[derive(thiserror::Error, Debug)]
48pub enum Error {
49    #[error("IO error")]
50    Io(#[from] std::io::Error),
51    #[error("Invalid CRC8-CCIT checksum {0}, expected {1}")]
52    InvalidCrc8Checksum(u8, u8),
53    #[error("Invalid CRC32C checksum {0}, expected {1}")]
54    InvalidCrc32Checksum(u32, u32),
55    #[error("Failed to turn raw bytes into usize")]
56    TryFromSliceError(#[from] std::array::TryFromSliceError),
57    #[error("invalid LogArray type {0} != 1")]
58    UnsupportedSequenceType(u8),
59    #[error("entry size of {0} bit too large (>64 bit)")]
60    EntrySizeTooLarge(usize),
61}
62
63impl fmt::Debug for Sequence {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        write!(
66            f,
67            "{} with {} entries, {} bits per entry, starting with {:?}",
68            ByteSize(self.size_in_bytes() as u64),
69            self.entries,
70            self.bits_per_entry,
71            self.into_iter().take(10).collect::<Vec::<_>>()
72        )
73    }
74}
75
76pub struct SequenceIter<'a> {
77    sequence: &'a Sequence,
78    i: usize,
79}
80
81impl Iterator for SequenceIter<'_> {
82    type Item = usize;
83    fn next(&mut self) -> Option<Self::Item> {
84        if self.i >= self.sequence.entries {
85            return None;
86        }
87        let e = self.sequence.get(self.i);
88        self.i += 1;
89        Some(e)
90    }
91}
92
93impl<'a> IntoIterator for &'a Sequence {
94    type Item = usize;
95    type IntoIter = SequenceIter<'a>;
96
97    fn into_iter(self) -> Self::IntoIter {
98        SequenceIter { sequence: self, i: 0 }
99    }
100}
101
102impl Sequence {
103    /// Get the integer at the given index, counting from 0.
104    /// Panics if the index is out of bounds.
105    pub fn get(&self, index: usize) -> usize {
106        let scaled_index = index * self.bits_per_entry;
107        let block_index = scaled_index / USIZE_BITS;
108        let bit_index = scaled_index % USIZE_BITS;
109
110        let mut result;
111
112        let result_shift = USIZE_BITS - self.bits_per_entry;
113        if bit_index + self.bits_per_entry <= USIZE_BITS {
114            let block_shift = USIZE_BITS - bit_index - self.bits_per_entry;
115            result = (self.data[block_index] << block_shift) >> result_shift;
116        } else {
117            let block_shift = (USIZE_BITS << 1) - bit_index - self.bits_per_entry;
118            result = self.data[block_index] >> bit_index;
119            result |= (self.data[block_index + 1] << block_shift) >> result_shift;
120        }
121        result
122    }
123
124    /// Size in bytes on the heap.
125    pub const fn size_in_bytes(&self) -> usize {
126        (self.data.len() * USIZE_BITS) >> 3
127    }
128
129    /// Read sequence including metadata from HDT data.
130    pub fn read<R: BufRead>(reader: &mut R) -> Result<Self> {
131        // read entry metadata
132        // keep track of history for CRC8
133        let mut history = Vec::<u8>::new();
134
135        // read and validate type
136        let mut buffer = [0_u8];
137        reader.read_exact(&mut buffer)?;
138        history.extend_from_slice(&buffer);
139        SequenceType::try_from(buffer[0])?;
140
141        // read number of bits per entry
142        let mut buffer = [0_u8];
143        reader.read_exact(&mut buffer)?;
144        history.extend_from_slice(&buffer);
145        let bits_per_entry = buffer[0] as usize;
146        if bits_per_entry > USIZE_BITS {
147            return Err(Error::EntrySizeTooLarge(bits_per_entry));
148        }
149
150        // read number of entries
151        let (entries, bytes_read) = read_vbyte(reader)?;
152        history.extend_from_slice(&bytes_read);
153
154        // read entry metadata CRC8
155        let mut crc_code = [0_u8];
156        reader.read_exact(&mut crc_code)?;
157        let crc_code = crc_code[0];
158
159        // validate entry metadata CRC8
160        let crc8 = crc::Crc::<u8>::new(&crc::CRC_8_SMBUS);
161        let mut digest = crc8.digest();
162        digest.update(&history);
163
164        let crc_calculated = digest.finalize();
165        if crc_calculated != crc_code {
166            return Err(Error::InvalidCrc8Checksum(crc_calculated, crc_code));
167        }
168
169        // read body data
170        // read all but the last entry, since the last one is byte aligned
171        let total_bits = bits_per_entry * entries;
172        let full_byte_amount = (total_bits.div_ceil(USIZE_BITS).saturating_sub(1)) * size_of::<usize>();
173        let mut full_words = vec![0_u8; full_byte_amount];
174        reader.read_exact(&mut full_words)?;
175        let mut data: Vec<usize> = Vec::with_capacity(full_byte_amount / size_of::<usize>() + 2);
176        // read entry body
177
178        // turn the raw bytes into usize values
179        for word in full_words.chunks_exact(size_of::<usize>()) {
180            data.push(usize::from_le_bytes(<[u8; size_of::<usize>()]>::try_from(word)?));
181        }
182
183        // keep track of history for CRC32
184        let mut history = full_words;
185        // read the last few bits, byte aligned
186        let mut bits_read = 0;
187        let mut last_value: usize = 0;
188        let last_entry_bits = if total_bits == 0 { 0 } else { ((total_bits - 1) % USIZE_BITS) + 1 };
189
190        while bits_read < last_entry_bits {
191            let mut buffer = [0u8];
192            reader.read_exact(&mut buffer)?;
193            history.extend_from_slice(&buffer);
194            last_value |= (buffer[0] as usize) << bits_read;
195            bits_read += size_of::<usize>();
196        }
197        data.push(last_value);
198        // read entry body CRC32
199        let mut crc_code = [0_u8; 4];
200        reader.read_exact(&mut crc_code)?;
201
202        let crc_code32 = u32::from_le_bytes(crc_code);
203        //let start = std::time::Instant::now();
204        // validate entry body CRC32
205        let crc32 = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
206        let mut digest = crc32.digest();
207        digest.update(&history);
208        let crc_calculated32 = digest.finalize();
209        //println!("Sequence of {} validated in {:?}", ByteSize(history.len() as u64), start.elapsed());
210        if crc_calculated32 != crc_code32 {
211            return Err(Error::InvalidCrc32Checksum(crc_calculated32, crc_code32));
212        }
213
214        Ok(Sequence { entries, bits_per_entry, data })
215    }
216
217    /// save sequence per HDT spec using CRC
218    pub fn write(&self, dest_writer: &mut impl Write) -> Result<()> {
219        let crc8 = crc::Crc::<u8>::new(&crc::CRC_8_SMBUS);
220        let mut digest = crc8.digest();
221        // libhdt/src/sequence/LogSequence2.cpp::save()
222        // Write offsets using variable-length encoding
223        let seq_type: [u8; 1] = [1];
224        dest_writer.write_all(&seq_type)?;
225        digest.update(&seq_type);
226        // Write numbits
227        let bits_per_entry: [u8; 1] = [self.bits_per_entry.try_into().unwrap()];
228        dest_writer.write_all(&bits_per_entry)?;
229        digest.update(&bits_per_entry);
230        // Write numentries
231        let buf = &encode_vbyte(self.entries);
232        dest_writer.write_all(buf)?;
233        digest.update(buf);
234        let checksum: u8 = digest.finalize();
235        dest_writer.write_all(&[checksum])?;
236
237        // Write data
238        let crc32 = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
239        let mut digest32 = crc32.digest();
240        let bytes: Vec<u8> = self.data.iter().flat_map(|&val| val.to_le_bytes()).collect();
241        //  unused zero bytes in the last usize are not written
242        let num_bytes = (self.bits_per_entry * self.entries).div_ceil(8);
243        let bytes = &bytes[..num_bytes];
244        dest_writer.write_all(bytes)?;
245        digest32.update(bytes);
246        let checksum32 = digest32.finalize();
247        dest_writer.write_all(&checksum32.to_le_bytes())?;
248        dest_writer.flush()?;
249        Ok(())
250    }
251
252    /// Pack the given integers., which have to fit into the given number of bits.
253    pub fn new(nums: &[usize], bits_per_entry: usize) -> Sequence {
254        use sucds::int_vectors::CompactVector;
255        let entries = nums.len();
256        if entries == 0 && bits_per_entry == 0 {
257            return Sequence { entries, bits_per_entry, data: vec![] };
258        }
259        let mut cv = CompactVector::with_capacity(nums.len(), bits_per_entry).expect("value too large");
260        cv.extend(nums.iter().copied()).unwrap();
261        let data = cv.into_bit_vector().into_words();
262        Sequence { entries, bits_per_entry, data }
263    }
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::init;
    use pretty_assertions::assert_eq;

    /// Field-wise equality, only needed to compare sequences in the tests.
    impl PartialEq for Sequence {
        fn eq(&self, other: &Self) -> bool {
            self.entries == other.entries && self.bits_per_entry == other.bits_per_entry && self.data == other.data
        }
    }

    /// Round trip: decode a hand-packed sequence, write it, read it back and rebuild it with `new`.
    #[test]
    fn write_read() -> color_eyre::Result<()> {
        init();
        // five 4-bit entries 1..=5, the first entry in the least significant bits
        let data = vec![(5 << 16) + (4 << 12) + (3 << 8) + (2 << 4) + 1];
        let s = Sequence { entries: 5, bits_per_entry: 4, data };
        let numbers: Vec<usize> = s.into_iter().collect();
        let expected = vec![1, 2, 3, 4, 5];
        assert_eq!(numbers, expected);
        let mut buf = Vec::<u8>::new();
        s.write(&mut buf)?;
        // 1 - type, 4 - bits per entry, 133 - 5 entries as vbyte, 173 crc8 -> 4 bytes
        // total_bits = bits_per_entry * entries = 20 -> 3 more bytes: 33, 67, 5
        // 4 more bytes for crc32: 145, 176, 96, 218 -> 11 in total
        // Sequence struct doesn't save crc
        let expected = vec![1u8, 4, 133, 173, 33, 67, 5, 145, 176, 96, 218];
        assert_eq!(buf, expected);
        assert_eq!(encode_vbyte(5), [133]);
        let mut cursor = std::io::Cursor::new(&buf);
        let s2 = Sequence::read(&mut cursor)?;
        assert_eq!(s, s2);
        let numbers2: Vec<usize> = s2.into_iter().collect();
        assert_eq!(numbers, numbers2);
        // the whole record must have been consumed
        assert_eq!(cursor.position(), buf.len() as u64);
        // packing the decoded numbers again yields the same sequence and the same bytes
        let s3 = Sequence::new(&numbers, 4);
        let mut buf3 = Vec::<u8>::new();
        s3.write(&mut buf3)?;
        assert_eq!(s, s3);
        assert_eq!(buf, buf3);
        Ok(())
    }
}