sbdf/
reader.rs

1use crate::{
2    BinaryArray, BoolArray, ColumnMetadata, ColumnProperties, ColumnSlice, Date, DateArray,
3    DateTime, DateTimeArray, Decimal, DecimalArray, DoubleArray, EncodedBitArray, EncodedRunLength,
4    EncodedValue, FileHeader, FloatArray, IntArray, LongArray, Metadata, Object, Property,
5    SbdfError, SectionId, StringArray, TableMetadata, TableSlice, TimeArray, TimeSpanArray,
6    ValueArrayEncoding, ValueType, BITS_PER_BYTE, COLUMN_METADATA_NAME, COLUMN_METADATA_TYPE,
7    PROPERTY_ERROR_CODE, PROPERTY_HAS_REPLACED_VALUE, PROPERTY_IS_INVALID,
8};
9use std::io::{Cursor, Read};
10
/// Sequential reader that decodes SBDF structures from an in-memory byte slice.
///
/// The reader borrows the input for `'a` and tracks its position with a
/// [`Cursor`]; every `read_*` method advances that position. Cloning the
/// reader copies only the position and the borrowed slice reference, so a
/// clone can be used as a cheap checkpoint of the current parse position.
#[derive(Debug, Clone)]
pub struct SbdfReader<'a> {
    /// Cursor over the borrowed input bytes.
    cursor: Cursor<&'a [u8]>,
}
15
16impl<'a> SbdfReader<'a> {
17    pub fn new(bytes: &'a [u8]) -> Self {
18        let cursor = Cursor::new(bytes);
19        SbdfReader { cursor }
20    }
21
22    fn read_byte(&mut self) -> Result<u8, SbdfError> {
23        let mut buffer = [0; 1];
24        match self.cursor.read_exact(&mut buffer) {
25            Ok(()) => Ok(buffer[0]),
26            Err(_) => Err(SbdfError::InvalidBytes),
27        }
28    }
29
30    fn read_7bit_packed_int(&mut self) -> Result<i32, SbdfError> {
31        let mut value = 0;
32
33        for i in 0..5 {
34            let byte = self.read_byte()?;
35            value |= ((byte & 0x7f) as i32) << (7 * i);
36            if byte & 0x80 == 0 {
37                break;
38            }
39        }
40
41        Ok(value)
42    }
43
44    fn read_int(&mut self) -> Result<i32, SbdfError> {
45        let mut buffer = [0; 4];
46        match self.cursor.read_exact(&mut buffer) {
47            Ok(()) => Ok(i32::from_le_bytes(buffer)),
48            Err(_) => Err(SbdfError::InvalidInt),
49        }
50    }
51
52    fn read_long(&mut self) -> Result<i64, SbdfError> {
53        let mut buffer = [0; 8];
54        match self.cursor.read_exact(&mut buffer) {
55            Ok(()) => Ok(i64::from_le_bytes(buffer)),
56            Err(_) => Err(SbdfError::InvalidLong),
57        }
58    }
59
60    fn read_float(&mut self) -> Result<f32, SbdfError> {
61        let mut buffer = [0; 4];
62        match self.cursor.read_exact(&mut buffer) {
63            Ok(()) => Ok(f32::from_le_bytes(buffer)),
64            Err(_) => Err(SbdfError::InvalidFloat),
65        }
66    }
67
68    fn read_double(&mut self) -> Result<f64, SbdfError> {
69        let mut buffer = [0; 8];
70        match self.cursor.read_exact(&mut buffer) {
71            Ok(()) => Ok(f64::from_le_bytes(buffer)),
72            Err(_) => Err(SbdfError::InvalidDouble),
73        }
74    }
75
76    fn read_string(&mut self, is_packed_array: bool) -> Result<String, SbdfError> {
77        let bytes = self
78            .read_bytes(is_packed_array)
79            .map_err(|_| SbdfError::InvalidString)?;
80
81        Ok(String::from_utf8(bytes).map_err(|_| SbdfError::InvalidString)?)
82    }
83
84    fn read_bool(&mut self) -> Result<bool, SbdfError> {
85        let byte = self.read_byte()?;
86        match byte {
87            0 => Ok(false),
88            1 => Ok(true),
89            _ => Err(SbdfError::InvalidBool),
90        }
91    }
92
93    fn read_bytes(&mut self, is_packed_array: bool) -> Result<Vec<u8>, SbdfError> {
94        let length = if is_packed_array {
95            self.read_7bit_packed_int()?
96        } else {
97            self.read_int()?
98        } as usize;
99
100        let mut buffer = vec![0; length];
101        match self.cursor.read_exact(&mut buffer) {
102            Ok(()) => Ok(buffer),
103            Err(_) => Err(SbdfError::InvalidBytes),
104        }
105    }
106
107    fn read_decimal(&mut self) -> Result<Decimal, SbdfError> {
108        let mut buffer = [0; 16];
109        match self.cursor.read_exact(&mut buffer) {
110            Ok(()) => Ok(buffer),
111            Err(_) => Err(SbdfError::InvalidBytes),
112        }
113    }
114
115    fn read_multiple<T, F>(&mut self, count: usize, read_value: F) -> Result<Vec<T>, SbdfError>
116    where
117        F: Fn(&mut Self) -> Result<T, SbdfError>,
118    {
119        let mut values = Vec::with_capacity(count);
120        for _ in 0..count {
121            values.push(read_value(self)?);
122        }
123        Ok(values)
124    }
125
126    fn read_value_type(&mut self) -> Result<ValueType, SbdfError> {
127        self.read_byte()?.try_into()
128    }
129
130    fn read_object(
131        &mut self,
132        value_type: ValueType,
133        count: usize,
134        is_packed_array: bool,
135    ) -> Result<Object, SbdfError> {
136        Ok(match (value_type, count) {
137            (ValueType::Bool, 1) => Object::Bool(self.read_bool()?),
138            (ValueType::Int, 1) => Object::Int(self.read_int()?),
139            (ValueType::Long, 1) => Object::Long(self.read_long()?),
140            (ValueType::Float, 1) => Object::Float(self.read_float()?),
141            (ValueType::Double, 1) => Object::Double(self.read_double()?),
142            (ValueType::DateTime, 1) => Object::DateTime(DateTime(self.read_long()?)),
143            (ValueType::Date, 1) => Object::Date(Date(self.read_long()?)),
144            (ValueType::Time, 1) => Object::Time(self.read_long()?),
145            (ValueType::TimeSpan, 1) => Object::TimeSpan(self.read_long()?),
146            (ValueType::String, 1) => {
147                if is_packed_array {
148                    // Ignore byte size.
149                    let _ = self.read_int()?;
150                }
151
152                Object::String(self.read_string(is_packed_array)?)
153            }
154            (ValueType::Binary, 1) => {
155                if is_packed_array {
156                    // Ignore byte size.
157                    let _ = self.read_int()?;
158                }
159
160                Object::Binary(self.read_bytes(is_packed_array)?.into_boxed_slice())
161            }
162            (ValueType::Decimal, 1) => Object::Decimal(self.read_decimal()?),
163            (ValueType::Bool, _) => Object::BoolArray(BoolArray(
164                self.read_multiple(count, SbdfReader::read_bool)?
165                    .into_boxed_slice(),
166            )),
167            (ValueType::Int, _) => Object::IntArray(IntArray(
168                self.read_multiple(count, SbdfReader::read_int)?
169                    .into_boxed_slice(),
170            )),
171            (ValueType::Long, _) => Object::LongArray(LongArray(
172                self.read_multiple(count, |reader| reader.read_long())
173                    .map_err(|_| SbdfError::InvalidObject)?
174                    .into_boxed_slice(),
175            )),
176            (ValueType::Float, _) => Object::FloatArray(FloatArray(
177                self.read_multiple(count, SbdfReader::read_float)?
178                    .into_boxed_slice(),
179            )),
180            (ValueType::Double, _) => Object::DoubleArray(DoubleArray(
181                self.read_multiple(count, SbdfReader::read_double)?
182                    .into_boxed_slice(),
183            )),
184            (ValueType::DateTime, _) => Object::DateTimeArray(DateTimeArray(
185                self.read_multiple(count, SbdfReader::read_long)?
186                    .into_boxed_slice(),
187            )),
188            (ValueType::Date, _) => Object::DateArray(DateArray(
189                self.read_multiple(count, SbdfReader::read_long)?
190                    .into_boxed_slice(),
191            )),
192            (ValueType::Time, _) => Object::TimeArray(TimeArray(
193                self.read_multiple(count, SbdfReader::read_long)?
194                    .into_boxed_slice(),
195            )),
196            (ValueType::TimeSpan, _) => Object::TimeSpanArray(TimeSpanArray(
197                self.read_multiple(count, SbdfReader::read_long)?
198                    .into_boxed_slice(),
199            )),
200            (ValueType::String, _) => {
201                let mut result = Vec::with_capacity(count);
202
203                if is_packed_array {
204                    // Ignore byte size.
205                    let _ = self.read_int()?;
206                }
207
208                for _ in 0..count {
209                    result.push(self.read_string(is_packed_array)?);
210                }
211
212                Object::StringArray(StringArray(result.into_boxed_slice()))
213            }
214            (ValueType::Binary, _) => {
215                let mut result = Vec::with_capacity(count);
216
217                if is_packed_array {
218                    // Ignore byte size.
219                    let _ = self.read_int()?;
220                }
221
222                for _ in 0..count {
223                    result.push(self.read_bytes(is_packed_array)?.into_boxed_slice());
224                }
225
226                Object::BinaryArray(BinaryArray(result.into_boxed_slice()))
227            }
228            (ValueType::Decimal, _) => Object::DecimalArray(DecimalArray(
229                self.read_multiple(count, SbdfReader::read_decimal)?
230                    .into_boxed_slice(),
231            )),
232        })
233    }
234
235    fn read_unpacked_object(&mut self, value_type: ValueType) -> Result<Object, SbdfError> {
236        self.read_object(value_type, 1, false)
237    }
238
239    pub fn read_section_id(&mut self) -> Result<SectionId, SbdfError> {
240        if self.read_byte()? != 0xdfu8 {
241            return Err(SbdfError::MagicNumberMismatch);
242        }
243
244        if self.read_byte()? != 0x5bu8 {
245            return Err(SbdfError::MagicNumberMismatch);
246        }
247
248        self.read_byte().and_then(|value| value.try_into())
249    }
250
251    pub fn expect_section_id(&mut self, expected: SectionId) -> Result<(), SbdfError> {
252        let actual = self.read_section_id()?;
253        if actual != expected {
254            return Err(SbdfError::WrongSectionId { expected, actual });
255        }
256        Ok(())
257    }
258
259    pub fn read_file_header(&mut self) -> Result<FileHeader, SbdfError> {
260        let major_version = self.read_byte()?;
261        let minor_version = self.read_byte()?;
262
263        if major_version != 1 || minor_version != 0 {
264            return Err(SbdfError::UnsupportedVersion {
265                major_version,
266                minor_version,
267            });
268        }
269
270        Ok(FileHeader {
271            major_version,
272            minor_version,
273        })
274    }
275
276    pub fn read_metadata_value(
277        &mut self,
278        value_type: ValueType,
279    ) -> Result<Option<Object>, SbdfError> {
280        match self.read_byte()? {
281            0 => Ok(None),
282            1 => Ok(Some(self.read_unpacked_object(value_type)?)),
283            _ => Err(SbdfError::MetadataValueArrayLengthMustBeZeroOrOne),
284        }
285    }
286
287    fn read_metadata(&mut self) -> Result<Metadata, SbdfError> {
288        let name = self.read_string(false)?;
289        let value_type = self.read_value_type()?;
290        let value = match self.read_metadata_value(value_type)? {
291            Some(value) => value,
292            None => value_type.default_object()?,
293        };
294        let default_value = self.read_metadata_value(value_type)?;
295
296        Ok(Metadata {
297            name,
298            value,
299            default_value,
300        })
301    }
302
303    pub fn read_table_metadata(&mut self) -> Result<TableMetadata, SbdfError> {
304        let table_metadata_count: usize = self
305            .read_int()?
306            .try_into()
307            .map_err(|_| SbdfError::InvalidSize)?;
308
309        let mut table_metadata = Vec::with_capacity(table_metadata_count);
310
311        for _ in 0..table_metadata_count {
312            table_metadata.push(self.read_metadata()?);
313        }
314
315        let column_count = self.read_int()? as usize;
316        let mut columns = Vec::with_capacity(column_count);
317
318        let metadata_count = self.read_int()? as usize;
319        let mut metadata = Vec::with_capacity(metadata_count);
320
321        for _ in 0..metadata_count {
322            let name = self.read_string(false)?;
323            let value_type = self.read_value_type()?;
324            let object = self.read_metadata_value(value_type)?;
325            metadata.push((name, value_type, object));
326        }
327
328        for _ in 0..column_count {
329            let mut maybe_name = None;
330            let mut maybe_type = None;
331
332            let mut column_metadata = Vec::with_capacity(metadata_count.saturating_sub(2));
333
334            for j in 0..metadata_count {
335                let has_metadata = self.read_bool()?;
336                if !has_metadata {
337                    continue;
338                }
339
340                let (name, ty, default_value) = &metadata[j];
341                let value = self.read_unpacked_object(*ty)?;
342
343                // Add metadata to the current column.
344                match name.as_str() {
345                    COLUMN_METADATA_NAME => {
346                        maybe_name = match value {
347                            Object::String(name) => Some(name),
348                            _ => return Err(SbdfError::InvalidMetadata),
349                        };
350                    }
351                    COLUMN_METADATA_TYPE => {
352                        maybe_type = match value {
353                            Object::Binary(ty_raw) => {
354                                if ty_raw.len() != 1 {
355                                    return Err(SbdfError::InvalidMetadata);
356                                }
357
358                                Some(ty_raw[0].try_into()?)
359                            }
360                            _ => return Err(SbdfError::InvalidMetadata),
361                        }
362                    }
363                    _ => {
364                        column_metadata.push(Metadata {
365                            name: name.clone(),
366                            value,
367                            default_value: default_value.clone(),
368                        });
369                    }
370                }
371            }
372
373            column_metadata.shrink_to_fit();
374            columns.push(ColumnMetadata {
375                name: maybe_name.ok_or(SbdfError::InvalidMetadata)?,
376                ty: maybe_type.ok_or(SbdfError::InvalidMetadata)?,
377                other: column_metadata.into_boxed_slice(),
378            });
379        }
380
381        Ok(TableMetadata {
382            metadata: table_metadata.into_boxed_slice(),
383            columns: columns.into_boxed_slice(),
384        })
385    }
386
387    fn read_object_packed_array(&mut self, value_type: ValueType) -> Result<Object, SbdfError> {
388        let count = self.read_int()? as usize;
389        self.read_object(value_type, count, true)
390    }
391
392    fn read_value_array(&mut self) -> Result<EncodedValue, SbdfError> {
393        let encoding: ValueArrayEncoding = self.read_byte()?.try_into()?;
394        let value_type = self.read_value_type()?;
395        Ok(match encoding {
396            ValueArrayEncoding::Plain => {
397                let value = self.read_object_packed_array(value_type)?;
398                EncodedValue::Plain(value)
399            }
400            ValueArrayEncoding::RunLength => {
401                let _item_count = self.read_int()?;
402
403                // The repetitions are byte arrays, so we can just read them directly instead of
404                // going through the object deserialization process.
405                let repetitions = self.read_bytes(false)?;
406
407                let values = self.read_object_packed_array(value_type)?;
408                EncodedValue::RunLength(EncodedRunLength {
409                    repetitions: repetitions.into_boxed_slice(),
410                    values,
411                })
412            }
413            ValueArrayEncoding::BitArray => {
414                let bit_count = self.read_int()? as usize;
415                // Round up to the nearest byte.
416                let byte_length = bit_count.div_ceil(BITS_PER_BYTE);
417                let mut bytes = vec![0; byte_length];
418                self.cursor
419                    .read_exact(&mut bytes)
420                    .map_err(|_| SbdfError::InvalidBytes)?;
421
422                EncodedValue::BitArray(EncodedBitArray {
423                    bit_count,
424                    bytes: bytes.into_boxed_slice(),
425                })
426            }
427        })
428    }
429
430    fn read_properties(&mut self) -> Result<ColumnProperties, SbdfError> {
431        let count = self.read_int()? as usize;
432        let mut properties = Vec::with_capacity(count);
433
434        let mut is_invalid = None;
435        let mut error_code = None;
436        let mut has_replaced_value = None;
437
438        for _ in 0..count {
439            let name = self.read_string(false)?;
440            let values = self.read_value_array()?;
441
442            // Try to recognize standard properties when the names and types match.
443            match (name.as_str(), values) {
444                (PROPERTY_IS_INVALID, EncodedValue::BitArray(bit_array)) => {
445                    is_invalid = Some(bit_array);
446                }
447                (PROPERTY_ERROR_CODE, encoded) => {
448                    error_code = Some(encoded);
449                }
450                (PROPERTY_HAS_REPLACED_VALUE, EncodedValue::BitArray(bit_array)) => {
451                    has_replaced_value = Some(bit_array);
452                }
453                (_, values) => properties.push(Property { name, values }),
454            }
455        }
456
457        Ok(ColumnProperties {
458            is_invalid,
459            error_code,
460            has_replaced_value,
461            other: properties.into_boxed_slice(),
462        })
463    }
464
465    fn read_column_slice(&mut self) -> Result<ColumnSlice, SbdfError> {
466        self.expect_section_id(SectionId::ColumnSlice)?;
467
468        let values = self.read_value_array()?;
469        let properties = self.read_properties()?;
470
471        Ok(ColumnSlice { values, properties })
472    }
473
474    pub fn read_table_slice(
475        &mut self,
476        table_metadata: &TableMetadata,
477    ) -> Result<TableSlice, SbdfError> {
478        let column_count = self.read_int()? as usize;
479
480        if table_metadata.columns.len() != column_count {
481            return Err(SbdfError::ColumnCountMismatch);
482        }
483
484        let mut column_slices = Vec::with_capacity(column_count);
485
486        for _ in 0..column_count {
487            column_slices.push(self.read_column_slice()?);
488        }
489
490        Ok(TableSlice {
491            column_slices: column_slices.into_boxed_slice(),
492        })
493    }
494}
495
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample payload shared by the string and byte-string tests.
    const GREETING: &str = "Hello, world!";

    /// Prefixes `payload` with its length as a 4-byte little-endian integer.
    fn with_int_length(payload: &[u8]) -> Vec<u8> {
        let mut framed = (payload.len() as i32).to_le_bytes().to_vec();
        framed.extend_from_slice(payload);
        framed
    }

    /// Prefixes `payload` with its length as a single byte, which is a valid
    /// 7-bit packed integer as long as the length needs no continuation bit.
    fn with_packed_length(payload: &[u8]) -> Vec<u8> {
        let mut framed = vec![payload.len() as u8];
        framed.extend_from_slice(payload);
        framed
    }

    #[test]
    fn read_byte() {
        let input = [0x12, 0x34];
        let mut reader = SbdfReader::new(&input);
        let first = reader.read_byte().unwrap();
        let second = reader.read_byte().unwrap();
        assert_eq!((first, second), (0x12, 0x34));
    }

    #[test]
    fn read_7bit_packed_int() {
        // Two-byte encoding of 1024, then the single-byte values 1 and 0.
        let input = [0x80, 0x08, 0x01, 0];
        let mut reader = SbdfReader::new(&input);
        for expected in [1024, 1, 0] {
            assert_eq!(reader.read_7bit_packed_int().unwrap(), expected);
        }
    }

    #[test]
    fn read_int() {
        let input = [0x0, 0x4, 0x0, 0x0];
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_int().unwrap(), 1024);
    }

    #[test]
    fn read_long() {
        let input = [0x0, 0x4, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0];
        let mut reader = SbdfReader::new(&input);
        let expected = (1024i64 << 32) | 1024;
        assert_eq!(reader.read_long().unwrap(), expected);
    }

    #[test]
    fn read_float() {
        let expected = 123.456f32;
        let input = expected.to_le_bytes();
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_float().unwrap(), expected);
    }

    #[test]
    fn read_double() {
        let expected = 123.456f64;
        let input = expected.to_le_bytes();
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_double().unwrap(), expected);
    }

    #[test]
    fn read_string_unpacked() {
        let input = with_int_length(GREETING.as_bytes());
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_string(false).unwrap(), GREETING);
    }

    #[test]
    fn read_string_packed() {
        let input = with_packed_length(GREETING.as_bytes());
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_string(true).unwrap(), GREETING);
    }

    #[test]
    fn read_bool() {
        let input = [0, 1];
        let mut reader = SbdfReader::new(&input);
        assert!(!reader.read_bool().unwrap());
        assert!(reader.read_bool().unwrap());
    }

    #[test]
    fn read_bytes_unpacked() {
        let input = with_int_length(GREETING.as_bytes());
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_bytes(false).unwrap(), GREETING.as_bytes());
    }

    #[test]
    fn read_bytes_packed() {
        let input = with_packed_length(GREETING.as_bytes());
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_bytes(true).unwrap(), GREETING.as_bytes());
    }

    #[test]
    fn read_decimal() {
        let input = [1; 16];
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_decimal().unwrap(), input);
    }

    #[test]
    fn read_value_type() {
        let input = [ValueType::TimeSpan as u8, ValueType::String as u8];
        let mut reader = SbdfReader::new(&input);
        let first = reader.read_value_type().unwrap();
        let second = reader.read_value_type().unwrap();
        assert_eq!(first, ValueType::TimeSpan);
        assert_eq!(second, ValueType::String);
    }

    #[test]
    fn read_section_id() {
        // Magic bytes followed by the table metadata section identifier.
        let input = [0xdf, 0x5b, 0x2];
        let mut reader = SbdfReader::new(&input);
        assert_eq!(reader.read_section_id().unwrap(), SectionId::TableMetadata);
    }

    #[test]
    fn read_file_header() {
        let input = [0x1, 0x0];
        let mut reader = SbdfReader::new(&input);
        let expected = FileHeader {
            major_version: 1,
            minor_version: 0,
        };
        assert_eq!(reader.read_file_header().unwrap(), expected);
    }
}