1use crate::{
2 BinaryArray, BoolArray, ColumnMetadata, ColumnProperties, ColumnSlice, Date, DateArray,
3 DateTime, DateTimeArray, Decimal, DecimalArray, DoubleArray, EncodedBitArray, EncodedRunLength,
4 EncodedValue, FileHeader, FloatArray, IntArray, LongArray, Metadata, Object, Property,
5 SbdfError, SectionId, StringArray, TableMetadata, TableSlice, TimeArray, TimeSpanArray,
6 ValueArrayEncoding, ValueType, BITS_PER_BYTE, COLUMN_METADATA_NAME, COLUMN_METADATA_TYPE,
7 PROPERTY_ERROR_CODE, PROPERTY_HAS_REPLACED_VALUE, PROPERTY_IS_INVALID,
8};
9use std::io::{Cursor, Read};
10
/// Sequential reader over a borrowed, in-memory byte buffer.
///
/// The bytes are wrapped in a [`Cursor`], so every `read_*` method consumes
/// input starting at the current cursor position.
#[derive(Debug)]
pub struct SbdfReader<'a> {
    /// Read position over the borrowed input bytes.
    cursor: Cursor<&'a [u8]>,
}
15
16impl<'a> SbdfReader<'a> {
17 pub fn new(bytes: &'a [u8]) -> Self {
18 let cursor = Cursor::new(bytes);
19 SbdfReader { cursor }
20 }
21
22 fn read_byte(&mut self) -> Result<u8, SbdfError> {
23 let mut buffer = [0; 1];
24 match self.cursor.read_exact(&mut buffer) {
25 Ok(()) => Ok(buffer[0]),
26 Err(_) => Err(SbdfError::InvalidBytes),
27 }
28 }
29
30 fn read_7bit_packed_int(&mut self) -> Result<i32, SbdfError> {
31 let mut value = 0;
32
33 for i in 0..5 {
34 let byte = self.read_byte()?;
35 value |= ((byte & 0x7f) as i32) << (7 * i);
36 if byte & 0x80 == 0 {
37 break;
38 }
39 }
40
41 Ok(value)
42 }
43
44 fn read_int(&mut self) -> Result<i32, SbdfError> {
45 let mut buffer = [0; 4];
46 match self.cursor.read_exact(&mut buffer) {
47 Ok(()) => Ok(i32::from_le_bytes(buffer)),
48 Err(_) => Err(SbdfError::InvalidInt),
49 }
50 }
51
52 fn read_long(&mut self) -> Result<i64, SbdfError> {
53 let mut buffer = [0; 8];
54 match self.cursor.read_exact(&mut buffer) {
55 Ok(()) => Ok(i64::from_le_bytes(buffer)),
56 Err(_) => Err(SbdfError::InvalidLong),
57 }
58 }
59
60 fn read_float(&mut self) -> Result<f32, SbdfError> {
61 let mut buffer = [0; 4];
62 match self.cursor.read_exact(&mut buffer) {
63 Ok(()) => Ok(f32::from_le_bytes(buffer)),
64 Err(_) => Err(SbdfError::InvalidFloat),
65 }
66 }
67
68 fn read_double(&mut self) -> Result<f64, SbdfError> {
69 let mut buffer = [0; 8];
70 match self.cursor.read_exact(&mut buffer) {
71 Ok(()) => Ok(f64::from_le_bytes(buffer)),
72 Err(_) => Err(SbdfError::InvalidDouble),
73 }
74 }
75
76 fn read_string(&mut self, is_packed_array: bool) -> Result<String, SbdfError> {
77 let bytes = self
78 .read_bytes(is_packed_array)
79 .map_err(|_| SbdfError::InvalidString)?;
80
81 Ok(String::from_utf8(bytes).map_err(|_| SbdfError::InvalidString)?)
82 }
83
84 fn read_bool(&mut self) -> Result<bool, SbdfError> {
85 let byte = self.read_byte()?;
86 match byte {
87 0 => Ok(false),
88 1 => Ok(true),
89 _ => Err(SbdfError::InvalidBool),
90 }
91 }
92
93 fn read_bytes(&mut self, is_packed_array: bool) -> Result<Vec<u8>, SbdfError> {
94 let length = if is_packed_array {
95 self.read_7bit_packed_int()?
96 } else {
97 self.read_int()?
98 } as usize;
99
100 let mut buffer = vec![0; length];
101 match self.cursor.read_exact(&mut buffer) {
102 Ok(()) => Ok(buffer),
103 Err(_) => Err(SbdfError::InvalidBytes),
104 }
105 }
106
107 fn read_decimal(&mut self) -> Result<Decimal, SbdfError> {
108 let mut buffer = [0; 16];
109 match self.cursor.read_exact(&mut buffer) {
110 Ok(()) => Ok(buffer),
111 Err(_) => Err(SbdfError::InvalidBytes),
112 }
113 }
114
115 fn read_multiple<T, F>(&mut self, count: usize, read_value: F) -> Result<Vec<T>, SbdfError>
116 where
117 F: Fn(&mut Self) -> Result<T, SbdfError>,
118 {
119 let mut values = Vec::with_capacity(count);
120 for _ in 0..count {
121 values.push(read_value(self)?);
122 }
123 Ok(values)
124 }
125
126 fn read_value_type(&mut self) -> Result<ValueType, SbdfError> {
127 self.read_byte()?.try_into()
128 }
129
130 fn read_object(
131 &mut self,
132 value_type: ValueType,
133 count: usize,
134 is_packed_array: bool,
135 ) -> Result<Object, SbdfError> {
136 Ok(match (value_type, count) {
137 (ValueType::Bool, 1) => Object::Bool(self.read_bool()?),
138 (ValueType::Int, 1) => Object::Int(self.read_int()?),
139 (ValueType::Long, 1) => Object::Long(self.read_long()?),
140 (ValueType::Float, 1) => Object::Float(self.read_float()?),
141 (ValueType::Double, 1) => Object::Double(self.read_double()?),
142 (ValueType::DateTime, 1) => Object::DateTime(DateTime(self.read_long()?)),
143 (ValueType::Date, 1) => Object::Date(Date(self.read_long()?)),
144 (ValueType::Time, 1) => Object::Time(self.read_long()?),
145 (ValueType::TimeSpan, 1) => Object::TimeSpan(self.read_long()?),
146 (ValueType::String, 1) => {
147 if is_packed_array {
148 let _ = self.read_int()?;
150 }
151
152 Object::String(self.read_string(is_packed_array)?)
153 }
154 (ValueType::Binary, 1) => {
155 if is_packed_array {
156 let _ = self.read_int()?;
158 }
159
160 Object::Binary(self.read_bytes(is_packed_array)?.into_boxed_slice())
161 }
162 (ValueType::Decimal, 1) => Object::Decimal(self.read_decimal()?),
163 (ValueType::Bool, _) => Object::BoolArray(BoolArray(
164 self.read_multiple(count, SbdfReader::read_bool)?
165 .into_boxed_slice(),
166 )),
167 (ValueType::Int, _) => Object::IntArray(IntArray(
168 self.read_multiple(count, SbdfReader::read_int)?
169 .into_boxed_slice(),
170 )),
171 (ValueType::Long, _) => Object::LongArray(LongArray(
172 self.read_multiple(count, |reader| reader.read_long())
173 .map_err(|_| SbdfError::InvalidObject)?
174 .into_boxed_slice(),
175 )),
176 (ValueType::Float, _) => Object::FloatArray(FloatArray(
177 self.read_multiple(count, SbdfReader::read_float)?
178 .into_boxed_slice(),
179 )),
180 (ValueType::Double, _) => Object::DoubleArray(DoubleArray(
181 self.read_multiple(count, SbdfReader::read_double)?
182 .into_boxed_slice(),
183 )),
184 (ValueType::DateTime, _) => Object::DateTimeArray(DateTimeArray(
185 self.read_multiple(count, SbdfReader::read_long)?
186 .into_boxed_slice(),
187 )),
188 (ValueType::Date, _) => Object::DateArray(DateArray(
189 self.read_multiple(count, SbdfReader::read_long)?
190 .into_boxed_slice(),
191 )),
192 (ValueType::Time, _) => Object::TimeArray(TimeArray(
193 self.read_multiple(count, SbdfReader::read_long)?
194 .into_boxed_slice(),
195 )),
196 (ValueType::TimeSpan, _) => Object::TimeSpanArray(TimeSpanArray(
197 self.read_multiple(count, SbdfReader::read_long)?
198 .into_boxed_slice(),
199 )),
200 (ValueType::String, _) => {
201 let mut result = Vec::with_capacity(count);
202
203 if is_packed_array {
204 let _ = self.read_int()?;
206 }
207
208 for _ in 0..count {
209 result.push(self.read_string(is_packed_array)?);
210 }
211
212 Object::StringArray(StringArray(result.into_boxed_slice()))
213 }
214 (ValueType::Binary, _) => {
215 let mut result = Vec::with_capacity(count);
216
217 if is_packed_array {
218 let _ = self.read_int()?;
220 }
221
222 for _ in 0..count {
223 result.push(self.read_bytes(is_packed_array)?.into_boxed_slice());
224 }
225
226 Object::BinaryArray(BinaryArray(result.into_boxed_slice()))
227 }
228 (ValueType::Decimal, _) => Object::DecimalArray(DecimalArray(
229 self.read_multiple(count, SbdfReader::read_decimal)?
230 .into_boxed_slice(),
231 )),
232 })
233 }
234
235 fn read_unpacked_object(&mut self, value_type: ValueType) -> Result<Object, SbdfError> {
236 self.read_object(value_type, 1, false)
237 }
238
239 pub fn read_section_id(&mut self) -> Result<SectionId, SbdfError> {
240 if self.read_byte()? != 0xdfu8 {
241 return Err(SbdfError::MagicNumberMismatch);
242 }
243
244 if self.read_byte()? != 0x5bu8 {
245 return Err(SbdfError::MagicNumberMismatch);
246 }
247
248 self.read_byte().and_then(|value| value.try_into())
249 }
250
251 pub fn expect_section_id(&mut self, expected: SectionId) -> Result<(), SbdfError> {
252 let actual = self.read_section_id()?;
253 if actual != expected {
254 return Err(SbdfError::WrongSectionId { expected, actual });
255 }
256 Ok(())
257 }
258
259 pub fn read_file_header(&mut self) -> Result<FileHeader, SbdfError> {
260 let major_version = self.read_byte()?;
261 let minor_version = self.read_byte()?;
262
263 if major_version != 1 || minor_version != 0 {
264 return Err(SbdfError::UnsupportedVersion {
265 major_version,
266 minor_version,
267 });
268 }
269
270 Ok(FileHeader {
271 major_version,
272 minor_version,
273 })
274 }
275
276 pub fn read_metadata_value(
277 &mut self,
278 value_type: ValueType,
279 ) -> Result<Option<Object>, SbdfError> {
280 match self.read_byte()? {
281 0 => Ok(None),
282 1 => Ok(Some(self.read_unpacked_object(value_type)?)),
283 _ => Err(SbdfError::MetadataValueArrayLengthMustBeZeroOrOne),
284 }
285 }
286
287 fn read_metadata(&mut self) -> Result<Metadata, SbdfError> {
288 let name = self.read_string(false)?;
289 let value_type = self.read_value_type()?;
290 let value = match self.read_metadata_value(value_type)? {
291 Some(value) => value,
292 None => value_type.default_object()?,
293 };
294 let default_value = self.read_metadata_value(value_type)?;
295
296 Ok(Metadata {
297 name,
298 value,
299 default_value,
300 })
301 }
302
303 pub fn read_table_metadata(&mut self) -> Result<TableMetadata, SbdfError> {
304 let table_metadata_count: usize = self
305 .read_int()?
306 .try_into()
307 .map_err(|_| SbdfError::InvalidSize)?;
308
309 let mut table_metadata = Vec::with_capacity(table_metadata_count);
310
311 for _ in 0..table_metadata_count {
312 table_metadata.push(self.read_metadata()?);
313 }
314
315 let column_count = self.read_int()? as usize;
316 let mut columns = Vec::with_capacity(column_count);
317
318 let metadata_count = self.read_int()? as usize;
319 let mut metadata = Vec::with_capacity(metadata_count);
320
321 for _ in 0..metadata_count {
322 let name = self.read_string(false)?;
323 let value_type = self.read_value_type()?;
324 let object = self.read_metadata_value(value_type)?;
325 metadata.push((name, value_type, object));
326 }
327
328 for _ in 0..column_count {
329 let mut maybe_name = None;
330 let mut maybe_type = None;
331
332 let mut column_metadata = Vec::with_capacity(metadata_count.saturating_sub(2));
333
334 for j in 0..metadata_count {
335 let has_metadata = self.read_bool()?;
336 if !has_metadata {
337 continue;
338 }
339
340 let (name, ty, default_value) = &metadata[j];
341 let value = self.read_unpacked_object(*ty)?;
342
343 match name.as_str() {
345 COLUMN_METADATA_NAME => {
346 maybe_name = match value {
347 Object::String(name) => Some(name),
348 _ => return Err(SbdfError::InvalidMetadata),
349 };
350 }
351 COLUMN_METADATA_TYPE => {
352 maybe_type = match value {
353 Object::Binary(ty_raw) => {
354 if ty_raw.len() != 1 {
355 return Err(SbdfError::InvalidMetadata);
356 }
357
358 Some(ty_raw[0].try_into()?)
359 }
360 _ => return Err(SbdfError::InvalidMetadata),
361 }
362 }
363 _ => {
364 column_metadata.push(Metadata {
365 name: name.clone(),
366 value,
367 default_value: default_value.clone(),
368 });
369 }
370 }
371 }
372
373 column_metadata.shrink_to_fit();
374 columns.push(ColumnMetadata {
375 name: maybe_name.ok_or(SbdfError::InvalidMetadata)?,
376 ty: maybe_type.ok_or(SbdfError::InvalidMetadata)?,
377 other: column_metadata.into_boxed_slice(),
378 });
379 }
380
381 Ok(TableMetadata {
382 metadata: table_metadata.into_boxed_slice(),
383 columns: columns.into_boxed_slice(),
384 })
385 }
386
387 fn read_object_packed_array(&mut self, value_type: ValueType) -> Result<Object, SbdfError> {
388 let count = self.read_int()? as usize;
389 self.read_object(value_type, count, true)
390 }
391
392 fn read_value_array(&mut self) -> Result<EncodedValue, SbdfError> {
393 let encoding: ValueArrayEncoding = self.read_byte()?.try_into()?;
394 let value_type = self.read_value_type()?;
395 Ok(match encoding {
396 ValueArrayEncoding::Plain => {
397 let value = self.read_object_packed_array(value_type)?;
398 EncodedValue::Plain(value)
399 }
400 ValueArrayEncoding::RunLength => {
401 let _item_count = self.read_int()?;
402
403 let repetitions = self.read_bytes(false)?;
406
407 let values = self.read_object_packed_array(value_type)?;
408 EncodedValue::RunLength(EncodedRunLength {
409 repetitions: repetitions.into_boxed_slice(),
410 values,
411 })
412 }
413 ValueArrayEncoding::BitArray => {
414 let bit_count = self.read_int()? as usize;
415 let byte_length = bit_count.div_ceil(BITS_PER_BYTE);
417 let mut bytes = vec![0; byte_length];
418 self.cursor
419 .read_exact(&mut bytes)
420 .map_err(|_| SbdfError::InvalidBytes)?;
421
422 EncodedValue::BitArray(EncodedBitArray {
423 bit_count,
424 bytes: bytes.into_boxed_slice(),
425 })
426 }
427 })
428 }
429
430 fn read_properties(&mut self) -> Result<ColumnProperties, SbdfError> {
431 let count = self.read_int()? as usize;
432 let mut properties = Vec::with_capacity(count);
433
434 let mut is_invalid = None;
435 let mut error_code = None;
436 let mut has_replaced_value = None;
437
438 for _ in 0..count {
439 let name = self.read_string(false)?;
440 let values = self.read_value_array()?;
441
442 match (name.as_str(), values) {
444 (PROPERTY_IS_INVALID, EncodedValue::BitArray(bit_array)) => {
445 is_invalid = Some(bit_array);
446 }
447 (PROPERTY_ERROR_CODE, encoded) => {
448 error_code = Some(encoded);
449 }
450 (PROPERTY_HAS_REPLACED_VALUE, EncodedValue::BitArray(bit_array)) => {
451 has_replaced_value = Some(bit_array);
452 }
453 (_, values) => properties.push(Property { name, values }),
454 }
455 }
456
457 Ok(ColumnProperties {
458 is_invalid,
459 error_code,
460 has_replaced_value,
461 other: properties.into_boxed_slice(),
462 })
463 }
464
465 fn read_column_slice(&mut self) -> Result<ColumnSlice, SbdfError> {
466 self.expect_section_id(SectionId::ColumnSlice)?;
467
468 let values = self.read_value_array()?;
469 let properties = self.read_properties()?;
470
471 Ok(ColumnSlice { values, properties })
472 }
473
474 pub fn read_table_slice(
475 &mut self,
476 table_metadata: &TableMetadata,
477 ) -> Result<TableSlice, SbdfError> {
478 let column_count = self.read_int()? as usize;
479
480 if table_metadata.columns.len() != column_count {
481 return Err(SbdfError::ColumnCountMismatch);
482 }
483
484 let mut column_slices = Vec::with_capacity(column_count);
485
486 for _ in 0..column_count {
487 column_slices.push(self.read_column_slice()?);
488 }
489
490 Ok(TableSlice {
491 column_slices: column_slices.into_boxed_slice(),
492 })
493 }
494}
495
#[cfg(test)]
mod tests {
    use super::*;

    /// Payload shared by the string and byte-run tests.
    const GREETING: &[u8] = b"Hello, world!";

    /// Returns `payload` preceded by its length as a little-endian `i32`
    /// (the unpacked length-prefix format).
    fn int_prefixed(payload: &[u8]) -> Vec<u8> {
        let mut buffer = (payload.len() as i32).to_le_bytes().to_vec();
        buffer.extend_from_slice(payload);
        buffer
    }

    /// Returns `payload` preceded by a one-byte length. For lengths below
    /// 128 this is exactly the 7-bit packed encoding.
    fn packed_prefixed(payload: &[u8]) -> Vec<u8> {
        let mut buffer = vec![payload.len() as u8];
        buffer.extend_from_slice(payload);
        buffer
    }

    #[test]
    fn read_byte() {
        let buffer = [0x12, 0x34];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_byte().unwrap(), 0x12);
        assert_eq!(reader.read_byte().unwrap(), 0x34);
    }

    #[test]
    fn read_byte_fails_at_end_of_input() {
        let mut reader = SbdfReader::new(&[]);
        assert!(matches!(reader.read_byte(), Err(SbdfError::InvalidBytes)));
    }

    #[test]
    fn read_7bit_packed_int() {
        // 0x80 0x08 -> 1024 (two bytes), then 1 and 0 as single bytes.
        let buffer = [0x80, 0x08, 0x01, 0];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_7bit_packed_int().unwrap(), 1024);
        assert_eq!(reader.read_7bit_packed_int().unwrap(), 1);
        assert_eq!(reader.read_7bit_packed_int().unwrap(), 0);
    }

    #[test]
    fn read_int() {
        let buffer = [0x0, 0x4, 0x0, 0x0];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_int().unwrap(), 1024);
    }

    #[test]
    fn read_long() {
        let buffer = [0x0, 0x4, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_long().unwrap(), 1024 | (1024 << 32));
    }

    #[test]
    fn read_float() {
        let buffer = 123.456f32.to_le_bytes();
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_float().unwrap(), 123.456);
    }

    #[test]
    fn read_double() {
        let buffer = 123.456f64.to_le_bytes();
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_double().unwrap(), 123.456);
    }

    #[test]
    fn read_string_unpacked() {
        let buffer = int_prefixed(GREETING);
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_string(false).unwrap(), "Hello, world!");
    }

    #[test]
    fn read_string_packed() {
        let buffer = packed_prefixed(GREETING);
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_string(true).unwrap(), "Hello, world!");
    }

    #[test]
    fn read_bool() {
        let buffer = [0, 1];
        let mut reader = SbdfReader::new(&buffer);
        assert!(!reader.read_bool().unwrap());
        assert!(reader.read_bool().unwrap());
    }

    #[test]
    fn read_bool_rejects_other_bytes() {
        let buffer = [2];
        let mut reader = SbdfReader::new(&buffer);
        assert!(matches!(reader.read_bool(), Err(SbdfError::InvalidBool)));
    }

    #[test]
    fn read_bytes_unpacked() {
        let buffer = int_prefixed(GREETING);
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_bytes(false).unwrap(), GREETING);
    }

    #[test]
    fn read_bytes_packed() {
        let buffer = packed_prefixed(GREETING);
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_bytes(true).unwrap(), GREETING);
    }

    #[test]
    fn read_decimal() {
        let buffer = [1; 16];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_decimal().unwrap(), buffer);
    }

    #[test]
    fn read_value_type() {
        let buffer = [ValueType::TimeSpan as u8, ValueType::String as u8];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_value_type().unwrap(), ValueType::TimeSpan);
        assert_eq!(reader.read_value_type().unwrap(), ValueType::String);
    }

    #[test]
    fn read_section_id() {
        let buffer = [0xdf, 0x5b, 0x2];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(reader.read_section_id().unwrap(), SectionId::TableMetadata);
    }

    #[test]
    fn read_section_id_rejects_bad_magic() {
        let buffer = [0x00, 0x5b, 0x2];
        let mut reader = SbdfReader::new(&buffer);
        assert!(matches!(
            reader.read_section_id(),
            Err(SbdfError::MagicNumberMismatch)
        ));
    }

    #[test]
    fn read_file_header() {
        let buffer = [0x1, 0x0];
        let mut reader = SbdfReader::new(&buffer);
        assert_eq!(
            reader.read_file_header().unwrap(),
            FileHeader {
                major_version: 1,
                minor_version: 0
            }
        );
    }

    #[test]
    fn read_file_header_rejects_unsupported_version() {
        let buffer = [0x2, 0x0];
        let mut reader = SbdfReader::new(&buffer);
        assert!(matches!(
            reader.read_file_header(),
            Err(SbdfError::UnsupportedVersion {
                major_version: 2,
                minor_version: 0
            })
        ));
    }
}