orcxx 0.5.0

Rust bindings for the official C++ library for Apache ORC
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
// Copyright (C) 2023 The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

//! Helpers for the `orcxx_derive` crate.

#![allow(clippy::redundant_closure_call)]

use rust_decimal::Decimal;
use thiserror::Error;

use std::convert::TryInto;
use std::iter::Map;
use std::num::TryFromIntError;
use std::slice::IterMut;
use std::str::Utf8Error;

use errors::OrcError;
use kind::Kind;
use vector::{BorrowedColumnVectorBatch, ColumnVectorBatch, DecimalVectorBatch, StructVectorBatch};

/// Error returned when failing to read a particular batch of data
#[derive(Debug, Error, PartialEq)]
pub enum DeserializationError {
    /// Expected to parse a structure from the ORC file, but the given column is of
    /// an incompatible type. Contains the ORC exception which occurred when casting.
    #[error("Mismatched ORC column type: {0}")]
    MismatchedColumnKind(OrcError),
    /// The structure has a field which was not selected when reading the ORC file (or
    /// is missing from the file).
    /// Contains the name of the field.
    #[error("Field {0} is missing from ORC file")]
    MissingField(String),
    /// u64 could not be converted to usize. Contains the original error
    #[error("Number of items exceeds maximum buffer capacity on this platform: {0}")]
    UsizeOverflow(TryFromIntError),
    /// Failed to decode a [`String`] (use [`Vec<u8>`](`Vec`) instead for columns of
    /// `binary` type).
    #[error("Failed to decode ORC byte string as UTF-8: {0}")]
    Utf8Error(Utf8Error),
    /// [`read_from_vector_batch`](OrcDeserialize::read_from_vector_batch) or
    /// [`from_vector_batch`](OrcDeserialize::from_vector_batch) was called
    /// as a method on a non-`Option` type, with a column containing nulls as parameter.
    ///
    /// Contains a human-readable error.
    #[error("Unexpected null value in ORC file: {0}")]
    UnexpectedNull(String),
    /// [`read_from_vector_batch`](OrcDeserialize::read_from_vector_batch) was given
    /// a `src` column batch longer than its `dst` vector.
    ///
    /// `src` and `dst` are the respective lengths of the two buffers.
    #[error("Tried to deserialize {src}-long buffer into {dst}-long buffer")]
    MismatchedLength { src: u64, dst: u64 },
}

/// Checks that `got_kind` is one of `expected_kinds`.
///
/// On mismatch, the error is a human-readable sentence naming `type_name`, the
/// accepted kinds (`Debug`-formatted and separated by `/`) and the kind that was
/// actually found.
fn check_kind_equals(
    got_kind: &Kind,
    expected_kinds: &[Kind],
    type_name: &str,
) -> Result<(), String> {
    // Happy path first: any accepted kind matching is enough.
    if expected_kinds.iter().any(|candidate| candidate == got_kind) {
        return Ok(());
    }

    let accepted: Vec<String> = expected_kinds
        .iter()
        .map(|candidate| format!("{:?}", candidate))
        .collect();

    Err(format!(
        "{} must be decoded from ORC {}, not ORC {:?}",
        type_name,
        accepted.join("/"),
        got_kind
    ))
}

/// Types which provide a static `check_kind` method to ensure ORC files can be
/// deserialized into them.
pub trait CheckableKind {
    /// Returns whether the type can be deserialized from [`RowReader`](::reader::RowReader)
    /// instances with this [selected_kind](::reader::RowReader::selected_kind).
    ///
    /// This should be called before any method provided by [`OrcDeserialize`],
    /// to get errors early and with a human-readable error message instead of cast errors
    /// or deserialization into incorrect types (eg. if a file has two fields swapped).
    ///
    /// # Errors
    ///
    /// Returns a human-readable description of the mismatch when `kind` is not
    /// compatible with the implementing type.
    fn check_kind(kind: &Kind) -> Result<(), String>;
}

// Needed because most structs are going to have Option as fields, and code generated by
// orcxx_derive needs to call check_kind on them recursively.
// This avoids needing to dig into the AST to extract the inner type of the Option.
impl<T: CheckableKind> CheckableKind for Option<T> {
    /// Delegates to the inner type: nullability does not change the accepted kind.
    fn check_kind(kind: &Kind) -> Result<(), String> {
        T::check_kind(kind)
    }
}

/// Types which provide a static `columns` method, which returns the names of all
/// ORC columns the struct expects to read from.
///
/// Nested field names are separated by dots.
///
/// For scalars, this method simply returns the prefix.
pub trait OrcStruct {
    /// Returns the names of all columns, using an empty prefix.
    fn columns() -> Vec<String> {
        Self::columns_with_prefix("")
    }

    /// Returns the names of all columns, as they would be named if `Self` were
    /// located at `prefix`.
    fn columns_with_prefix(prefix: &str) -> Vec<String>;
}

/// An optional value reads from the same columns as its inner type.
impl<T: OrcStruct> OrcStruct for Option<T> {
    fn columns_with_prefix(prefix: &str) -> Vec<String> {
        T::columns_with_prefix(prefix)
    }
}

/// Types which can be read in batch from ORC columns ([`BorrowedColumnVectorBatch`]).
pub trait OrcDeserialize: Sized + Default + CheckableKind {
    /// Reads from a [`BorrowedColumnVectorBatch`] to a structure that behaves like
    /// a rewindable iterator of `&mut Self`, and returns the number of rows written.
    ///
    /// If the number of rows written is strictly smaller than `dst`'s size, then
    /// **elements at the end of the `dst` are left unchanged**.
    ///
    /// Users should call
    /// [`check_kind(row_reader.selected_kind()).unwrap()`](CheckableKind::check_kind)
    /// before calling this function on batches produced by a `row_reader`.
    fn read_from_vector_batch<'a, 'b, T>(
        src: &BorrowedColumnVectorBatch,
        dst: &'b mut T,
    ) -> Result<usize, DeserializationError>
    where
        Self: 'a,
        &'b mut T: DeserializationTarget<'a, Item = Self> + 'b;

    /// Reads from a [`BorrowedColumnVectorBatch`] and returns a `Vec<Self>`
    ///
    /// Users should call
    /// [`check_kind(row_reader.selected_kind()).unwrap()`](CheckableKind::check_kind)
    /// before calling this function on batches produced by a `row_reader`.
    ///
    /// This is a wrapper for
    /// [`read_from_vector_batch`](OrcDeserialize::read_from_vector_batch)
    /// which takes care of allocating a buffer, and returns it.
    ///
    /// # Errors
    ///
    /// Returns [`DeserializationError::UsizeOverflow`] if the number of elements of
    /// the batch does not fit in a `usize`, and forwards any error returned by
    /// [`read_from_vector_batch`](OrcDeserialize::read_from_vector_batch).
    fn from_vector_batch(
        vector_batch: &BorrowedColumnVectorBatch,
    ) -> Result<Vec<Self>, DeserializationError> {
        let num_elements = vector_batch.num_elements();
        let num_elements = num_elements
            .try_into()
            .map_err(DeserializationError::UsizeOverflow)?;
        let mut values = Vec::with_capacity(num_elements);
        // Pre-fill the buffer with defaults so that it is exactly as long as the batch.
        values.resize_with(num_elements, Default::default);
        Self::read_from_vector_batch(vector_batch, &mut values)?;
        Ok(values)
    }
}

/// Implements [`OrcStruct`], [`CheckableKind`] and [`OrcDeserialize`] (for both `$ty`
/// and `Option<$ty>`) for a scalar type.
///
/// * `$ty`: the Rust type to implement the traits for
/// * `$kind`: array of [`Kind`]s the type may be decoded from
/// * `$method`: method of [`BorrowedColumnVectorBatch`] casting the column to the
///   matching typed vector batch
/// * `$cast`: optional fallible conversion from a value yielded by the typed batch
///   to `$ty`; defaults to an `as` cast
///
/// The generated `read_from_vector_batch` implementations return
/// [`DeserializationError::MismatchedLength`] when the column holds more rows than
/// `dst` can receive, instead of silently dropping the extra rows.
macro_rules! impl_scalar {
    ($ty:ty, $kind:expr, $method:ident) => {
        impl_scalar!($ty, $kind, $method, |s| Ok(s as $ty));
    };
    ($ty:ty, $kind:expr, $method:ident, $cast:expr) => {
        impl OrcStruct for $ty {
            fn columns_with_prefix(prefix: &str) -> Vec<String> {
                vec![prefix.to_string()]
            }
        }

        impl CheckableKind for $ty {
            fn check_kind(kind: &Kind) -> Result<(), String> {
                check_kind_equals(kind, &$kind, stringify!($ty))
            }
        }

        impl OrcDeserialize for $ty {
            fn read_from_vector_batch<'a, 'b, T>(
                src: &BorrowedColumnVectorBatch,
                mut dst: &'b mut T,
            ) -> Result<usize, DeserializationError>
            where
                &'b mut T: DeserializationTarget<'a, Item = Self> + 'b,
            {
                let src = src
                    .$method()
                    .map_err(DeserializationError::MismatchedColumnKind)?;

                let num_rows: usize = src
                    .num_elements()
                    .try_into()
                    .map_err(DeserializationError::UsizeOverflow)?;
                // `zip` below stops at the shortest side, so a too-short `dst` would
                // otherwise lose rows while still reporting them as written.
                if num_rows > dst.len() {
                    return Err(DeserializationError::MismatchedLength {
                        src: num_rows as u64,
                        dst: dst.len() as u64,
                    });
                }

                match src.try_iter_not_null() {
                    None => Err(DeserializationError::UnexpectedNull(format!(
                        "{} column contains nulls",
                        stringify!($ty)
                    ))),
                    Some(it) => {
                        for (s, d) in it.zip(dst.iter_mut()) {
                            *d = ($cast)(s)?
                        }

                        Ok(num_rows)
                    }
                }
            }
        }

        impl OrcDeserialize for Option<$ty> {
            fn read_from_vector_batch<'a, 'b, T>(
                src: &BorrowedColumnVectorBatch,
                mut dst: &'b mut T,
            ) -> Result<usize, DeserializationError>
            where
                &'b mut T: DeserializationTarget<'a, Item = Self> + 'b,
            {
                let src = src
                    .$method()
                    .map_err(DeserializationError::MismatchedColumnKind)?;

                let num_rows: usize = src
                    .num_elements()
                    .try_into()
                    .map_err(DeserializationError::UsizeOverflow)?;
                // Same guard as the non-nullable implementation: never truncate silently.
                if num_rows > dst.len() {
                    return Err(DeserializationError::MismatchedLength {
                        src: num_rows as u64,
                        dst: dst.len() as u64,
                    });
                }

                for (s, d) in src.iter().zip(dst.iter_mut()) {
                    match s {
                        None => *d = None,
                        Some(s) => *d = Some(($cast)(s)?),
                    }
                }

                Ok(num_rows)
            }
        }
    };
}

// Booleans are read through `try_into_longs`; any non-zero value is `true`.
impl_scalar!(bool, [Kind::Boolean], try_into_longs, |s| Ok(s != 0));
// Integer types are all read through `try_into_longs`, then converted with the
// macro's default `as` cast.
impl_scalar!(i8, [Kind::Byte], try_into_longs);
impl_scalar!(i16, [Kind::Short], try_into_longs);
impl_scalar!(i32, [Kind::Int], try_into_longs);
impl_scalar!(i64, [Kind::Long], try_into_longs);
// Floating-point types are both read through `try_into_doubles`, then converted
// with the macro's default `as` cast.
impl_scalar!(f32, [Kind::Float], try_into_doubles);
impl_scalar!(f64, [Kind::Double], try_into_doubles);
// Strings must be valid UTF-8; invalid data yields `DeserializationError::Utf8Error`.
impl_scalar!(String, [Kind::String], try_into_strings, |s| {
    std::str::from_utf8(s)
        .map_err(DeserializationError::Utf8Error)
        .map(|s| s.to_string())
});
// Binary columns are copied byte-for-byte, without any validation.
impl_scalar!(Vec<u8>, [Kind::Binary], try_into_strings, |s: &[u8]| Ok(
    s.to_vec()
));

// Timestamps are yielded as a pair, mapped to `(seconds, nanoseconds)`.
impl_scalar!(
    crate::Timestamp,
    [Kind::Timestamp],
    try_into_timestamps,
    |s: (i64, i64)| Ok(crate::Timestamp {
        seconds: s.0,
        nanoseconds: s.1
    })
);

/// `Decimal` is a scalar: its only column is the prefix itself.
impl OrcStruct for Decimal {
    fn columns_with_prefix(prefix: &str) -> Vec<String> {
        vec![prefix.to_string()]
    }
}

impl CheckableKind for Decimal {
    /// Accepts any ORC `Decimal` kind, whatever its parameters, and rejects every
    /// other kind with a human-readable message.
    fn check_kind(kind: &Kind) -> Result<(), String> {
        if matches!(kind, Kind::Decimal { .. }) {
            Ok(())
        } else {
            Err(format!(
                "Decimal must be decoded from ORC Decimal, not ORC {:?}",
                kind
            ))
        }
    }
}

impl OrcDeserialize for Decimal {
    /// Reads a non-nullable decimal column into `dst`.
    ///
    /// The column is first cast with `try_into_decimals64`; if that cast fails,
    /// `try_into_decimals128` is tried instead.
    ///
    /// # Errors
    ///
    /// * [`DeserializationError::UsizeOverflow`] if the number of rows does not fit
    ///   in a `usize`
    /// * [`DeserializationError::MismatchedLength`] if `src` has more rows than `dst`
    /// * [`DeserializationError::MismatchedColumnKind`] if both casts fail
    /// * [`DeserializationError::UnexpectedNull`] if the column contains nulls
    fn read_from_vector_batch<'a, 'b, T>(
        src: &BorrowedColumnVectorBatch,
        mut dst: &'b mut T,
    ) -> Result<usize, DeserializationError>
    where
        &'b mut T: DeserializationTarget<'a, Item = Self> + 'b,
    {
        let num_rows: usize = src
            .num_elements()
            .try_into()
            .map_err(DeserializationError::UsizeOverflow)?;
        // `zip` below stops at the shortest side, so a too-short `dst` would
        // otherwise lose rows while still reporting them as written.
        if num_rows > dst.len() {
            return Err(DeserializationError::MismatchedLength {
                src: num_rows as u64,
                dst: dst.len() as u64,
            });
        }

        match src.try_into_decimals64() {
            Ok(src) => match src.try_iter_not_null() {
                None => {
                    return Err(DeserializationError::UnexpectedNull(
                        "Decimal column contains nulls".to_string(),
                    ))
                }
                Some(it) => {
                    for (s, d) in it.zip(dst.iter_mut()) {
                        *d = s;
                    }
                }
            },
            // Not a 64-bit decimal batch: fall back to the 128-bit representation.
            Err(_) => {
                let src = src
                    .try_into_decimals128()
                    .map_err(DeserializationError::MismatchedColumnKind)?;
                match src.try_iter_not_null() {
                    None => {
                        return Err(DeserializationError::UnexpectedNull(
                            "Decimal column contains nulls".to_string(),
                        ))
                    }
                    Some(it) => {
                        for (s, d) in it.zip(dst.iter_mut()) {
                            *d = s;
                        }
                    }
                }
            }
        }

        Ok(num_rows)
    }
}

impl OrcDeserialize for Option<Decimal> {
    /// Reads a nullable decimal column into `dst`, writing `None` for null rows.
    ///
    /// The column is first cast with `try_into_decimals64`; if that cast fails,
    /// `try_into_decimals128` is tried instead.
    ///
    /// # Errors
    ///
    /// * [`DeserializationError::UsizeOverflow`] if the number of rows does not fit
    ///   in a `usize`
    /// * [`DeserializationError::MismatchedLength`] if `src` has more rows than `dst`
    /// * [`DeserializationError::MismatchedColumnKind`] if both casts fail
    fn read_from_vector_batch<'a, 'b, T>(
        src: &BorrowedColumnVectorBatch,
        mut dst: &'b mut T,
    ) -> Result<usize, DeserializationError>
    where
        &'b mut T: DeserializationTarget<'a, Item = Self> + 'b,
    {
        let num_rows: usize = src
            .num_elements()
            .try_into()
            .map_err(DeserializationError::UsizeOverflow)?;
        // `zip` below stops at the shortest side, so a too-short `dst` would
        // otherwise lose rows while still reporting them as written.
        if num_rows > dst.len() {
            return Err(DeserializationError::MismatchedLength {
                src: num_rows as u64,
                dst: dst.len() as u64,
            });
        }

        match src.try_into_decimals64() {
            Ok(src) => {
                for (s, d) in src.iter().zip(dst.iter_mut()) {
                    *d = s;
                }
            }
            // Not a 64-bit decimal batch: fall back to the 128-bit representation.
            Err(_) => {
                let src = src
                    .try_into_decimals128()
                    .map_err(DeserializationError::MismatchedColumnKind)?;
                for (s, d) in src.iter().zip(dst.iter_mut()) {
                    *d = s;
                }
            }
        }

        Ok(num_rows)
    }
}

/// A list reads from the same columns as its item type; no path component is added
/// for the list itself.
impl<T: OrcStruct> OrcStruct for Vec<T> {
    fn columns_with_prefix(prefix: &str) -> Vec<String> {
        T::columns_with_prefix(prefix)
    }
}

impl<T: CheckableKind> CheckableKind for Vec<T> {
    /// Requires `kind` to be an ORC list, then checks the list's item kind against `T`.
    fn check_kind(kind: &Kind) -> Result<(), String> {
        if let Kind::List(inner) = kind {
            T::check_kind(inner)
        } else {
            Err(format!("Must be a List, not {:?}", kind))
        }
    }
}

/// Shared initialization code of `impl<I> OrcDeserializeOption for Vec<I>`
/// and `impl<I> OrcDeserialize for Vec<I>`
///
/// Expands to an expression evaluating to `(src, elements)`, where `src` is `$src`
/// cast to a list batch and `elements` is an owning iterator over all deserialized
/// inner items of the batch.
///
/// The expansion uses `?` and `return`, so it must be used inside a function
/// returning `Result<_, DeserializationError>`, and it refers to a type named `I`
/// (the list's item type) which must be in scope at the call site.
macro_rules! init_list_read {
    ($src:expr, $dst: expr) => {{
        let src = $src
            .try_into_lists()
            .map_err(DeserializationError::MismatchedColumnKind)?;

        // Number of lists (ie. rows) in the batch
        let num_lists: usize = src
            .num_elements()
            .try_into()
            .map_err(DeserializationError::UsizeOverflow)?;
        // Total number of items of the inner column of the batch
        let num_elements: usize = src
            .elements()
            .num_elements()
            .try_into()
            .map_err(DeserializationError::UsizeOverflow)?;

        // Each list is written to its own item of $dst, so $dst needs at least
        // num_lists items.
        if num_lists > $dst.len() {
            return Err(DeserializationError::MismatchedLength {
                src: num_lists as u64,
                dst: $dst.len() as u64,
            });
        }

        // Deserialize the inner elements recursively into this temporary buffer.
        // TODO: write them directly to the final location to avoid a copy
        let mut elements = Vec::new();
        elements.resize_with(num_elements, Default::default);
        OrcDeserialize::read_from_vector_batch::<Vec<I>>(&src.elements(), &mut elements)?;

        let elements = elements.into_iter();

        (src, elements)
    }};
}

/// Shared loop code of `impl<I> OrcDeserializeOption for Vec<I>`
/// and `impl<I> OrcDeserialize for Vec<I>`
///
/// Builds one list by taking `range.end - range.start` items from `$elements`, and
/// updates `$last_offset` to the end of `$range`.
///
/// * `$range`: range of offsets of the list in the inner column
/// * `$last_offset`: mutable place holding the end offset of the previous list
/// * `$elements`: mutable iterator over the not-yet-consumed inner items
///
/// The expansion refers to a type named `I` (the list's item type), which must be in
/// scope at the call site.
///
/// # Panics
///
/// If `$range` does not start where the previous list ended, or if `$elements` runs
/// out of items before the list is complete.
macro_rules! build_list_item {
    ($range:expr, $last_offset:expr, $elements:expr) => {{
        let range = $range;
        // Items are consumed sequentially from $elements, so lists must be contiguous.
        assert_eq!(
            range.start, $last_offset,
            "Non-continuous list (jumped from offset {} to {})",
            $last_offset, range.start
        );
        // Safe because offset is bounded by num_elements;
        let mut array: Vec<I> = Vec::with_capacity((range.end - range.start) as usize);
        for _ in range.clone() {
            match $elements.next() {
                Some(item) => {
                    array.push(item);
                }
                None => panic!(
                    "List too short (expected {} elements, got {})",
                    range.end - range.start,
                    array.len()
                ),
            }
        }
        $last_offset = range.end;
        array
    }};
}

/// Deserialization of ORC lists with nullable values
///
/// cannot do `impl<I> OrcDeserialize for Option<Vec<Option<I>>>` because it causes
/// infinite recursion in the type-checker due to this other implementation being
/// available: `impl<I: OrcDeserializeOption> OrcDeserialize for Option<I>`.
impl<I> OrcDeserializeOption for Vec<I>
where
    I: Default + OrcDeserialize,
{
    /// Reads a list column into `dst`, writing `None` for null lists and
    /// `Some(items)` for the others.
    ///
    /// # Panics
    ///
    /// If the offsets of the lists are not contiguous, or do not match the number
    /// of items of the inner column.
    fn read_options_from_vector_batch<'a, 'b, T>(
        src: &BorrowedColumnVectorBatch,
        mut dst: &'b mut T,
    ) -> Result<usize, DeserializationError>
    where
        &'b mut T: DeserializationTarget<'a, Item = Option<Self>> + 'b,
    {
        let (src, mut elements) = init_list_read!(src, dst);
        let offsets = src.iter_offsets();
        let mut dst = dst.iter_mut();

        let mut last_offset = 0;

        for offset in offsets {
            // SAFETY: init_list_read! returned an error unless the number of lists
            // is <= dst.len(), and implementations of DeserializationTarget guarantee
            // iter_mut() yields at least len() items.
            // NOTE(review): this also assumes `offsets` yields at most one item per
            // list; iter_offsets() is not visible from here, confirm.
            let dst_item: &mut Option<Vec<I>> = unsafe { dst.next().unwrap_unchecked() };
            match offset {
                None => *dst_item = None,
                Some(range) => {
                    *dst_item = Some(build_list_item!(range, last_offset, elements));
                }
            }
        }
        // Every inner item must belong to exactly one list.
        if elements.next().is_some() {
            panic!("List too long");
        }

        Ok(src.num_elements().try_into().unwrap())
    }
}

/// Deserialization of ORC lists without nullable values
impl<I> OrcDeserialize for Vec<I>
where
    I: OrcDeserialize,
{
    /// Reads a list column without null lists into `dst`.
    ///
    /// # Errors
    ///
    /// In addition to the errors of the shared list initialization (mismatched column
    /// kind, `usize` overflow, `src` longer than `dst`, errors from the inner items),
    /// returns [`DeserializationError::UnexpectedNull`] if the column contains nulls.
    ///
    /// # Panics
    ///
    /// If the offsets of the lists are not contiguous, or do not match the number
    /// of items of the inner column.
    fn read_from_vector_batch<'a, 'b, T>(
        src: &BorrowedColumnVectorBatch,
        mut dst: &'b mut T,
    ) -> Result<usize, DeserializationError>
    where
        &'b mut T: DeserializationTarget<'a, Item = Self> + 'b,
    {
        let (src, mut elements) = init_list_read!(src, dst);
        match src.try_iter_offsets_not_null() {
            // This impl is not generated by a macro, so there is no `$ty` to
            // stringify here; name the Rust type explicitly instead.
            None => Err(DeserializationError::UnexpectedNull(
                "Vec column contains nulls".to_string(),
            )),
            Some(offsets) => {
                let mut dst = dst.iter_mut();

                let mut last_offset = 0;

                for range in offsets {
                    // SAFETY: init_list_read! returned an error unless the number of
                    // lists is <= dst.len(), and implementations of
                    // DeserializationTarget guarantee iter_mut() yields at least
                    // len() items.
                    // NOTE(review): this also assumes `offsets` yields at most one
                    // item per list; try_iter_offsets_not_null() is not visible from
                    // here, confirm.
                    let dst_item: &mut Vec<I> = unsafe { dst.next().unwrap_unchecked() };

                    *dst_item = build_list_item!(range, last_offset, elements);
                }
                // Every inner item must belong to exactly one list.
                if elements.next().is_some() {
                    panic!("List too long");
                }

                src.num_elements()
                    .try_into()
                    .map_err(DeserializationError::UsizeOverflow)
            }
        }
    }
}

/// The trait of things that can have ORC data written to them.
///
/// It must be (mutably) iterable, exact-size, and iterable multiple times (one for
/// each column it contains).
///
/// # Safety
///
/// Implementations returning `len()` values larger than the
/// actual length of the iterator returned by `iter_mut()` would lead to
/// undefined behavior (values yielded by the iterator are unwrapped unsafely,
/// for performance).
pub unsafe trait DeserializationTarget<'a> {
    /// Type of the values written to the target
    type Item: 'a;
    /// Iterator over mutable references to the items of the target
    type IterMut<'b>: Iterator<Item = &'b mut Self::Item>
    where
        Self: 'b,
        'a: 'b;

    /// Number of items in the target. See the trait's safety section.
    fn len(&self) -> usize;
    /// Returns a new iterator over the items of the target.
    fn iter_mut(&mut self) -> Self::IterMut<'_>;

    /// Returns whether the target has no items.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Wraps the target in a [`MultiMap`] holding `f`, a function from
    /// `&mut Self::Item` to `&mut B`.
    fn map<B, F>(&mut self, f: F) -> MultiMap<Self, F>
    where
        Self: Sized,
        F: FnMut(&mut Self::Item) -> &mut B,
    {
        MultiMap { iter: self, f }
    }
}

// SAFETY: len() and iter_mut() both delegate to the vector itself, so they agree
// on the number of items.
unsafe impl<'a, V: Sized + 'a> DeserializationTarget<'a> for &mut Vec<V> {
    type Item = V;
    type IterMut<'b> = IterMut<'b, V> where V: 'b, 'a: 'b, Self: 'b;

    fn len(&self) -> usize {
        // Explicitly call Vec's own len() rather than this trait method
        (self as &Vec<_>).len()
    }

    fn iter_mut(&mut self) -> IterMut<'_, V> {
        // Explicitly call the slice's iter_mut() rather than this trait method
        <[_]>::iter_mut(self)
    }
}

/// A map that can be iterated multiple times
///
/// Built by [`DeserializationTarget::map`].
pub struct MultiMap<'c, T: Sized, F> {
    /// The underlying target
    iter: &'c mut T,
    /// The function applied to each item of the underlying target
    f: F,
}

// SAFETY: len() is the length reported by the underlying target, and iter_mut()
// maps the underlying target's iterator one-to-one; so this is sound as long as
// the underlying target upholds the contract of DeserializationTarget.
unsafe impl<'a, 'c, V: Sized + 'a, V2: Sized + 'a, T, F> DeserializationTarget<'a>
    for &mut MultiMap<'c, T, F>
where
    F: Copy + for<'b> FnMut(&'b mut V) -> &'b mut V2,
    T: DeserializationTarget<'a, Item = V>,
{
    type Item = V2;
    type IterMut<'b> = Map<T::IterMut<'b>, F> where T: 'b, 'a: 'b, F: 'b, Self: 'b;

    fn len(&self) -> usize {
        self.iter.len()
    }

    fn iter_mut(&mut self) -> Map<T::IterMut<'_>, F> {
        // F is Copy, so each call gets its own copy of the function; this is what
        // allows iterating more than once.
        self.iter.iter_mut().map(self.f)
    }
}

/// Builds the initial buffer used to deserialize a [`StructVectorBatch`] into.
///
/// The returned vector has one item per row of `vector_batch`: `Some` of the
/// [`Default`] value of `T` for every row which is not null, and `None` for every
/// null row. When the batch carries no null information, every row is `Some`.
pub fn default_option_vec<T: Default>(vector_batch: &StructVectorBatch) -> Vec<Option<T>> {
    if let Some(not_null) = vector_batch.not_null() {
        // A zero flag marks a null row.
        not_null
            .iter()
            .map(|&flag| (flag != 0).then(T::default))
            .collect()
    } else {
        (0..vector_batch.num_elements())
            .map(|_| Some(T::default()))
            .collect()
    }
}

/// Internal trait to allow implementing OrcDeserialize on `Option<T>` where `T` is
/// a structure defined in other crates
pub trait OrcDeserializeOption: Sized + CheckableKind {
    /// Reads from a [`BorrowedColumnVectorBatch`] to a structure that behaves like
    /// a rewindable iterator of `&mut Option<Self>`.
    ///
    /// The signature mirrors
    /// [`read_from_vector_batch`](OrcDeserialize::read_from_vector_batch), which
    /// forwards to this method for `Option<Self>`.
    fn read_options_from_vector_batch<'a, 'b, T>(
        src: &BorrowedColumnVectorBatch,
        dst: &'b mut T,
    ) -> Result<usize, DeserializationError>
    where
        Self: 'a,
        &'b mut T: DeserializationTarget<'a, Item = Option<Self>> + 'b;
}

/// Blanket implementation forwarding to [`OrcDeserializeOption`], so that
/// implementing [`OrcDeserializeOption`] for a type is enough to make its `Option`
/// deserializable.
impl<I: OrcDeserializeOption> OrcDeserialize for Option<I> {
    fn read_from_vector_batch<'a, 'b, T>(
        src: &BorrowedColumnVectorBatch,
        dst: &'b mut T,
    ) -> Result<usize, DeserializationError>
    where
        &'b mut T: DeserializationTarget<'a, Item = Self> + 'b,
        I: 'a,
    {
        I::read_options_from_vector_batch(src, dst)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use kind::Kind;
    use vector::BorrowedColumnVectorBatch;

    /// Checks that a hand-written struct deserializer built on
    /// `DeserializationTarget::map` type-checks.
    #[test]
    fn test_map_struct() {
        // TODO: for now this test only makes sure the code compiles, but it should
        // actually run it eventually.
        #[derive(Default)]
        struct Test {
            field1: Option<i64>,
        }

        impl CheckableKind for Test {
            fn check_kind(kind: &Kind) -> Result<(), String> {
                // NOTE(review): the type name below looks copy-pasted; it should
                // probably be "Test". It only affects the error message.
                check_kind_equals(
                    kind,
                    &[Kind::Struct(vec![("field1".to_owned(), Kind::Long)])],
                    "Vec<u8>",
                )
            }
        }

        impl OrcDeserialize for Option<Test> {
            fn read_from_vector_batch<'a, 'b, T>(
                src: &BorrowedColumnVectorBatch,
                mut dst: &'b mut T,
            ) -> Result<usize, DeserializationError>
            where
                &'b mut T: DeserializationTarget<'a, Item = Self>,
            {
                let src = src
                    .try_into_structs()
                    .map_err(DeserializationError::MismatchedColumnKind)?;
                let columns = src.fields();
                let column: BorrowedColumnVectorBatch = columns.into_iter().next().unwrap();
                // Deserialize the first column directly into the `field1` of each
                // struct of `dst`.
                OrcDeserialize::read_from_vector_batch::<MultiMap<&mut T, _>>(
                    &column,
                    &mut dst.map(|struct_| &mut struct_.as_mut().unwrap().field1),
                )?;

                Ok(src.num_elements().try_into().unwrap())
            }
        }
    }

    /// Scalar types accept the ORC kind they are declared with.
    #[test]
    fn test_check_kind() {
        assert_eq!(i64::check_kind(&Kind::Long), Ok(()));
        assert_eq!(crate::Timestamp::check_kind(&Kind::Timestamp), Ok(()));
        assert_eq!(String::check_kind(&Kind::String), Ok(()));
        assert_eq!(Vec::<u8>::check_kind(&Kind::Binary), Ok(()));
    }

    /// Scalar types reject other ORC kinds, with a human-readable message.
    #[test]
    fn test_check_kind_fail() {
        assert_eq!(
            i64::check_kind(&Kind::String),
            Err("i64 must be decoded from ORC Long, not ORC String".to_string())
        );
        assert_eq!(
            i64::check_kind(&Kind::Int),
            Err("i64 must be decoded from ORC Long, not ORC Int".to_string())
        );
        assert_eq!(
            String::check_kind(&Kind::Int),
            Err("String must be decoded from ORC String, not ORC Int".to_string())
        );
        assert_eq!(
            String::check_kind(&Kind::Binary),
            Err("String must be decoded from ORC String, not ORC Binary".to_string())
        );
        assert_eq!(
            Vec::<u8>::check_kind(&Kind::Int),
            Err("Vec<u8> must be decoded from ORC Binary, not ORC Int".to_string())
        );
        assert_eq!(
            Vec::<u8>::check_kind(&Kind::String),
            Err("Vec<u8> must be decoded from ORC Binary, not ORC String".to_string())
        );
    }
}