Crate orcxx_derive

source ·
Expand description

Custom derive for the orcxx crate, to deserialize structs using Apache ORC C++ library.

§Supported types

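Structures can have fields of the following types (the original list is missing from this rendering; the types below are those used in the examples on this page, and the full list may be longer):

- `Option<T>`, where `T` is one of the other types listed here
- `bool`, `i8`, `i16`, `i32`, `i64`, `f32`, `f64`, `String` and `Vec<u8>`
- `Vec<T>`, where `T` is one of the other types listed here
- other structures that derive `OrcDeserialize`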

§About null values

In order to support all ORC files, every single type should be wrapped in `Option` (e.g. `struct<a:int, b:list<string>>` in ORC should be `a: Option<i32>, b: Option<Vec<Option<String>>>`), but this is cumbersome, and checking every value for `None` may have a high overhead.

If you omit `Option`, then `orcxx_derive` returns an error early for files containing null values, and avoids this overhead for files that don’t.

§Panics

See orcxx’s documentation.

§Examples

extern crate orcxx;
extern crate orcxx_derive;

use std::num::NonZeroU64;

use orcxx::deserialize::{OrcDeserialize, OrcStruct};
use orcxx::row_iterator::RowIterator;
use orcxx::reader;
use orcxx_derive::OrcDeserialize;

// Define structure.
// The field is wrapped in `Option` so that null values in the file do not
// cause an error (see "About null values" above).
#[derive(OrcDeserialize, Clone, Default, Debug, PartialEq, Eq)]
struct Test1 {
    long1: Option<i64>,
}

// Open file
let orc_path = "../orcxx/orc/examples/TestOrcFile.test1.orc";
let input_stream = reader::InputStream::from_local_file(orc_path).expect("Could not open .orc");
let reader = reader::Reader::new(input_stream).expect("Could not read .orc");

// Iterate over the file with a batch size of 1024 and collect every row.
// `rows` is never modified after being collected, so it is bound immutably.
let batch_size = NonZeroU64::new(1024).unwrap();
let rows: Vec<Option<Test1>> = RowIterator::new(&reader, batch_size)
    .expect("Could not open ORC file")
    .collect();

// The example file is expected to contain exactly these two rows.
assert_eq!(
    rows,
    vec![
        Some(Test1 {
            long1: Some(9223372036854775807)
        }),
        Some(Test1 {
            long1: Some(9223372036854775807)
        })
    ]
);

Or equivalently, to avoid cloning structures:

// Reads the same file as the previous example, but drives the row reader
// manually, one batch at a time, instead of going through `RowIterator`.
// Note that `Test1` does not derive `Clone` here.
extern crate orcxx;
extern crate orcxx_derive;

use orcxx::deserialize::{CheckableKind, OrcDeserialize, OrcStruct};
use orcxx::reader;
use orcxx_derive::OrcDeserialize;

// Define structure
#[derive(OrcDeserialize, Default, Debug, PartialEq, Eq)]
struct Test1 {
    long1: Option<i64>,
}

// Open file
let orc_path = "../orcxx/orc/examples/TestOrcFile.test1.orc";
let input_stream = reader::InputStream::from_local_file(orc_path).expect("Could not open .orc");
let reader = reader::Reader::new(input_stream).expect("Could not read .orc");

// Only read columns we need
let options = reader::RowReaderOptions::default().include_names(Test1::columns());

let mut row_reader = reader.row_reader(&options).expect("Could not open ORC file");
// Check the kind of the selected columns against `Test1` before reading;
// panics with "Unexpected schema" if the check returns an error.
Test1::check_kind(&row_reader.selected_kind()).expect("Unexpected schema");

// Accumulates the rows decoded from every batch.
let mut rows: Vec<Option<Test1>> = Vec::new();

// Allocate work buffer
// (the same `batch` is passed to every `read_into` call below)
let mut batch = row_reader.row_batch(1024);

// Read structs until the end
// (the loop stops as soon as `read_into` returns `false`)
while row_reader.read_into(&mut batch) {
    // Decode the current batch into structures; `unwrap` panics if decoding fails.
    let new_rows = Option::<Test1>::from_vector_batch(&batch.borrow()).unwrap();
    rows.extend(new_rows);
}

// The example file is expected to contain exactly these two rows.
assert_eq!(
    rows,
    vec![
        Some(Test1 {
            long1: Some(9223372036854775807)
        }),
        Some(Test1 {
            long1: Some(9223372036854775807)
        })
    ]
);

It is also possible to nest structures:

extern crate orcxx;
extern crate orcxx_derive;

use orcxx_derive::OrcDeserialize;

// Outer structure. Every field is wrapped in `Option`, and the `list` field
// holds a vector of optional `Test1ItemOption` structures, declared below.
#[derive(OrcDeserialize, Default, Debug, PartialEq)]
struct Test1Option {
    boolean1: Option<bool>,
    byte1: Option<i8>,
    short1: Option<i16>,
    int1: Option<i32>,
    long1: Option<i64>,
    float1: Option<f32>,
    double1: Option<f64>,
    bytes1: Option<Vec<u8>>,
    string1: Option<String>,
    list: Option<Vec<Option<Test1ItemOption>>>,
}

// Structure nested inside `Test1Option::list`; it derives `OrcDeserialize`
// as well.
#[derive(OrcDeserialize, Default, Debug, PartialEq)]
struct Test1ItemOption {
    int1: Option<i32>,
    string1: Option<String>,
}

Derive Macros§