Struct Decoder

Source
pub struct Decoder { /* private fields */ }
Expand description

A low-level interface for reading JSON data from a byte stream

See Reader for a higher-level interface for interface with BufRead

The push-based interface facilitates integration with sources that yield arbitrarily delimited bytes ranges, such as BufRead, or a chunked byte stream received from object storage

fn read_from_json<R: BufRead>(
    mut reader: R,
    schema: SchemaRef,
) -> Result<impl Iterator<Item = Result<RecordBatch, ArrowError>>, ArrowError> {
    let mut decoder = ReaderBuilder::new(schema).build_decoder()?;
    let mut next = move || {
        loop {
            // Decoder is agnostic that buf doesn't contain whole records
            let buf = reader.fill_buf()?;
            if buf.is_empty() {
                break; // Input exhausted
            }
            let read = buf.len();
            let decoded = decoder.decode(buf)?;

            // Consume the number of bytes read
            reader.consume(decoded);
            if decoded != read {
                break; // Read batch size
            }
        }
        decoder.flush()
    };
    Ok(std::iter::from_fn(move || next().transpose()))
}

Implementations§

Source§

impl Decoder

Source

pub fn decode(&mut self, buf: &[u8]) -> Result<usize, ArrowError>

Read JSON objects from buf, returning the number of bytes read

This method returns once batch_size objects have been parsed since the last call to Self::flush, or buf is exhausted. Any remaining bytes should be included in the next call to Self::decode

There is no requirement that buf contains a whole number of records, facilitating integration with arbitrary byte streams, such as those yielded by BufRead

Source

pub fn serialize<S>(&mut self, rows: &[S]) -> Result<(), ArrowError>
where S: Serialize,

Serialize rows to this Decoder

This provides a simple way to convert serde-compatible datastructures into arrow RecordBatch.

Custom conversion logic as described in arrow_array::builder will likely outperform this, especially where the schema is known at compile-time, however, this provides a mechanism to get something up and running quickly

It can be used with serde_json::Value

let json = vec![json!({"float": 2.3}), json!({"float": 5.7})];

let schema = Schema::new(vec![Field::new("float", DataType::Float32, true)]);
let mut decoder = ReaderBuilder::new(Arc::new(schema)).build_decoder().unwrap();

decoder.serialize(&json).unwrap();
let batch = decoder.flush().unwrap().unwrap();
assert_eq!(batch.num_rows(), 2);
assert_eq!(batch.num_columns(), 1);
let values = batch.column(0).as_primitive::<Float32Type>().values();
assert_eq!(values, &[2.3, 5.7])

Or with arbitrary Serialize types

#[derive(Serialize)]
struct MyStruct {
    int32: i32,
    float: f32,
}

let schema = Schema::new(vec![
    Field::new("int32", DataType::Int32, false),
    Field::new("float", DataType::Float32, false),
]);

let rows = vec![
    MyStruct{ int32: 0, float: 3. },
    MyStruct{ int32: 4, float: 67.53 },
];

let mut decoder = ReaderBuilder::new(Arc::new(schema)).build_decoder().unwrap();
decoder.serialize(&rows).unwrap();

let batch = decoder.flush().unwrap().unwrap();

// Expect batch containing two columns
let int32 = batch.column(0).as_primitive::<Int32Type>();
assert_eq!(int32.values(), &[0, 4]);

let float = batch.column(1).as_primitive::<Float32Type>();
assert_eq!(float.values(), &[3., 67.53]);

Or even complex nested types

#[derive(Serialize)]
struct MyStruct {
    int32: i32,
    list: Vec<f64>,
    nested: Vec<Option<Nested>>,
}

impl MyStruct {
    /// Returns the [`Fields`] for [`MyStruct`]
    fn fields() -> Fields {
        let nested = DataType::Struct(Nested::fields());
        Fields::from([
            Arc::new(Field::new("int32", DataType::Int32, false)),
            Arc::new(Field::new_list(
                "list",
                Field::new("element", DataType::Float64, false),
                false,
            )),
            Arc::new(Field::new_list(
                "nested",
                Field::new("element", nested, true),
                true,
            )),
        ])
    }
}

#[derive(Serialize)]
struct Nested {
    map: BTreeMap<String, Vec<String>>
}

impl Nested {
    /// Returns the [`Fields`] for [`Nested`]
    fn fields() -> Fields {
        let element = Field::new("element", DataType::Utf8, false);
        Fields::from([
            Arc::new(Field::new_map(
                "map",
                "entries",
                Field::new("key", DataType::Utf8, false),
                Field::new_list("value", element, false),
                false, // sorted
                false, // nullable
            ))
        ])
    }
}

let data = vec![
    MyStruct {
        int32: 34,
        list: vec![1., 2., 34.],
        nested: vec![
            None,
            Some(Nested {
                map: vec![
                    ("key1".to_string(), vec!["foo".to_string(), "bar".to_string()]),
                    ("key2".to_string(), vec!["baz".to_string()])
                ].into_iter().collect()
            })
        ]
    },
    MyStruct {
        int32: 56,
        list: vec![],
        nested: vec![]
    },
    MyStruct {
        int32: 24,
        list: vec![-1., 245.],
        nested: vec![None]
    }
];

let schema = Schema::new(MyStruct::fields());
let mut decoder = ReaderBuilder::new(Arc::new(schema)).build_decoder().unwrap();
decoder.serialize(&data).unwrap();
let batch = decoder.flush().unwrap().unwrap();
assert_eq!(batch.num_rows(), 3);
assert_eq!(batch.num_columns(), 3);

// Convert to StructArray to format
let s = StructArray::from(batch);
let options = FormatOptions::default().with_null("null");
let formatter = ArrayFormatter::try_new(&s, &options).unwrap();

assert_eq!(&formatter.value(0).to_string(), "{int32: 34, list: [1.0, 2.0, 34.0], nested: [null, {map: {key1: [foo, bar], key2: [baz]}}]}");
assert_eq!(&formatter.value(1).to_string(), "{int32: 56, list: [], nested: []}");
assert_eq!(&formatter.value(2).to_string(), "{int32: 24, list: [-1.0, 245.0], nested: [null]}");

Note: this ignores any batch size setting, and always decodes all rows

Source

pub fn has_partial_record(&self) -> bool

True if the decoder is currently part way through decoding a record.

Source

pub fn len(&self) -> usize

The number of unflushed records, including the partially decoded record (if any).

Source

pub fn is_empty(&self) -> bool

True if there are no records to flush, i.e. Self::len is zero.

Source

pub fn flush(&mut self) -> Result<Option<RecordBatch>, ArrowError>

Flushes the currently buffered data to a RecordBatch

Returns Ok(None) if no buffered data, i.e. Self::is_empty is true.

Note: This will return an error if called part way through decoding a record, i.e. Self::has_partial_record is true.

Trait Implementations§

Source§

impl Debug for Decoder

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> AlignerFor<1> for T

Source§

type Aligner = AlignTo1<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<1024> for T

Source§

type Aligner = AlignTo1024<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<128> for T

Source§

type Aligner = AlignTo128<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<16> for T

Source§

type Aligner = AlignTo16<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<16384> for T

Source§

type Aligner = AlignTo16384<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<2> for T

Source§

type Aligner = AlignTo2<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<2048> for T

Source§

type Aligner = AlignTo2048<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<256> for T

Source§

type Aligner = AlignTo256<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<32> for T

Source§

type Aligner = AlignTo32<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<32768> for T

Source§

type Aligner = AlignTo32768<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<4> for T

Source§

type Aligner = AlignTo4<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<4096> for T

Source§

type Aligner = AlignTo4096<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<512> for T

Source§

type Aligner = AlignTo512<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<64> for T

Source§

type Aligner = AlignTo64<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<8> for T

Source§

type Aligner = AlignTo8<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> AlignerFor<8192> for T

Source§

type Aligner = AlignTo8192<T>

The AlignTo* type which aligns Self to ALIGNMENT.
Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<S> ROExtAcc for S

Source§

fn f_get<F>(&self, offset: FieldOffset<S, F, Aligned>) -> &F

Gets a reference to a field, determined by offset. Read more
Source§

fn f_get_mut<F>(&mut self, offset: FieldOffset<S, F, Aligned>) -> &mut F

Gets a muatble reference to a field, determined by offset. Read more
Source§

fn f_get_ptr<F, A>(&self, offset: FieldOffset<S, F, A>) -> *const F

Gets a const pointer to a field, the field is determined by offset. Read more
Source§

fn f_get_mut_ptr<F, A>(&mut self, offset: FieldOffset<S, F, A>) -> *mut F

Gets a mutable pointer to a field, determined by offset. Read more
Source§

impl<S> ROExtOps<Aligned> for S

Source§

fn f_replace<F>(&mut self, offset: FieldOffset<S, F, Aligned>, value: F) -> F

Replaces a field (determined by offset) with value, returning the previous value of the field. Read more
Source§

fn f_swap<F>(&mut self, offset: FieldOffset<S, F, Aligned>, right: &mut S)

Swaps a field (determined by offset) with the same field in right. Read more
Source§

fn f_get_copy<F>(&self, offset: FieldOffset<S, F, Aligned>) -> F
where F: Copy,

Gets a copy of a field (determined by offset). The field is determined by offset. Read more
Source§

impl<S> ROExtOps<Unaligned> for S

Source§

fn f_replace<F>(&mut self, offset: FieldOffset<S, F, Unaligned>, value: F) -> F

Replaces a field (determined by offset) with value, returning the previous value of the field. Read more
Source§

fn f_swap<F>(&mut self, offset: FieldOffset<S, F, Unaligned>, right: &mut S)

Swaps a field (determined by offset) with the same field in right. Read more
Source§

fn f_get_copy<F>(&self, offset: FieldOffset<S, F, Unaligned>) -> F
where F: Copy,

Gets a copy of a field (determined by offset). The field is determined by offset. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T> SelfOps for T
where T: ?Sized,

Source§

fn eq_id(&self, other: &Self) -> bool

Compares the address of self with the address of other. Read more
Source§

fn piped<F, U>(self, f: F) -> U
where F: FnOnce(Self) -> U, Self: Sized,

Emulates the pipeline operator, allowing method syntax in more places. Read more
Source§

fn piped_ref<'a, F, U>(&'a self, f: F) -> U
where F: FnOnce(&'a Self) -> U,

The same as piped except that the function takes &Self Useful for functions that take &Self instead of Self. Read more
Source§

fn piped_mut<'a, F, U>(&'a mut self, f: F) -> U
where F: FnOnce(&'a mut Self) -> U,

The same as piped, except that the function takes &mut Self. Useful for functions that take &mut Self instead of Self.
Source§

fn mutated<F>(self, f: F) -> Self
where F: FnOnce(&mut Self), Self: Sized,

Mutates self using a closure taking self by mutable reference, passing it along the method chain. Read more
Source§

fn observe<F>(self, f: F) -> Self
where F: FnOnce(&Self), Self: Sized,

Observes the value of self, passing it along unmodified. Useful in long method chains. Read more
Source§

fn into_<T>(self) -> T
where Self: Into<T>,

Performs a conversion with Into. using the turbofish .into_::<_>() syntax. Read more
Source§

fn as_ref_<T>(&self) -> &T
where Self: AsRef<T>, T: ?Sized,

Performs a reference to reference conversion with AsRef, using the turbofish .as_ref_::<_>() syntax. Read more
Source§

fn as_mut_<T>(&mut self) -> &mut T
where Self: AsMut<T>, T: ?Sized,

Performs a mutable reference to mutable reference conversion with AsMut, using the turbofish .as_mut_::<_>() syntax. Read more
Source§

fn drop_(self)
where Self: Sized,

Drops self using method notation. Alternative to std::mem::drop. Read more
Source§

impl<This> TransmuteElement for This
where This: ?Sized,

Source§

unsafe fn transmute_element<T>(self) -> Self::TransmutedPtr
where Self: CanTransmuteElement<T>,

Transmutes the element type of this pointer.. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> TypeIdentity for T
where T: ?Sized,

Source§

type Type = T

This is always Self.
Source§

fn into_type(self) -> Self::Type
where Self: Sized, Self::Type: Sized,

Converts a value back to the original type.
Source§

fn as_type(&self) -> &Self::Type

Converts a reference back to the original type.
Source§

fn as_type_mut(&mut self) -> &mut Self::Type

Converts a mutable reference back to the original type.
Source§

fn into_type_box(self: Box<Self>) -> Box<Self::Type>

Converts a box back to the original type.
Source§

fn into_type_arc(this: Arc<Self>) -> Arc<Self::Type>

Converts an Arc back to the original type. Read more
Source§

fn into_type_rc(this: Rc<Self>) -> Rc<Self::Type>

Converts an Rc back to the original type. Read more
Source§

fn from_type(this: Self::Type) -> Self
where Self: Sized, Self::Type: Sized,

Converts a value back to the original type.
Source§

fn from_type_ref(this: &Self::Type) -> &Self

Converts a reference back to the original type.
Source§

fn from_type_mut(this: &mut Self::Type) -> &mut Self

Converts a mutable reference back to the original type.
Source§

fn from_type_box(this: Box<Self::Type>) -> Box<Self>

Converts a box back to the original type.
Source§

fn from_type_arc(this: Arc<Self::Type>) -> Arc<Self>

Converts an Arc back to the original type.
Source§

fn from_type_rc(this: Rc<Self::Type>) -> Rc<Self>

Converts an Rc back to the original type.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> ErasedDestructor for T
where T: 'static,

Source§

impl<T> Ungil for T
where T: Send,