pub struct RecordBatch { /* private fields */ }Expand description
A two-dimensional batch of column-oriented data with a defined schema.
A RecordBatch is a two-dimensional dataset of a number of
contiguous arrays, each the same length.
A record batch has a schema which must match its arrays’
datatypes.
Record batches are a convenient unit of work for various serialization and computation functions, possibly incremental.
Use the record_batch! macro to create a RecordBatch from
a literal slice of values, useful for rapid prototyping and testing.
Example:
use arrow_array::record_batch;
let batch = record_batch!(
    ("a", Int32, [1, 2, 3]),
    ("b", Float64, [Some(4.0), None, Some(5.0)]),
    ("c", Utf8, ["alpha", "beta", "gamma"])
);Implementations§
Source§impl RecordBatch
 
impl RecordBatch
Sourcepub fn try_new(
    schema: Arc<Schema>,
    columns: Vec<Arc<dyn Array>>,
) -> Result<RecordBatch, ArrowError>
 
pub fn try_new( schema: Arc<Schema>, columns: Vec<Arc<dyn Array>>, ) -> Result<RecordBatch, ArrowError>
Creates a RecordBatch from a schema and columns.
Expects the following:
- !columns.is_empty()
- schema.fields.len() == columns.len()
- schema.fields[i].data_type() == columns[i].data_type()
- columns[i].len() == columns[j].len()
If the conditions are not met, an error is returned.
§Example
let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
let schema = Schema::new(vec![
    Field::new("id", DataType::Int32, false)
]);
let batch = RecordBatch::try_new(
    Arc::new(schema),
    vec![Arc::new(id_array)]
).unwrap();Sourcepub unsafe fn new_unchecked(
    schema: Arc<Schema>,
    columns: Vec<Arc<dyn Array>>,
    row_count: usize,
) -> RecordBatch
 
pub unsafe fn new_unchecked( schema: Arc<Schema>, columns: Vec<Arc<dyn Array>>, row_count: usize, ) -> RecordBatch
Creates a RecordBatch from a schema and columns, without validation.
See Self::try_new for the checked version.
§Safety
Expects the following:
- schema.fields.len() == columns.len()
- schema.fields[i].data_type() == columns[i].data_type()
- columns[i].len() == row_count
Note: if the schema does not match the underlying data exactly, it can lead to undefined
behavior, for example, via conversion to a StructArray, which in turn could lead
to incorrect access.
Sourcepub fn try_new_with_options(
    schema: Arc<Schema>,
    columns: Vec<Arc<dyn Array>>,
    options: &RecordBatchOptions,
) -> Result<RecordBatch, ArrowError>
 
pub fn try_new_with_options( schema: Arc<Schema>, columns: Vec<Arc<dyn Array>>, options: &RecordBatchOptions, ) -> Result<RecordBatch, ArrowError>
Creates a RecordBatch from a schema and columns, with additional options,
such as whether to strictly validate field names.
See RecordBatch::try_new for the expected conditions.
Sourcepub fn new_empty(schema: Arc<Schema>) -> RecordBatch
 
pub fn new_empty(schema: Arc<Schema>) -> RecordBatch
Creates a new empty RecordBatch.
Sourcepub fn into_parts(self) -> (Arc<Schema>, Vec<Arc<dyn Array>>, usize)
 
pub fn into_parts(self) -> (Arc<Schema>, Vec<Arc<dyn Array>>, usize)
Return the schema, columns and row count of this RecordBatch
Sourcepub fn with_schema(self, schema: Arc<Schema>) -> Result<RecordBatch, ArrowError>
 
pub fn with_schema(self, schema: Arc<Schema>) -> Result<RecordBatch, ArrowError>
Override the schema of this RecordBatch
Returns an error if schema is not a superset of the current schema
as determined by Schema::contains
See also Self::schema_metadata_mut.
Sourcepub fn schema_ref(&self) -> &Arc<Schema>
 
pub fn schema_ref(&self) -> &Arc<Schema>
Returns a reference to the Schema of the record batch.
Sourcepub fn schema_metadata_mut(&mut self) -> &mut HashMap<String, String>
 
pub fn schema_metadata_mut(&mut self) -> &mut HashMap<String, String>
Mutable access to the metadata of the schema.
This allows you to modify Schema::metadata of Self::schema in a convenient and fast way.
Note this will clone the entire underlying Schema object if it is currently shared.
§Example
let mut batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
// Initially, the metadata is empty
assert!(batch.schema().metadata().get("key").is_none());
// Insert a key-value pair into the metadata
batch.schema_metadata_mut().insert("key".into(), "value".into());
assert_eq!(batch.schema().metadata().get("key"), Some(&String::from("value")));Sourcepub fn project(&self, indices: &[usize]) -> Result<RecordBatch, ArrowError>
 
pub fn project(&self, indices: &[usize]) -> Result<RecordBatch, ArrowError>
Projects the schema onto the specified columns
Sourcepub fn normalize(
    &self,
    separator: &str,
    max_level: Option<usize>,
) -> Result<RecordBatch, ArrowError>
 
pub fn normalize( &self, separator: &str, max_level: Option<usize>, ) -> Result<RecordBatch, ArrowError>
Normalize a semi-structured RecordBatch into a flat table.
Nested Fields will generate names separated by separator, up to a depth of max_level
(unlimited if None).
e.g. given a RecordBatch with schema:
    "foo": StructArray<"bar": Utf8>
A separator of "." would generate a batch with the schema:
    "foo.bar": Utf8
Note that giving a depth of Some(0) to max_level is the same as passing in None;
it will be treated as unlimited.
§Example
let animals: ArrayRef = Arc::new(StringArray::from(vec!["Parrot", ""]));
let n_legs: ArrayRef = Arc::new(Int64Array::from(vec![Some(2), Some(4)]));
let animals_field = Arc::new(Field::new("animals", DataType::Utf8, true));
let n_legs_field = Arc::new(Field::new("n_legs", DataType::Int64, true));
let a = Arc::new(StructArray::from(vec![
    (animals_field.clone(), Arc::new(animals.clone()) as ArrayRef),
    (n_legs_field.clone(), Arc::new(n_legs.clone()) as ArrayRef),
]));
let schema = Schema::new(vec![
    Field::new(
        "a",
        DataType::Struct(Fields::from(vec![animals_field, n_legs_field])),
        false,
    )
]);
let normalized = RecordBatch::try_new(Arc::new(schema), vec![a])
    .expect("valid conversion")
    .normalize(".", None)
    .expect("valid normalization");
let expected = RecordBatch::try_from_iter_with_nullable(vec![
    ("a.animals", animals.clone(), true),
    ("a.n_legs", n_legs.clone(), true),
])
.expect("valid conversion");
assert_eq!(expected, normalized);Sourcepub fn num_columns(&self) -> usize
 
pub fn num_columns(&self) -> usize
Returns the number of columns in the record batch.
§Example
let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
let schema = Schema::new(vec![
    Field::new("id", DataType::Int32, false)
]);
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap();
assert_eq!(batch.num_columns(), 1);Sourcepub fn num_rows(&self) -> usize
 
pub fn num_rows(&self) -> usize
Returns the number of rows in each column.
§Example
let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
let schema = Schema::new(vec![
    Field::new("id", DataType::Int32, false)
]);
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap();
assert_eq!(batch.num_rows(), 5);Sourcepub fn column_by_name(&self, name: &str) -> Option<&Arc<dyn Array>>
 
pub fn column_by_name(&self, name: &str) -> Option<&Arc<dyn Array>>
Get a reference to a column’s array by name.
Sourcepub fn remove_column(&mut self, index: usize) -> Arc<dyn Array>
 
pub fn remove_column(&mut self, index: usize) -> Arc<dyn Array>
Remove column by index and return it.
Return the ArrayRef if the column is removed.
§Panics
Panics if `index` is out of bounds.
§Example
use std::sync::Arc;
use arrow_array::{BooleanArray, Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
let bool_array = BooleanArray::from(vec![true, false, false, true, true]);
let schema = Schema::new(vec![
    Field::new("id", DataType::Int32, false),
    Field::new("bool", DataType::Boolean, false),
]);
let mut batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array), Arc::new(bool_array)]).unwrap();
let removed_column = batch.remove_column(0);
assert_eq!(removed_column.as_any().downcast_ref::<Int32Array>().unwrap(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
assert_eq!(batch.num_columns(), 1);Sourcepub fn slice(&self, offset: usize, length: usize) -> RecordBatch
 
pub fn slice(&self, offset: usize, length: usize) -> RecordBatch
Return a new RecordBatch where each column is sliced
according to offset and length
§Panics
Panics if offset + length is greater than the column length.
Sourcepub fn try_from_iter<I, F>(value: I) -> Result<RecordBatch, ArrowError>
 
pub fn try_from_iter<I, F>(value: I) -> Result<RecordBatch, ArrowError>
Create a RecordBatch from an iterable list of pairs of the
form (field_name, array), with the same requirements on
fields and arrays as RecordBatch::try_new. This method is
often used to create a single RecordBatch from arrays,
e.g. for testing.
The resulting schema is marked as nullable for each column if
the array for that column has any nulls. To explicitly
specify nullability, use RecordBatch::try_from_iter_with_nullable.
Example:
let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
let record_batch = RecordBatch::try_from_iter(vec![
  ("a", a),
  ("b", b),
]);Another way to quickly create a RecordBatch is to use the record_batch! macro,
which is particularly helpful for rapid prototyping and testing.
Example:
use arrow_array::record_batch;
let batch = record_batch!(
    ("a", Int32, [1, 2, 3]),
    ("b", Float64, [Some(4.0), None, Some(5.0)]),
    ("c", Utf8, ["alpha", "beta", "gamma"])
);Sourcepub fn try_from_iter_with_nullable<I, F>(
    value: I,
) -> Result<RecordBatch, ArrowError>
 
pub fn try_from_iter_with_nullable<I, F>( value: I, ) -> Result<RecordBatch, ArrowError>
Create a RecordBatch from an iterable list of tuples of the
form (field_name, array, nullable), with the same requirements on
fields and arrays as RecordBatch::try_new. This method is often
used to create a single RecordBatch from arrays, e.g. for
testing.
Example:
let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
let b: ArrayRef = Arc::new(StringArray::from(vec![Some("a"), Some("b")]));
// Note neither `a` nor `b` has any actual nulls, but we mark
// `b` as nullable
let record_batch = RecordBatch::try_from_iter_with_nullable(vec![
  ("a", a, false),
  ("b", b, true),
]);Sourcepub fn get_array_memory_size(&self) -> usize
 
pub fn get_array_memory_size(&self) -> usize
Returns the total number of bytes of memory occupied physically by this batch.
Note that this does not always correspond to the exact memory usage of a
RecordBatch (might overestimate), since multiple columns can share the same
buffers or slices thereof, the memory used by the shared buffers might be
counted multiple times.
Trait Implementations§
Source§impl Clone for RecordBatch
 
impl Clone for RecordBatch
Source§fn clone(&self) -> RecordBatch
 
fn clone(&self) -> RecordBatch
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
 
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read moreSource§impl Debug for RecordBatch
 
impl Debug for RecordBatch
Source§impl From<&StructArray> for RecordBatch
 
impl From<&StructArray> for RecordBatch
Source§fn from(struct_array: &StructArray) -> RecordBatch
 
fn from(struct_array: &StructArray) -> RecordBatch
Source§impl From<RecordBatch> for StructArray
 
impl From<RecordBatch> for StructArray
Source§fn from(value: RecordBatch) -> StructArray
 
fn from(value: RecordBatch) -> StructArray
Source§impl From<StructArray> for RecordBatch
 
impl From<StructArray> for RecordBatch
Source§fn from(value: StructArray) -> RecordBatch
 
fn from(value: StructArray) -> RecordBatch
Source§impl FromPyArrow for RecordBatch
 
impl FromPyArrow for RecordBatch
Source§fn from_pyarrow_bound(value: &Bound<'_, PyAny>) -> Result<RecordBatch, PyErr>
 
fn from_pyarrow_bound(value: &Bound<'_, PyAny>) -> Result<RecordBatch, PyErr>
Source§impl Index<&str> for RecordBatch
 
impl Index<&str> for RecordBatch
Source§impl PartialEq for RecordBatch
 
impl PartialEq for RecordBatch
Source§impl RecordOutput for &RecordBatch
 
impl RecordOutput for &RecordBatch
Source§fn record_output(self, bm: &BaselineMetrics) -> &RecordBatch
 
fn record_output(self, bm: &BaselineMetrics) -> &RecordBatch
Source§impl RecordOutput for RecordBatch
 
impl RecordOutput for RecordBatch
Source§fn record_output(self, bm: &BaselineMetrics) -> RecordBatch
 
fn record_output(self, bm: &BaselineMetrics) -> RecordBatch
Source§impl ToPyArrow for RecordBatch
 
impl ToPyArrow for RecordBatch
impl StructuralPartialEq for RecordBatch
Auto Trait Implementations§
impl Freeze for RecordBatch
impl !RefUnwindSafe for RecordBatch
impl Send for RecordBatch
impl Sync for RecordBatch
impl Unpin for RecordBatch
impl !UnwindSafe for RecordBatch
Blanket Implementations§
Source§impl<T> AlignerFor<1> for T
 
impl<T> AlignerFor<1> for T
Source§impl<T> AlignerFor<1024> for T
 
impl<T> AlignerFor<1024> for T
Source§type Aligner = AlignTo1024<T>
 
type Aligner = AlignTo1024<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<128> for T
 
impl<T> AlignerFor<128> for T
Source§type Aligner = AlignTo128<T>
 
type Aligner = AlignTo128<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<16> for T
 
impl<T> AlignerFor<16> for T
Source§impl<T> AlignerFor<16384> for T
 
impl<T> AlignerFor<16384> for T
Source§type Aligner = AlignTo16384<T>
 
type Aligner = AlignTo16384<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<2> for T
 
impl<T> AlignerFor<2> for T
Source§impl<T> AlignerFor<2048> for T
 
impl<T> AlignerFor<2048> for T
Source§type Aligner = AlignTo2048<T>
 
type Aligner = AlignTo2048<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<256> for T
 
impl<T> AlignerFor<256> for T
Source§type Aligner = AlignTo256<T>
 
type Aligner = AlignTo256<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<32> for T
 
impl<T> AlignerFor<32> for T
Source§impl<T> AlignerFor<32768> for T
 
impl<T> AlignerFor<32768> for T
Source§type Aligner = AlignTo32768<T>
 
type Aligner = AlignTo32768<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<4> for T
 
impl<T> AlignerFor<4> for T
Source§impl<T> AlignerFor<4096> for T
 
impl<T> AlignerFor<4096> for T
Source§type Aligner = AlignTo4096<T>
 
type Aligner = AlignTo4096<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<512> for T
 
impl<T> AlignerFor<512> for T
Source§type Aligner = AlignTo512<T>
 
type Aligner = AlignTo512<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> AlignerFor<64> for T
 
impl<T> AlignerFor<64> for T
Source§impl<T> AlignerFor<8> for T
 
impl<T> AlignerFor<8> for T
Source§impl<T> AlignerFor<8192> for T
 
impl<T> AlignerFor<8192> for T
Source§type Aligner = AlignTo8192<T>
 
type Aligner = AlignTo8192<T>
AlignTo* type which aligns Self to ALIGNMENT.Source§impl<T> BorrowMut<T> for Twhere
    T: ?Sized,
 
impl<T> BorrowMut<T> for Twhere
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
 
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
    T: Clone,
 
impl<T> CloneToUninit for Twhere
    T: Clone,
Source§impl<T, W> HasTypeWitness<W> for Twhere
    W: MakeTypeWitness<Arg = T>,
    T: ?Sized,
 
impl<T, W> HasTypeWitness<W> for Twhere
    W: MakeTypeWitness<Arg = T>,
    T: ?Sized,
Source§impl<T> Identity for Twhere
    T: ?Sized,
 
impl<T> Identity for Twhere
    T: ?Sized,
Source§impl<T> Instrument for T
 
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
 
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
 
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
 
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
 
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
 
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§impl<T> IntoPyArrow for Twhere
    T: ToPyArrow,
 
impl<T> IntoPyArrow for Twhere
    T: ToPyArrow,
Source§impl<T> PolicyExt for Twhere
    T: ?Sized,
 
impl<T> PolicyExt for Twhere
    T: ?Sized,
Source§impl<'a, T> RCowCompatibleRef<'a> for Twhere
    T: Clone + 'a,
 
impl<'a, T> RCowCompatibleRef<'a> for Twhere
    T: Clone + 'a,
Source§fn as_c_ref(from: &'a T) -> <T as RCowCompatibleRef<'a>>::RefC
 
fn as_c_ref(from: &'a T) -> <T as RCowCompatibleRef<'a>>::RefC
Source§fn as_rust_ref(from: <T as RCowCompatibleRef<'a>>::RefC) -> &'a T
 
fn as_rust_ref(from: <T as RCowCompatibleRef<'a>>::RefC) -> &'a T
Source§impl<S> ROExtAcc for S
 
impl<S> ROExtAcc for S
Source§fn f_get<F>(&self, offset: FieldOffset<S, F, Aligned>) -> &F
 
fn f_get<F>(&self, offset: FieldOffset<S, F, Aligned>) -> &F
offset. Read moreSource§fn f_get_mut<F>(&mut self, offset: FieldOffset<S, F, Aligned>) -> &mut F
 
fn f_get_mut<F>(&mut self, offset: FieldOffset<S, F, Aligned>) -> &mut F
offset. Read moreSource§fn f_get_ptr<F, A>(&self, offset: FieldOffset<S, F, A>) -> *const F
 
fn f_get_ptr<F, A>(&self, offset: FieldOffset<S, F, A>) -> *const F
offset. Read moreSource§fn f_get_mut_ptr<F, A>(&mut self, offset: FieldOffset<S, F, A>) -> *mut F
 
fn f_get_mut_ptr<F, A>(&mut self, offset: FieldOffset<S, F, A>) -> *mut F
offset. Read moreSource§impl<S> ROExtOps<Aligned> for S
 
impl<S> ROExtOps<Aligned> for S
Source§fn f_replace<F>(&mut self, offset: FieldOffset<S, F, Aligned>, value: F) -> F
 
fn f_replace<F>(&mut self, offset: FieldOffset<S, F, Aligned>, value: F) -> F
offset) with value,
returning the previous value of the field. Read moreSource§fn f_get_copy<F>(&self, offset: FieldOffset<S, F, Aligned>) -> Fwhere
    F: Copy,
 
fn f_get_copy<F>(&self, offset: FieldOffset<S, F, Aligned>) -> Fwhere
    F: Copy,
Source§impl<S> ROExtOps<Unaligned> for S
 
impl<S> ROExtOps<Unaligned> for S
Source§fn f_replace<F>(&mut self, offset: FieldOffset<S, F, Unaligned>, value: F) -> F
 
fn f_replace<F>(&mut self, offset: FieldOffset<S, F, Unaligned>, value: F) -> F
offset) with value,
returning the previous value of the field. Read moreSource§fn f_get_copy<F>(&self, offset: FieldOffset<S, F, Unaligned>) -> Fwhere
    F: Copy,
 
fn f_get_copy<F>(&self, offset: FieldOffset<S, F, Unaligned>) -> Fwhere
    F: Copy,
Source§impl<T> SelfOps for Twhere
    T: ?Sized,
 
impl<T> SelfOps for Twhere
    T: ?Sized,
Source§fn piped<F, U>(self, f: F) -> U
 
fn piped<F, U>(self, f: F) -> U
Source§fn piped_ref<'a, F, U>(&'a self, f: F) -> Uwhere
    F: FnOnce(&'a Self) -> U,
 
fn piped_ref<'a, F, U>(&'a self, f: F) -> Uwhere
    F: FnOnce(&'a Self) -> U,
piped except that the function takes &Self
Useful for functions that take &Self instead of Self. Read moreSource§fn piped_mut<'a, F, U>(&'a mut self, f: F) -> Uwhere
    F: FnOnce(&'a mut Self) -> U,
 
fn piped_mut<'a, F, U>(&'a mut self, f: F) -> Uwhere
    F: FnOnce(&'a mut Self) -> U,
piped, except that the function takes &mut Self.
Useful for functions that take &mut Self instead of Self.Source§fn mutated<F>(self, f: F) -> Self
 
fn mutated<F>(self, f: F) -> Self
Source§fn observe<F>(self, f: F) -> Self
 
fn observe<F>(self, f: F) -> Self
Source§fn as_ref_<T>(&self) -> &T
 
fn as_ref_<T>(&self) -> &T
AsRef,
using the turbofish .as_ref_::<_>() syntax. Read more