Trait lance_arrow::RecordBatchExt
source · pub trait RecordBatchExt {
// Required methods
fn try_with_column(
&self,
field: Field,
arr: ArrayRef
) -> Result<RecordBatch, ArrowError>;
fn try_with_column_at(
&self,
index: usize,
field: Field,
arr: ArrayRef
) -> Result<RecordBatch, ArrowError>;
fn try_new_from_struct_array(
&self,
arr: StructArray
) -> Result<RecordBatch, ArrowError>;
fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>;
fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>;
fn replace_column_by_name(
&self,
name: &str,
column: Arc<dyn Array>
) -> Result<RecordBatch, ArrowError>;
fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>;
fn project_by_schema(
&self,
schema: &Schema
) -> Result<RecordBatch, ArrowError>;
fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>;
}
Expand description
Extends Arrow’s RecordBatch.
Required Methods§
sourcefn try_with_column(
&self,
field: Field,
arr: ArrayRef
) -> Result<RecordBatch, ArrowError>
fn try_with_column( &self, field: Field, arr: ArrayRef ) -> Result<RecordBatch, ArrowError>
Appends a new column to this RecordBatch and returns a new RecordBatch.
use std::sync::Arc;
use arrow_array::{RecordBatch, Int32Array, StringArray};
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let record_batch = RecordBatch::try_new(schema, vec![int_arr.clone()]).unwrap();
let new_field = Field::new("s", DataType::Utf8, true);
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let new_record_batch = record_batch.try_with_column(new_field, str_arr.clone()).unwrap();
assert_eq!(
new_record_batch,
RecordBatch::try_new(
Arc::new(Schema::new(
vec![
Field::new("a", DataType::Int32, true),
Field::new("s", DataType::Utf8, true)
])
),
vec![int_arr, str_arr],
).unwrap()
)
sourcefn try_with_column_at(
&self,
index: usize,
field: Field,
arr: ArrayRef
) -> Result<RecordBatch, ArrowError>
fn try_with_column_at( &self, index: usize, field: Field, arr: ArrayRef ) -> Result<RecordBatch, ArrowError>
Creates a new RecordBatch with the given column at index.
sourcefn try_new_from_struct_array(
&self,
arr: StructArray
) -> Result<RecordBatch, ArrowError>
fn try_new_from_struct_array( &self, arr: StructArray ) -> Result<RecordBatch, ArrowError>
Creates a new RecordBatch from the provided StructArray.
The fields of the StructArray must match this RecordBatch's schema.
sourcefn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>
fn merge(&self, other: &RecordBatch) -> Result<RecordBatch, ArrowError>
Merges with another RecordBatch and returns a new one.
use std::sync::Arc;
use arrow_array::*;
use arrow_schema::{Schema, Field, DataType};
use lance_arrow::*;
let left_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
let int_arr = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
let left = RecordBatch::try_new(left_schema, vec![int_arr.clone()]).unwrap();
let right_schema = Arc::new(Schema::new(vec![Field::new("s", DataType::Utf8, true)]));
let str_arr = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
let right = RecordBatch::try_new(right_schema, vec![str_arr.clone()]).unwrap();
let new_record_batch = left.merge(&right).unwrap();
assert_eq!(
new_record_batch,
RecordBatch::try_new(
Arc::new(Schema::new(
vec![
Field::new("a", DataType::Int32, true),
Field::new("s", DataType::Utf8, true)
])
),
vec![int_arr, str_arr],
).unwrap()
)
TODO: add support for merging nested fields.
sourcefn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>
fn drop_column(&self, name: &str) -> Result<RecordBatch, ArrowError>
Drops the column with the given name and returns the new RecordBatch.
If the named column does not exist, it returns a copy of this RecordBatch.
sourcefn replace_column_by_name(
&self,
name: &str,
column: Arc<dyn Array>
) -> Result<RecordBatch, ArrowError>
fn replace_column_by_name( &self, name: &str, column: Arc<dyn Array> ) -> Result<RecordBatch, ArrowError>
Replace a column (specified by name) and return the new RecordBatch.
sourcefn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>
fn column_by_qualified_name(&self, name: &str) -> Option<&ArrayRef>
Get (potentially nested) column by qualified name.
sourcefn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>
fn project_by_schema(&self, schema: &Schema) -> Result<RecordBatch, ArrowError>
Project the schema over the RecordBatch.
sourcefn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>
fn take(&self, indices: &UInt32Array) -> Result<RecordBatch, ArrowError>
Take selected rows from the RecordBatch.