polars_arrow/
record_batch.rs

1//! Contains [`RecordBatchT`], a container of [`Array`] where every array has the
2//! same length.
3
4use polars_error::{PolarsResult, polars_ensure};
5
6use crate::array::{Array, ArrayRef};
7use crate::datatypes::{ArrowSchema, ArrowSchemaRef};
8
9/// A vector of trait objects of [`Array`] where every item has
10/// the same length, [`RecordBatchT::len`].
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub struct RecordBatchT<A: AsRef<dyn Array>> {
13    height: usize,
14    schema: ArrowSchemaRef,
15    arrays: Vec<A>,
16}
17
/// A [`RecordBatchT`] whose arrays are stored as [`ArrayRef`]s.
pub type RecordBatch = RecordBatchT<ArrayRef>;
19
20impl<A: AsRef<dyn Array>> RecordBatchT<A> {
21    /// Creates a new [`RecordBatchT`].
22    ///
23    /// # Panics
24    ///
25    /// I.f.f. the length does not match the length of any of the arrays
26    pub fn new(length: usize, schema: ArrowSchemaRef, arrays: Vec<A>) -> Self {
27        Self::try_new(length, schema, arrays).unwrap()
28    }
29
30    /// Creates a new [`RecordBatchT`].
31    ///
32    /// # Error
33    ///
34    /// I.f.f. the height does not match the length of any of the arrays
35    pub fn try_new(height: usize, schema: ArrowSchemaRef, arrays: Vec<A>) -> PolarsResult<Self> {
36        polars_ensure!(
37            schema.len() == arrays.len(),
38            ComputeError: "RecordBatch requires an equal number of fields and arrays",
39        );
40        polars_ensure!(
41            arrays.iter().all(|arr| arr.as_ref().len() == height),
42            ComputeError: "RecordBatch requires all its arrays to have an equal number of rows",
43        );
44
45        Ok(Self {
46            height,
47            schema,
48            arrays,
49        })
50    }
51
52    /// returns the [`Array`]s in [`RecordBatchT`]
53    pub fn arrays(&self) -> &[A] {
54        &self.arrays
55    }
56
57    /// returns the [`ArrowSchema`]s in [`RecordBatchT`]
58    pub fn schema(&self) -> &ArrowSchema {
59        &self.schema
60    }
61
62    /// returns the [`Array`]s in [`RecordBatchT`]
63    pub fn columns(&self) -> &[A] {
64        &self.arrays
65    }
66
67    /// returns the number of rows of every array
68    pub fn len(&self) -> usize {
69        self.height
70    }
71
72    /// returns the number of rows of every array
73    pub fn height(&self) -> usize {
74        self.height
75    }
76
77    /// returns the number of arrays
78    pub fn width(&self) -> usize {
79        self.arrays.len()
80    }
81
82    /// returns whether the columns have any rows
83    pub fn is_empty(&self) -> bool {
84        self.len() == 0
85    }
86
87    /// Consumes [`RecordBatchT`] into its underlying arrays.
88    /// The arrays are guaranteed to have the same length
89    pub fn into_arrays(self) -> Vec<A> {
90        self.arrays
91    }
92
93    /// Consumes [`RecordBatchT`] into its underlying schema and arrays.
94    /// The arrays are guaranteed to have the same length
95    pub fn into_schema_and_arrays(self) -> (ArrowSchemaRef, Vec<A>) {
96        (self.schema, self.arrays)
97    }
98}
99
100impl<A: AsRef<dyn Array>> From<RecordBatchT<A>> for Vec<A> {
101    fn from(c: RecordBatchT<A>) -> Self {
102        c.into_arrays()
103    }
104}
105
106impl<A: AsRef<dyn Array>> std::ops::Deref for RecordBatchT<A> {
107    type Target = [A];
108
109    #[inline]
110    fn deref(&self) -> &[A] {
111        self.arrays()
112    }
113}