Skip to main content

alopex_dataframe/dataframe/
series.rs

1use arrow::array::ArrayRef;
2use arrow::datatypes::DataType;
3
4use crate::{DataFrameError, Result};
5
6/// A named column represented as one or more Arrow `ArrayRef` chunks.
7#[derive(Debug, Clone)]
8pub struct Series {
9    name: String,
10    chunks: Vec<ArrayRef>,
11}
12
13impl Series {
14    /// Construct a `Series` from Arrow chunks, validating that all chunks share the same dtype.
15    pub fn from_arrow(name: &str, chunks: Vec<ArrayRef>) -> Result<Self> {
16        if chunks.is_empty() {
17            return Ok(Self {
18                name: name.to_string(),
19                chunks,
20            });
21        }
22
23        let expected = chunks[0].data_type().clone();
24        for chunk in &chunks[1..] {
25            let actual = chunk.data_type();
26            if actual != &expected {
27                return Err(DataFrameError::type_mismatch(
28                    Some(name.to_string()),
29                    expected.to_string(),
30                    actual.to_string(),
31                ));
32            }
33        }
34
35        Ok(Self {
36            name: name.to_string(),
37            chunks,
38        })
39    }
40
41    /// Convert this series into Arrow chunks.
42    pub fn to_arrow(&self) -> Vec<ArrayRef> {
43        self.chunks.clone()
44    }
45
46    /// Return the series name.
47    pub fn name(&self) -> &str {
48        &self.name
49    }
50
51    /// Return the logical length of the series.
52    pub fn len(&self) -> usize {
53        self.chunks.iter().map(|c| c.len()).sum()
54    }
55
56    /// Return the Arrow dtype of the series.
57    pub fn dtype(&self) -> DataType {
58        self.chunks
59            .first()
60            .map(|c| c.data_type().clone())
61            .unwrap_or(DataType::Null)
62    }
63
64    /// Returns `true` if this series is empty.
65    pub fn is_empty(&self) -> bool {
66        self.len() == 0
67    }
68
69    pub(crate) fn chunks(&self) -> &[ArrayRef] {
70        &self.chunks
71    }
72
73    pub(crate) fn from_arrow_unchecked(name: &str, chunks: Vec<ArrayRef>) -> Self {
74        Self {
75            name: name.to_string(),
76            chunks,
77        }
78    }
79}
80
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{ArrayRef, Int32Array, StringArray};

    use super::Series;
    use crate::DataFrameError;

    /// A series built from zero chunks keeps its name and reports zero length.
    #[test]
    fn from_arrow_accepts_empty_chunks() {
        let series = Series::from_arrow("a", Vec::new()).unwrap();

        assert_eq!(series.name(), "a");
        assert_eq!(series.len(), 0);
        assert!(series.is_empty());
    }

    /// Chunks with different dtypes are rejected with a `TypeMismatch` naming the column.
    #[test]
    fn from_arrow_rejects_mixed_dtypes() {
        let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
        let strings: ArrayRef = Arc::new(StringArray::from(vec!["x", "y"]));

        let err = Series::from_arrow("col", vec![ints, strings]).unwrap_err();

        if let DataFrameError::TypeMismatch { column, .. } = &err {
            assert_eq!(column.as_deref(), Some("col"));
        } else {
            panic!("unexpected error: {err:?}");
        }
    }
}