1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
use super::*;
/// Resolves the dtype to use for a set of list chunks, rewriting the chunks
/// in place when they hold categorical values.
///
/// The dtype is read from the first chunk when one exists; otherwise the
/// caller-supplied `dtype` is used unchanged.
///
/// With the `dtype-categorical` feature enabled, a resolved dtype of
/// `List(Categorical(None))` triggers a rewrite: all chunks are concatenated
/// into one `ListArray<i64>`, its values are converted through
/// `Series::try_from_arrow_unchecked`, and `chunks` is replaced by a single
/// list array (declared with a `UInt32` inner arrow type) that reuses the
/// original offsets and validity. The returned dtype is then a list of the
/// converted series' dtype.
///
/// # Panics
///
/// In the categorical branch, panics if concatenation fails, if the
/// concatenated array is not a `ListArray<i64>`, or if the series conversion
/// fails.
// `&mut Vec` is required: the feature-gated arm below calls `clear` and `push`.
#[allow(clippy::ptr_arg)]
fn from_chunks_list_dtype(chunks: &mut Vec<ArrayRef>, dtype: DataType) -> DataType {
    // Prefer what the data says over what the caller declared.
    let resolved: DataType = match chunks.first() {
        Some(first) => first.data_type().into(),
        None => dtype,
    };

    match resolved {
        #[cfg(feature = "dtype-categorical")]
        DataType::List(inner) if *inner == DataType::Categorical(None) => {
            use polars_arrow::kernels::concatenate::concatenate_owned_unchecked;

            let merged = concatenate_owned_unchecked(chunks).unwrap();
            let lists = merged.as_any().downcast_ref::<ListArray<i64>>().unwrap();
            let values = lists.values();

            // NOTE(review): the unchecked conversion presumably relies on `values`
            // being consistent with its own reported arrow data type — confirm.
            let categorical = unsafe {
                Series::try_from_arrow_unchecked(
                    "",
                    vec![values.clone()],
                    values.data_type(),
                )
                .unwrap()
            };

            // NOTE(review): offsets and validity are taken verbatim from `lists`;
            // this assumes the converted values keep the same length — confirm.
            let rebuilt = unsafe {
                ListArray::new_unchecked(
                    ListArray::<i64>::default_datatype(ArrowDataType::UInt32),
                    lists.offsets().clone(),
                    categorical.array_ref(0).clone(),
                    lists.validity().cloned(),
                )
            };

            let list_dtype = DataType::List(Box::new(categorical.dtype().clone()));

            // Swap the original chunks for the single rewritten chunk.
            chunks.clear();
            chunks.push(Box::new(rebuilt));
            list_dtype
        }
        other => other,
    }
}
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Builds a `ChunkedArray` named `name` from already-constructed chunks.
    ///
    /// The field dtype starts from `T::get_dtype()`. When that is a list
    /// dtype it is passed through `from_chunks_list_dtype`, which may replace
    /// both the dtype and the contents of `chunks`. No categorical map is
    /// attached, `bit_settings` is left at its default, and `compute_len` is
    /// called on the result before it is returned.
    pub fn from_chunks(name: &str, mut chunks: Vec<ArrayRef>) -> Self {
        let declared = T::get_dtype();
        let dtype = if matches!(declared, DataType::List(_)) {
            from_chunks_list_dtype(&mut chunks, declared)
        } else {
            declared
        };

        let mut ca = ChunkedArray {
            field: Arc::new(Field::new(name, dtype)),
            chunks,
            phantom: PhantomData,
            categorical_map: None,
            bit_settings: Default::default(),
            // Placeholder; `compute_len` is invoked right below.
            length: 0,
        };
        ca.compute_len();
        ca
    }
}
impl Int32Chunked {
    /// Creates an array named `name` backed by a single arrow null array of
    /// `len` elements.
    ///
    /// Note that although the receiver type is `Int32Chunked`, the field is
    /// created with `DataType::Null` and the chunk with `ArrowDataType::Null`.
    pub(crate) fn new_null(name: &str, len: usize) -> Self {
        let null_chunk = arrow::array::new_null_array(ArrowDataType::Null, len);

        let mut ca = ChunkedArray {
            field: Arc::new(Field::new(name, DataType::Null)),
            chunks: vec![null_chunk as ArrayRef],
            phantom: PhantomData,
            categorical_map: None,
            bit_settings: Default::default(),
            // Placeholder; `compute_len` is invoked right below.
            length: 0,
        };
        ca.compute_len();
        ca
    }
}
impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Creates a single-chunk array named `name` that takes ownership of `v`.
    ///
    /// The values are converted with `to_array::<T>` without a bitmap and the
    /// resulting chunk is handed to `from_chunks`.
    pub fn from_vec(name: &str, v: Vec<T::Native>) -> Self {
        Self::from_chunks(name, vec![to_array::<T>(v, None)])
    }

    /// Creates a single-chunk array named `name` from owned `values` and an
    /// optional bitmap.
    ///
    /// `buffer` is forwarded as the second argument of `to_array::<T>`; pass
    /// `None` for no bitmap (which is what `from_vec` does).
    ///
    /// Construction is delegated to `from_chunks`, like `from_vec`, so the
    /// struct is assembled in one place. This also avoids the struct-update
    /// form `..Default::default()`, which builds and then drops a complete
    /// default `ChunkedArray` only to fill in the remaining fields.
    pub fn new_from_owned_with_null_bitmap(
        name: &str,
        values: Vec<T::Native>,
        buffer: Option<Bitmap>,
    ) -> Self {
        Self::from_chunks(name, vec![to_array::<T>(values, buffer)])
    }
}