1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
use super::*;
/// Resolves the logical dtype of a list-typed array that is being built from `chunks`.
///
/// The dtype is taken from the first chunk's arrow data type when at least one chunk is
/// present; the caller-supplied `dtype` is only used when `chunks` is empty.
///
/// With the `dtype-categorical` feature enabled, a list whose inner dtype equals
/// `DataType::Categorical(None)` gets special treatment: all chunks are concatenated into
/// one `ListArray<i64>`, its values are turned into a `Series`, and `chunks` is replaced by
/// a single list array (declared with `UInt32` values) that reuses the original offsets and
/// validity. The returned dtype is then a list of that series' dtype.
///
/// # Panics
///
/// Only in the categorical branch: if concatenation fails, if the concatenated array is
/// not a `ListArray<i64>`, or if building the `Series` fails.
// `&mut Vec` (rather than a slice) is needed: the categorical branch clears and refills it.
#[allow(clippy::ptr_arg)]
fn from_chunks_list_dtype(chunks: &mut Vec<ArrayRef>, dtype: DataType) -> DataType {
    // Prefer what the data itself says over the fallback dtype.
    let resolved: DataType = match chunks.first() {
        Some(first) => first.data_type().into(),
        None => dtype,
    };

    match resolved {
        #[cfg(feature = "dtype-categorical")]
        DataType::List(inner) if *inner == DataType::Categorical(None) => {
            use polars_arrow::kernels::concatenate::concatenate_owned_unchecked;

            // Collapse every chunk into a single list array so there is one values buffer.
            let merged = concatenate_owned_unchecked(chunks).unwrap();
            let lists = merged.as_any().downcast_ref::<ListArray<i64>>().unwrap();
            let inner_values = lists.values();

            // SAFETY: NOTE(review) the array is passed together with its own
            // `data_type()`, so the declared type matches the data; confirm this is the
            // full contract of `try_from_arrow_unchecked`.
            let categories = unsafe {
                Series::try_from_arrow_unchecked(
                    "",
                    vec![inner_values.clone()],
                    inner_values.data_type(),
                )
                .unwrap()
            };

            // Rebuild the list around the series' first chunk, keeping offsets and validity.
            let rebuilt = ListArray::new(
                ListArray::<i64>::default_datatype(ArrowDataType::UInt32),
                lists.offsets().clone(),
                categories.array_ref(0).clone(),
                lists.validity().cloned(),
            );

            chunks.clear();
            chunks.push(Box::new(rebuilt));
            DataType::List(Box::new(categories.dtype().clone()))
        }
        other => other,
    }
}
impl<T> ChunkedArray<T>
where
    T: PolarsDataType,
{
    /// Builds a `ChunkedArray` named `name` directly from arrow `chunks`.
    ///
    /// The field dtype is `T::get_dtype()`, except when that is a list dtype: then it is
    /// resolved by `from_chunks_list_dtype`, which may also rewrite `chunks`.
    /// `compute_len` is called on the result before it is returned.
    ///
    /// # Safety
    ///
    /// Nothing here verifies that `chunks` actually hold data of type `T`.
    /// NOTE(review): presumably the caller must guarantee that every chunk's arrow type
    /// corresponds to `T` — confirm against the crate-level contract.
    pub unsafe fn from_chunks(name: &str, mut chunks: Vec<ArrayRef>) -> Self {
        let static_dtype = T::get_dtype();
        // Only list dtypes need to be refined from the actual chunk data.
        let dtype = if matches!(static_dtype, DataType::List(_)) {
            from_chunks_list_dtype(&mut chunks, static_dtype)
        } else {
            static_dtype
        };

        let mut ca = ChunkedArray {
            field: Arc::new(Field::new(name, dtype)),
            chunks,
            phantom: PhantomData,
            bit_settings: Default::default(),
            length: 0,
        };
        ca.compute_len();
        ca
    }
}
impl Int32Chunked {
    /// Creates a single-chunk array named `name` backed by an arrow null array with `len`
    /// entries.
    ///
    /// Note that the field is tagged `DataType::Null` (not an integer dtype) even though
    /// the value is typed as `Int32Chunked`.
    pub(crate) fn new_null(name: &str, len: usize) -> Self {
        let null_arr = arrow::array::new_null_array(ArrowDataType::Null, len);

        let mut ca = ChunkedArray {
            field: Arc::new(Field::new(name, DataType::Null)),
            chunks: vec![null_arr as ArrayRef],
            phantom: PhantomData,
            bit_settings: Default::default(),
            length: 0,
        };
        ca.compute_len();
        ca
    }
}
impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Creates a single-chunk array named `name` from an owned vector of native values.
    ///
    /// No validity bitmap is supplied (`None` is passed to `to_array`).
    pub fn from_vec(name: &str, v: Vec<T::Native>) -> Self {
        let chunk = to_array::<T>(v, None);
        // SAFETY: NOTE(review) the chunk was just produced by `to_array::<T>`, so it is
        // presumably of the arrow type matching `T` — confirm against `to_array`.
        unsafe { Self::from_chunks(name, vec![chunk]) }
    }

    /// Creates a single-chunk array named `name` from owned `values` plus an optional
    /// `buffer` that is forwarded to `to_array` together with the values.
    ///
    /// The field dtype is `T::get_dtype()`; all remaining struct fields take their
    /// `Default` values before `compute_len` is called.
    pub fn new_from_owned_with_null_bitmap(
        name: &str,
        values: Vec<T::Native>,
        buffer: Option<Bitmap>,
    ) -> Self {
        let chunk = to_array::<T>(values, buffer);
        let field = Arc::new(Field::new(name, T::get_dtype()));

        let mut ca = ChunkedArray {
            phantom: PhantomData,
            field,
            chunks: vec![chunk],
            ..Default::default()
        };
        ca.compute_len();
        ca
    }

    /// Creates a single-chunk array named `name` over `values` using
    /// `arrow::ffi::mmap::slice`.
    ///
    /// # Safety
    ///
    /// NOTE(review): `arrow::ffi::mmap::slice` presumably wraps the slice's memory without
    /// copying it; if so, the returned array must not outlive `values`. Confirm against
    /// the arrow documentation for that function.
    pub unsafe fn mmap_slice(name: &str, values: &[T::Native]) -> Self {
        let mapped = arrow::ffi::mmap::slice(values);
        let boxed = Box::new(mapped);
        let field = Arc::new(Field::new(name, T::get_dtype()));

        let mut ca = ChunkedArray {
            phantom: PhantomData,
            field,
            chunks: vec![boxed],
            ..Default::default()
        };
        ca.compute_len();
        ca
    }
}