polars_core/chunked_array/object/
builder.rs1use arrow::bitmap::BitmapBuilder;
2
3use super::*;
4use crate::chunked_array::object::registry::{AnonymousObjectBuilder, ObjectRegistry};
5use crate::utils::get_iter_capacity;
6
7pub struct ObjectChunkedBuilder<T> {
8 field: Field,
9 bitmask_builder: BitmapBuilder,
10 values: Vec<T>,
11}
12
13impl<T> ObjectChunkedBuilder<T>
14where
15 T: PolarsObject,
16{
17 pub fn new(name: PlSmallStr, capacity: usize) -> Self {
18 ObjectChunkedBuilder {
19 field: Field::new(name, DataType::Object(T::type_name(), None)),
20 values: Vec::with_capacity(capacity),
21 bitmask_builder: BitmapBuilder::with_capacity(capacity),
22 }
23 }
24
25 #[inline]
27 pub fn append_value(&mut self, v: T) {
28 self.values.push(v);
29 self.bitmask_builder.push(true);
30 }
31
32 #[inline]
34 pub fn append_null(&mut self) {
35 self.values.push(T::default());
36 self.bitmask_builder.push(false);
37 }
38
39 #[inline]
40 pub fn append_value_from_any(&mut self, v: &dyn Any) -> PolarsResult<()> {
41 let Some(v) = v.downcast_ref::<T>() else {
42 polars_bail!(SchemaMismatch: "cannot downcast any in ObjectBuilder");
43 };
44 self.append_value(v.clone());
45 Ok(())
46 }
47
48 #[inline]
49 pub fn append_option(&mut self, opt: Option<T>) {
50 match opt {
51 Some(s) => self.append_value(s),
52 None => self.append_null(),
53 }
54 }
55
56 pub fn finish(mut self) -> ObjectChunked<T> {
57 let null_bitmap: Option<Bitmap> = self.bitmask_builder.into_opt_validity();
58
59 let len = self.values.len();
60 let null_count = null_bitmap
61 .as_ref()
62 .map(|validity| validity.unset_bits())
63 .unwrap_or(0);
64
65 let arr = Box::new(ObjectArray {
66 values: self.values.into(),
67 validity: null_bitmap,
68 });
69
70 self.field.dtype = get_object_type::<T>();
71
72 unsafe { ChunkedArray::new_with_dims(Arc::new(self.field), vec![arr], len, null_count) }
73 }
74}
75
76pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
79 let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
80 Box::new(ObjectChunkedBuilder::<T>::new(name, capacity)) as Box<dyn AnonymousObjectBuilder>
81 });
82
83 let object_size = size_of::<T>();
84 let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
85
86 let registry = ObjectRegistry::new(object_builder, physical_dtype);
87 DataType::Object(T::type_name(), Some(Arc::new(registry)))
88}
89
90impl<T> Default for ObjectChunkedBuilder<T>
91where
92 T: PolarsObject,
93{
94 fn default() -> Self {
95 ObjectChunkedBuilder::new(PlSmallStr::EMPTY, 0)
96 }
97}
98
99impl<T> NewChunkedArray<ObjectType<T>, T> for ObjectChunked<T>
100where
101 T: PolarsObject,
102{
103 fn from_slice(name: PlSmallStr, v: &[T]) -> Self {
104 Self::from_iter_values(name, v.iter().cloned())
105 }
106
107 fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T>]) -> Self {
108 let mut builder = ObjectChunkedBuilder::<T>::new(name, opt_v.len());
109 opt_v
110 .iter()
111 .cloned()
112 .for_each(|opt| builder.append_option(opt));
113 builder.finish()
114 }
115
116 fn from_iter_options(
117 name: PlSmallStr,
118 it: impl Iterator<Item = Option<T>>,
119 ) -> ObjectChunked<T> {
120 let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
121 it.for_each(|opt| builder.append_option(opt));
122 builder.finish()
123 }
124
125 fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T>) -> ObjectChunked<T> {
127 let mut builder = ObjectChunkedBuilder::new(name, get_iter_capacity(&it));
128 it.for_each(|v| builder.append_value(v));
129 builder.finish()
130 }
131}
132
133impl<T> ObjectChunked<T>
134where
135 T: PolarsObject,
136{
137 pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
138 let field = Arc::new(Field::new(name, DataType::Object(T::type_name(), None)));
139 let len = v.len();
140 let arr = Box::new(ObjectArray {
141 values: v.into(),
142 validity: None,
143 });
144
145 unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
146 }
147
148 pub fn new_from_vec_and_validity(
149 name: PlSmallStr,
150 v: Vec<T>,
151 validity: Option<Bitmap>,
152 ) -> Self {
153 let field = Arc::new(Field::new(name, DataType::Object(T::type_name(), None)));
154 let len = v.len();
155 let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
156 let arr = Box::new(ObjectArray {
157 values: v.into(),
158 validity,
159 });
160
161 unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
162 }
163
164 pub fn new_empty(name: PlSmallStr) -> Self {
165 Self::new_from_vec(name, vec![])
166 }
167}
168
169pub(crate) fn object_series_to_arrow_array(s: &Series) -> ArrayRef {
171 let list_s = unsafe {
176 s.agg_list(&GroupsType::Slice {
177 groups: vec![[0, s.len() as IdxSize]],
178 rolling: false,
179 })
180 };
181 let arr = &list_s.chunks()[0];
182 let arr = arr.as_any().downcast_ref::<ListArray<i64>>().unwrap();
183 arr.values().to_boxed()
184}