1use std::sync::Arc;
2
3use polars_error::{PolarsResult, polars_bail};
4
5use super::{MutableUtf8Array, StrAsBytes, Utf8Array};
6use crate::array::physical_binary::*;
7use crate::array::specification::{try_check_offsets_bounds, try_check_utf8};
8use crate::array::{Array, ArrayValuesIter, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
9use crate::bitmap::MutableBitmap;
10use crate::datatypes::ArrowDataType;
11use crate::offset::{Offset, Offsets};
12use crate::trusted_len::TrustedLen;
13
14#[derive(Debug, Clone)]
17pub struct MutableUtf8ValuesArray<O: Offset> {
18 dtype: ArrowDataType,
19 offsets: Offsets<O>,
20 values: Vec<u8>,
21}
22
23impl<O: Offset> From<MutableUtf8ValuesArray<O>> for Utf8Array<O> {
24 fn from(other: MutableUtf8ValuesArray<O>) -> Self {
25 unsafe {
29 Utf8Array::<O>::new_unchecked(
30 other.dtype,
31 other.offsets.into(),
32 other.values.into(),
33 None,
34 )
35 }
36 }
37}
38
39impl<O: Offset> From<MutableUtf8ValuesArray<O>> for MutableUtf8Array<O> {
40 fn from(other: MutableUtf8ValuesArray<O>) -> Self {
41 unsafe {
44 MutableUtf8Array::<O>::new_unchecked(other.dtype, other.offsets, other.values, None)
45 }
46 }
47}
48
49impl<O: Offset> Default for MutableUtf8ValuesArray<O> {
50 fn default() -> Self {
51 Self::new()
52 }
53}
54
55impl<O: Offset> MutableUtf8ValuesArray<O> {
56 pub fn new() -> Self {
58 Self {
59 dtype: Self::default_dtype(),
60 offsets: Offsets::new(),
61 values: Vec::<u8>::new(),
62 }
63 }
64
65 pub fn try_new(
75 dtype: ArrowDataType,
76 offsets: Offsets<O>,
77 values: Vec<u8>,
78 ) -> PolarsResult<Self> {
79 try_check_utf8(&offsets, &values)?;
80 if dtype.to_physical_type() != Self::default_dtype().to_physical_type() {
81 polars_bail!(ComputeError: "MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
82 }
83
84 Ok(Self {
85 dtype,
86 offsets,
87 values,
88 })
89 }
90
91 pub unsafe fn new_unchecked(
105 dtype: ArrowDataType,
106 offsets: Offsets<O>,
107 values: Vec<u8>,
108 ) -> Self {
109 try_check_offsets_bounds(&offsets, values.len())
110 .expect("The length of the values must be equal to the last offset value");
111
112 if dtype.to_physical_type() != Self::default_dtype().to_physical_type() {
113 panic!(
114 "MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8"
115 )
116 }
117
118 Self {
119 dtype,
120 offsets,
121 values,
122 }
123 }
124
125 pub fn default_dtype() -> ArrowDataType {
128 Utf8Array::<O>::default_dtype()
129 }
130
131 pub fn with_capacity(capacity: usize) -> Self {
133 Self::with_capacities(capacity, 0)
134 }
135
136 pub fn with_capacities(capacity: usize, values: usize) -> Self {
138 Self {
139 dtype: Self::default_dtype(),
140 offsets: Offsets::<O>::with_capacity(capacity),
141 values: Vec::<u8>::with_capacity(values),
142 }
143 }
144
145 #[inline]
147 pub fn values(&self) -> &Vec<u8> {
148 &self.values
149 }
150
151 #[inline]
153 pub fn offsets(&self) -> &Offsets<O> {
154 &self.offsets
155 }
156
157 #[inline]
159 pub fn reserve(&mut self, additional: usize, additional_values: usize) {
160 self.offsets.reserve(additional + 1);
161 self.values.reserve(additional_values);
162 }
163
164 pub fn capacity(&self) -> usize {
166 self.offsets.capacity()
167 }
168
169 #[inline]
171 pub fn len(&self) -> usize {
172 self.offsets.len_proxy()
173 }
174
175 #[inline]
179 pub fn push<T: AsRef<str>>(&mut self, value: T) {
180 self.try_push(value).unwrap()
181 }
182
183 pub fn pop(&mut self) -> Option<String> {
186 if self.len() == 0 {
187 return None;
188 }
189 self.offsets.pop()?;
190 let start = self.offsets.last().to_usize();
191 let value = self.values.split_off(start);
192 Some(unsafe { String::from_utf8_unchecked(value) })
194 }
195
196 #[inline]
200 pub fn value(&self, i: usize) -> &str {
201 assert!(i < self.len());
202 unsafe { self.value_unchecked(i) }
203 }
204
205 #[inline]
210 pub unsafe fn value_unchecked(&self, i: usize) -> &str {
211 let (start, end) = self.offsets.start_end(i);
213
214 let slice = self.values.get_unchecked(start..end);
216
217 std::str::from_utf8_unchecked(slice)
219 }
220
221 pub fn iter(&self) -> ArrayValuesIter<'_, Self> {
223 ArrayValuesIter::new(self)
224 }
225
226 pub fn shrink_to_fit(&mut self) {
228 self.values.shrink_to_fit();
229 self.offsets.shrink_to_fit();
230 }
231
232 pub fn into_inner(self) -> (ArrowDataType, Offsets<O>, Vec<u8>) {
234 (self.dtype, self.offsets, self.values)
235 }
236}
237
238impl<O: Offset> MutableArray for MutableUtf8ValuesArray<O> {
239 fn len(&self) -> usize {
240 self.len()
241 }
242
243 fn validity(&self) -> Option<&MutableBitmap> {
244 None
245 }
246
247 fn as_box(&mut self) -> Box<dyn Array> {
248 let array: Utf8Array<O> = std::mem::take(self).into();
249 array.boxed()
250 }
251
252 fn as_arc(&mut self) -> Arc<dyn Array> {
253 let array: Utf8Array<O> = std::mem::take(self).into();
254 array.arced()
255 }
256
257 fn dtype(&self) -> &ArrowDataType {
258 &self.dtype
259 }
260
261 fn as_any(&self) -> &dyn std::any::Any {
262 self
263 }
264
265 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
266 self
267 }
268
269 #[inline]
270 fn push_null(&mut self) {
271 self.push::<&str>("")
272 }
273
274 fn reserve(&mut self, additional: usize) {
275 self.reserve(additional, 0)
276 }
277
278 fn shrink_to_fit(&mut self) {
279 self.shrink_to_fit()
280 }
281}
282
283impl<O: Offset, P: AsRef<str>> FromIterator<P> for MutableUtf8ValuesArray<O> {
284 fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self {
285 let (offsets, values) = values_iter(iter.into_iter().map(StrAsBytes));
286 unsafe { Self::new_unchecked(Self::default_dtype(), offsets, values) }
288 }
289}
290
291impl<O: Offset> MutableUtf8ValuesArray<O> {
292 pub(crate) unsafe fn extend_from_trusted_len_iter<I, P>(
293 &mut self,
294 validity: &mut MutableBitmap,
295 iterator: I,
296 ) where
297 P: AsRef<str>,
298 I: Iterator<Item = Option<P>>,
299 {
300 let iterator = iterator.map(|x| x.map(StrAsBytes));
301 extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);
302 }
303
304 #[inline]
306 pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
307 where
308 P: AsRef<str>,
309 I: TrustedLen<Item = P>,
310 {
311 unsafe { self.extend_trusted_len_unchecked(iterator) }
312 }
313
314 #[inline]
319 pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
320 where
321 P: AsRef<str>,
322 I: Iterator<Item = P>,
323 {
324 let iterator = iterator.map(StrAsBytes);
325 extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
326 }
327
328 #[inline]
330 pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
331 where
332 P: AsRef<str>,
333 I: TrustedLen<Item = P>,
334 {
335 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
337 }
338
339 #[inline]
345 pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
346 where
347 P: AsRef<str>,
348 I: Iterator<Item = P>,
349 {
350 let iterator = iterator.map(StrAsBytes);
351 let (offsets, values) = trusted_len_values_iter(iterator);
352
353 Self::new_unchecked(Self::default_dtype(), offsets, values)
355 }
356
357 pub fn try_from_iter<P: AsRef<str>, I: IntoIterator<Item = P>>(iter: I) -> PolarsResult<Self> {
362 let iterator = iter.into_iter();
363 let (lower, _) = iterator.size_hint();
364 let mut array = Self::with_capacity(lower);
365 for item in iterator {
366 array.try_push(item)?;
367 }
368 Ok(array)
369 }
370
371 pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
373 where
374 E: std::error::Error,
375 I: IntoIterator<Item = std::result::Result<T, E>>,
376 T: AsRef<str>,
377 {
378 let mut iter = iter.into_iter();
379 self.reserve(iter.size_hint().0, 0);
380 iter.try_for_each(|x| {
381 self.push(x?);
382 Ok(())
383 })
384 }
385}
386
387impl<O: Offset, T: AsRef<str>> Extend<T> for MutableUtf8ValuesArray<O> {
388 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
389 extend_from_values_iter(
390 &mut self.offsets,
391 &mut self.values,
392 iter.into_iter().map(StrAsBytes),
393 );
394 }
395}
396
397impl<O: Offset, T: AsRef<str>> TryExtend<T> for MutableUtf8ValuesArray<O> {
398 fn try_extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> PolarsResult<()> {
399 let mut iter = iter.into_iter();
400 self.reserve(iter.size_hint().0, 0);
401 iter.try_for_each(|x| self.try_push(x))
402 }
403}
404
405impl<O: Offset, T: AsRef<str>> TryPush<T> for MutableUtf8ValuesArray<O> {
406 #[inline]
407 fn try_push(&mut self, value: T) -> PolarsResult<()> {
408 let bytes = value.as_ref().as_bytes();
409 self.values.extend_from_slice(bytes);
410 self.offsets.try_push(bytes.len())
411 }
412}
413
414impl<O: Offset> TryExtendFromSelf for MutableUtf8ValuesArray<O> {
415 fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
416 self.values.extend_from_slice(&other.values);
417 self.offsets.try_extend_from_self(&other.offsets)
418 }
419}