1use std::{iter::FromIterator, sync::Arc};
2
3use crate::array::{physical_binary::*, TryExtendFromSelf};
4use crate::{
5 array::{Array, MutableArray, TryExtend, TryPush},
6 bitmap::{
7 utils::{BitmapIter, ZipValidity},
8 Bitmap, MutableBitmap,
9 },
10 datatypes::DataType,
11 error::{Error, Result},
12 offset::{Offset, Offsets},
13 trusted_len::TrustedLen,
14};
15
16use super::{MutableUtf8ValuesArray, MutableUtf8ValuesIter, StrAsBytes, Utf8Array};
17
18#[derive(Debug, Clone)]
21pub struct MutableUtf8Array<O: Offset> {
22 values: MutableUtf8ValuesArray<O>,
23 validity: Option<MutableBitmap>,
24}
25
26impl<O: Offset> From<MutableUtf8Array<O>> for Utf8Array<O> {
27 fn from(other: MutableUtf8Array<O>) -> Self {
28 let validity = other.validity.and_then(|x| {
29 let validity: Option<Bitmap> = x.into();
30 validity
31 });
32 let array: Utf8Array<O> = other.values.into();
33 array.with_validity(validity)
34 }
35}
36
37impl<O: Offset> Default for MutableUtf8Array<O> {
38 fn default() -> Self {
39 Self::new()
40 }
41}
42
43impl<O: Offset> MutableUtf8Array<O> {
44 pub fn new() -> Self {
46 Self {
47 values: Default::default(),
48 validity: None,
49 }
50 }
51
52 pub fn try_new(
63 data_type: DataType,
64 offsets: Offsets<O>,
65 values: Vec<u8>,
66 validity: Option<MutableBitmap>,
67 ) -> Result<Self> {
68 let values = MutableUtf8ValuesArray::try_new(data_type, offsets, values)?;
69
70 if validity
71 .as_ref()
72 .map_or(false, |validity| validity.len() != values.len())
73 {
74 return Err(Error::oos(
75 "validity's length must be equal to the number of values",
76 ));
77 }
78
79 Ok(Self { values, validity })
80 }
81
82 pub unsafe fn new_unchecked(
90 data_type: DataType,
91 offsets: Offsets<O>,
92 values: Vec<u8>,
93 validity: Option<MutableBitmap>,
94 ) -> Self {
95 let values = MutableUtf8ValuesArray::new_unchecked(data_type, offsets, values);
96 if let Some(ref validity) = validity {
97 assert_eq!(values.len(), validity.len());
98 }
99 Self { values, validity }
100 }
101
102 pub fn from<T: AsRef<str>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
105 Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
106 }
107
108 fn default_data_type() -> DataType {
109 Utf8Array::<O>::default_data_type()
110 }
111
112 pub fn with_capacity(capacity: usize) -> Self {
114 Self::with_capacities(capacity, 0)
115 }
116
117 pub fn with_capacities(capacity: usize, values: usize) -> Self {
119 Self {
120 values: MutableUtf8ValuesArray::with_capacities(capacity, values),
121 validity: None,
122 }
123 }
124
125 pub fn reserve(&mut self, additional: usize, additional_values: usize) {
127 self.values.reserve(additional, additional_values);
128 if let Some(x) = self.validity.as_mut() {
129 x.reserve(additional)
130 }
131 }
132
133 pub fn capacity(&self) -> usize {
135 self.values.capacity()
136 }
137
138 #[inline]
140 pub fn len(&self) -> usize {
141 self.values.len()
142 }
143
144 #[inline]
148 pub fn push<T: AsRef<str>>(&mut self, value: Option<T>) {
149 self.try_push(value).unwrap()
150 }
151
152 #[inline]
156 pub fn value(&self, i: usize) -> &str {
157 self.values.value(i)
158 }
159
160 #[inline]
164 pub unsafe fn value_unchecked(&self, i: usize) -> &str {
165 self.values.value_unchecked(i)
166 }
167
168 pub fn pop(&mut self) -> Option<String> {
171 let value = self.values.pop()?;
172 self.validity
173 .as_mut()
174 .map(|x| x.pop()?.then(|| ()))
175 .unwrap_or_else(|| Some(()))
176 .map(|_| value)
177 }
178
179 fn init_validity(&mut self) {
180 let mut validity = MutableBitmap::with_capacity(self.values.capacity());
181 validity.extend_constant(self.len(), true);
182 validity.set(self.len() - 1, false);
183 self.validity = Some(validity);
184 }
185
186 pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter<O>, BitmapIter> {
188 ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
189 }
190
191 pub fn into_arc(self) -> Arc<dyn Array> {
193 let a: Utf8Array<O> = self.into();
194 Arc::new(a)
195 }
196
197 pub fn shrink_to_fit(&mut self) {
199 self.values.shrink_to_fit();
200 if let Some(validity) = &mut self.validity {
201 validity.shrink_to_fit()
202 }
203 }
204
205 pub fn into_data(self) -> (DataType, Offsets<O>, Vec<u8>, Option<MutableBitmap>) {
207 let (data_type, offsets, values) = self.values.into_inner();
208 (data_type, offsets, values, self.validity)
209 }
210
211 pub fn values_iter(&self) -> MutableUtf8ValuesIter<O> {
213 self.values.iter()
214 }
215
216 pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
220 if let Some(validity) = &validity {
221 assert_eq!(self.values.len(), validity.len())
222 }
223 self.validity = validity;
224 }
225
226 pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
232 if let Some(validity) = std::mem::take(&mut self.validity) {
233 self.set_validity(Some(f(validity)))
234 }
235 }
236}
237
238impl<O: Offset> MutableUtf8Array<O> {
239 pub fn values(&self) -> &Vec<u8> {
241 self.values.values()
242 }
243
244 pub fn offsets(&self) -> &Offsets<O> {
246 self.values.offsets()
247 }
248}
249
250impl<O: Offset> MutableArray for MutableUtf8Array<O> {
251 fn len(&self) -> usize {
252 self.len()
253 }
254
255 fn validity(&self) -> Option<&MutableBitmap> {
256 self.validity.as_ref()
257 }
258
259 fn as_box(&mut self) -> Box<dyn Array> {
260 let array: Utf8Array<O> = std::mem::take(self).into();
261 array.boxed()
262 }
263
264 fn as_arc(&mut self) -> Arc<dyn Array> {
265 let array: Utf8Array<O> = std::mem::take(self).into();
266 array.arced()
267 }
268
269 fn data_type(&self) -> &DataType {
270 if O::IS_LARGE {
271 &DataType::LargeUtf8
272 } else {
273 &DataType::Utf8
274 }
275 }
276
277 fn as_any(&self) -> &dyn std::any::Any {
278 self
279 }
280
281 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
282 self
283 }
284
285 #[inline]
286 fn push_null(&mut self) {
287 self.push::<&str>(None)
288 }
289
290 fn reserve(&mut self, additional: usize) {
291 self.reserve(additional, 0)
292 }
293
294 fn shrink_to_fit(&mut self) {
295 self.shrink_to_fit()
296 }
297}
298
299impl<O: Offset, P: AsRef<str>> FromIterator<Option<P>> for MutableUtf8Array<O> {
300 fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
301 Self::try_from_iter(iter).unwrap()
302 }
303}
304
305impl<O: Offset> MutableUtf8Array<O> {
306 #[inline]
309 pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
310 where
311 P: AsRef<str>,
312 I: TrustedLen<Item = P>,
313 {
314 unsafe { self.extend_trusted_len_values_unchecked(iterator) }
315 }
316
317 #[inline]
320 pub fn extend_values<I, P>(&mut self, iterator: I)
321 where
322 P: AsRef<str>,
323 I: Iterator<Item = P>,
324 {
325 let length = self.values.len();
326 self.values.extend(iterator);
327 let additional = self.values.len() - length;
328
329 if let Some(validity) = self.validity.as_mut() {
330 validity.extend_constant(additional, true);
331 }
332 }
333
334 #[inline]
340 pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
341 where
342 P: AsRef<str>,
343 I: Iterator<Item = P>,
344 {
345 let length = self.values.len();
346 self.values.extend_trusted_len_unchecked(iterator);
347 let additional = self.values.len() - length;
348
349 if let Some(validity) = self.validity.as_mut() {
350 validity.extend_constant(additional, true);
351 }
352 }
353
354 #[inline]
356 pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
357 where
358 P: AsRef<str>,
359 I: TrustedLen<Item = Option<P>>,
360 {
361 unsafe { self.extend_trusted_len_unchecked(iterator) }
362 }
363
364 #[inline]
368 pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
369 where
370 P: AsRef<str>,
371 I: Iterator<Item = Option<P>>,
372 {
373 if self.validity.is_none() {
374 let mut validity = MutableBitmap::new();
375 validity.extend_constant(self.len(), true);
376 self.validity = Some(validity);
377 }
378
379 self.values
380 .extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);
381 }
382
383 #[inline]
388 pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
389 where
390 P: AsRef<str>,
391 I: Iterator<Item = Option<P>>,
392 {
393 let iterator = iterator.map(|x| x.map(StrAsBytes));
394 let (validity, offsets, values) = trusted_len_unzip(iterator);
395
396 Self::new_unchecked(Self::default_data_type(), offsets, values, validity)
398 }
399
400 #[inline]
402 pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
403 where
404 P: AsRef<str>,
405 I: TrustedLen<Item = Option<P>>,
406 {
407 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
409 }
410
411 #[inline]
416 pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<str>, I: Iterator<Item = T>>(
417 iterator: I,
418 ) -> Self {
419 MutableUtf8ValuesArray::from_trusted_len_iter_unchecked(iterator).into()
420 }
421
422 #[inline]
424 pub fn from_trusted_len_values_iter<T: AsRef<str>, I: TrustedLen<Item = T>>(
425 iterator: I,
426 ) -> Self {
427 unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
429 }
430
431 fn try_from_iter<P: AsRef<str>, I: IntoIterator<Item = Option<P>>>(iter: I) -> Result<Self> {
436 let iterator = iter.into_iter();
437 let (lower, _) = iterator.size_hint();
438 let mut array = Self::with_capacity(lower);
439 for item in iterator {
440 array.try_push(item)?;
441 }
442 Ok(array)
443 }
444
445 #[inline]
450 pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
451 iterator: I,
452 ) -> std::result::Result<Self, E>
453 where
454 P: AsRef<str>,
455 I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
456 {
457 let iterator = iterator.into_iter();
458
459 let iterator = iterator.map(|x| x.map(|x| x.map(StrAsBytes)));
460 let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;
461
462 Ok(Self::new_unchecked(
464 Self::default_data_type(),
465 offsets,
466 values,
467 validity,
468 ))
469 }
470
471 #[inline]
473 pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
474 where
475 P: AsRef<str>,
476 I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
477 {
478 unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
480 }
481
482 pub fn from_iter_values<T: AsRef<str>, I: Iterator<Item = T>>(iterator: I) -> Self {
484 MutableUtf8ValuesArray::from_iter(iterator).into()
485 }
486
487 pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
489 where
490 E: std::error::Error,
491 I: IntoIterator<Item = std::result::Result<Option<T>, E>>,
492 T: AsRef<str>,
493 {
494 let mut iter = iter.into_iter();
495 self.reserve(iter.size_hint().0, 0);
496 iter.try_for_each(|x| {
497 self.push(x?);
498 Ok(())
499 })
500 }
501}
502
503impl<O: Offset, T: AsRef<str>> Extend<Option<T>> for MutableUtf8Array<O> {
504 fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
505 self.try_extend(iter).unwrap();
506 }
507}
508
509impl<O: Offset, T: AsRef<str>> TryExtend<Option<T>> for MutableUtf8Array<O> {
510 fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> Result<()> {
511 let mut iter = iter.into_iter();
512 self.reserve(iter.size_hint().0, 0);
513 iter.try_for_each(|x| self.try_push(x))
514 }
515}
516
517impl<O: Offset, T: AsRef<str>> TryPush<Option<T>> for MutableUtf8Array<O> {
518 #[inline]
519 fn try_push(&mut self, value: Option<T>) -> Result<()> {
520 match value {
521 Some(value) => {
522 self.values.try_push(value.as_ref())?;
523
524 match &mut self.validity {
525 Some(validity) => validity.push(true),
526 None => {}
527 }
528 }
529 None => {
530 self.values.push("");
531 match &mut self.validity {
532 Some(validity) => validity.push(false),
533 None => self.init_validity(),
534 }
535 }
536 }
537 Ok(())
538 }
539}
540
541impl<O: Offset> PartialEq for MutableUtf8Array<O> {
542 fn eq(&self, other: &Self) -> bool {
543 self.iter().eq(other.iter())
544 }
545}
546
547impl<O: Offset> TryExtendFromSelf for MutableUtf8Array<O> {
548 fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
549 extend_validity(self.len(), &mut self.validity, &other.validity);
550
551 self.values.try_extend_from_self(&other.values)
552 }
553}