1use std::{iter::FromIterator, sync::Arc};
2
3use crate::{
4 array::{
5 specification::{try_check_offsets_bounds, try_check_utf8},
6 Array, ArrayValuesIter, MutableArray, TryExtend, TryExtendFromSelf, TryPush,
7 },
8 bitmap::MutableBitmap,
9 datatypes::DataType,
10 error::{Error, Result},
11 offset::{Offset, Offsets},
12 trusted_len::TrustedLen,
13};
14
15use super::{MutableUtf8Array, StrAsBytes, Utf8Array};
16use crate::array::physical_binary::*;
17
18#[derive(Debug, Clone)]
21pub struct MutableUtf8ValuesArray<O: Offset> {
22 data_type: DataType,
23 offsets: Offsets<O>,
24 values: Vec<u8>,
25}
26
27impl<O: Offset> From<MutableUtf8ValuesArray<O>> for Utf8Array<O> {
28 fn from(other: MutableUtf8ValuesArray<O>) -> Self {
29 unsafe {
33 Utf8Array::<O>::new_unchecked(
34 other.data_type,
35 other.offsets.into(),
36 other.values.into(),
37 None,
38 )
39 }
40 }
41}
42
43impl<O: Offset> From<MutableUtf8ValuesArray<O>> for MutableUtf8Array<O> {
44 fn from(other: MutableUtf8ValuesArray<O>) -> Self {
45 unsafe {
48 MutableUtf8Array::<O>::new_unchecked(other.data_type, other.offsets, other.values, None)
49 }
50 }
51}
52
53impl<O: Offset> Default for MutableUtf8ValuesArray<O> {
54 fn default() -> Self {
55 Self::new()
56 }
57}
58
59impl<O: Offset> MutableUtf8ValuesArray<O> {
60 pub fn new() -> Self {
62 Self {
63 data_type: Self::default_data_type(),
64 offsets: Offsets::new(),
65 values: Vec::<u8>::new(),
66 }
67 }
68
69 pub fn try_new(data_type: DataType, offsets: Offsets<O>, values: Vec<u8>) -> Result<Self> {
79 try_check_utf8(&offsets, &values)?;
80 if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
81 return Err(Error::oos(
82 "MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8",
83 ));
84 }
85
86 Ok(Self {
87 data_type,
88 offsets,
89 values,
90 })
91 }
92
93 pub unsafe fn new_unchecked(data_type: DataType, offsets: Offsets<O>, values: Vec<u8>) -> Self {
106 try_check_offsets_bounds(&offsets, values.len())
107 .expect("The length of the values must be equal to the last offset value");
108
109 if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
110 panic!("MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8")
111 }
112
113 Self {
114 data_type,
115 offsets,
116 values,
117 }
118 }
119
120 pub fn default_data_type() -> DataType {
123 Utf8Array::<O>::default_data_type()
124 }
125
126 pub fn with_capacity(capacity: usize) -> Self {
128 Self::with_capacities(capacity, 0)
129 }
130
131 pub fn with_capacities(capacity: usize, values: usize) -> Self {
133 Self {
134 data_type: Self::default_data_type(),
135 offsets: Offsets::<O>::with_capacity(capacity),
136 values: Vec::<u8>::with_capacity(values),
137 }
138 }
139
140 #[inline]
142 pub fn values(&self) -> &Vec<u8> {
143 &self.values
144 }
145
146 #[inline]
148 pub fn offsets(&self) -> &Offsets<O> {
149 &self.offsets
150 }
151
152 #[inline]
154 pub fn reserve(&mut self, additional: usize, additional_values: usize) {
155 self.offsets.reserve(additional + 1);
156 self.values.reserve(additional_values);
157 }
158
159 pub fn capacity(&self) -> usize {
161 self.offsets.capacity()
162 }
163
164 #[inline]
166 pub fn len(&self) -> usize {
167 self.offsets.len_proxy()
168 }
169
170 #[inline]
174 pub fn push<T: AsRef<str>>(&mut self, value: T) {
175 self.try_push(value).unwrap()
176 }
177
178 pub fn pop(&mut self) -> Option<String> {
181 if self.len() == 0 {
182 return None;
183 }
184 self.offsets.pop()?;
185 let start = self.offsets.last().to_usize();
186 let value = self.values.split_off(start);
187 Some(unsafe { String::from_utf8_unchecked(value) })
189 }
190
191 #[inline]
195 pub fn value(&self, i: usize) -> &str {
196 assert!(i < self.len());
197 unsafe { self.value_unchecked(i) }
198 }
199
200 #[inline]
204 pub unsafe fn value_unchecked(&self, i: usize) -> &str {
205 let (start, end) = self.offsets.start_end(i);
207
208 let slice = self.values.get_unchecked(start..end);
210
211 std::str::from_utf8_unchecked(slice)
213 }
214
215 pub fn iter(&self) -> ArrayValuesIter<Self> {
217 ArrayValuesIter::new(self)
218 }
219
220 pub fn shrink_to_fit(&mut self) {
222 self.values.shrink_to_fit();
223 self.offsets.shrink_to_fit();
224 }
225
226 pub fn into_inner(self) -> (DataType, Offsets<O>, Vec<u8>) {
228 (self.data_type, self.offsets, self.values)
229 }
230}
231
232impl<O: Offset> MutableArray for MutableUtf8ValuesArray<O> {
233 fn len(&self) -> usize {
234 self.len()
235 }
236
237 fn validity(&self) -> Option<&MutableBitmap> {
238 None
239 }
240
241 fn as_box(&mut self) -> Box<dyn Array> {
242 let array: Utf8Array<O> = std::mem::take(self).into();
243 array.boxed()
244 }
245
246 fn as_arc(&mut self) -> Arc<dyn Array> {
247 let array: Utf8Array<O> = std::mem::take(self).into();
248 array.arced()
249 }
250
251 fn data_type(&self) -> &DataType {
252 &self.data_type
253 }
254
255 fn as_any(&self) -> &dyn std::any::Any {
256 self
257 }
258
259 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
260 self
261 }
262
263 #[inline]
264 fn push_null(&mut self) {
265 self.push::<&str>("")
266 }
267
268 fn reserve(&mut self, additional: usize) {
269 self.reserve(additional, 0)
270 }
271
272 fn shrink_to_fit(&mut self) {
273 self.shrink_to_fit()
274 }
275}
276
277impl<O: Offset, P: AsRef<str>> FromIterator<P> for MutableUtf8ValuesArray<O> {
278 fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self {
279 let (offsets, values) = values_iter(iter.into_iter().map(StrAsBytes));
280 unsafe { Self::new_unchecked(Self::default_data_type(), offsets, values) }
282 }
283}
284
285impl<O: Offset> MutableUtf8ValuesArray<O> {
286 pub(crate) unsafe fn extend_from_trusted_len_iter<I, P>(
287 &mut self,
288 validity: &mut MutableBitmap,
289 iterator: I,
290 ) where
291 P: AsRef<str>,
292 I: Iterator<Item = Option<P>>,
293 {
294 let iterator = iterator.map(|x| x.map(StrAsBytes));
295 extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);
296 }
297
298 #[inline]
300 pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
301 where
302 P: AsRef<str>,
303 I: TrustedLen<Item = P>,
304 {
305 unsafe { self.extend_trusted_len_unchecked(iterator) }
306 }
307
308 #[inline]
312 pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
313 where
314 P: AsRef<str>,
315 I: Iterator<Item = P>,
316 {
317 let iterator = iterator.map(StrAsBytes);
318 extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
319 }
320
321 #[inline]
323 pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
324 where
325 P: AsRef<str>,
326 I: TrustedLen<Item = P>,
327 {
328 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
330 }
331
332 #[inline]
337 pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
338 where
339 P: AsRef<str>,
340 I: Iterator<Item = P>,
341 {
342 let iterator = iterator.map(StrAsBytes);
343 let (offsets, values) = trusted_len_values_iter(iterator);
344
345 Self::new_unchecked(Self::default_data_type(), offsets, values)
347 }
348
349 pub fn try_from_iter<P: AsRef<str>, I: IntoIterator<Item = P>>(iter: I) -> Result<Self> {
354 let iterator = iter.into_iter();
355 let (lower, _) = iterator.size_hint();
356 let mut array = Self::with_capacity(lower);
357 for item in iterator {
358 array.try_push(item)?;
359 }
360 Ok(array)
361 }
362
363 pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
365 where
366 E: std::error::Error,
367 I: IntoIterator<Item = std::result::Result<T, E>>,
368 T: AsRef<str>,
369 {
370 let mut iter = iter.into_iter();
371 self.reserve(iter.size_hint().0, 0);
372 iter.try_for_each(|x| {
373 self.push(x?);
374 Ok(())
375 })
376 }
377}
378
379impl<O: Offset, T: AsRef<str>> Extend<T> for MutableUtf8ValuesArray<O> {
380 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
381 extend_from_values_iter(
382 &mut self.offsets,
383 &mut self.values,
384 iter.into_iter().map(StrAsBytes),
385 );
386 }
387}
388
389impl<O: Offset, T: AsRef<str>> TryExtend<T> for MutableUtf8ValuesArray<O> {
390 fn try_extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> Result<()> {
391 let mut iter = iter.into_iter();
392 self.reserve(iter.size_hint().0, 0);
393 iter.try_for_each(|x| self.try_push(x))
394 }
395}
396
397impl<O: Offset, T: AsRef<str>> TryPush<T> for MutableUtf8ValuesArray<O> {
398 #[inline]
399 fn try_push(&mut self, value: T) -> Result<()> {
400 let bytes = value.as_ref().as_bytes();
401 self.values.extend_from_slice(bytes);
402 self.offsets.try_push_usize(bytes.len())
403 }
404}
405
406impl<O: Offset> TryExtendFromSelf for MutableUtf8ValuesArray<O> {
407 fn try_extend_from_self(&mut self, other: &Self) -> Result<()> {
408 self.values.extend_from_slice(&other.values);
409 self.offsets.try_extend_from_self(&other.offsets)
410 }
411}