narrow/array/
string.rs

1//! Array with string values.
2
3use std::{iter::Map, str};
4
5use super::{Array, VariableSizeBinaryArray};
6use crate::{
7    bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
8    buffer::{BufferType, VecBuffer},
9    nullability::{NonNullable, Nullability, Nullable},
10    offset::Offset,
11    Index, Length,
12};
13
14/// Array with string values.
15pub struct StringArray<
16    Nullable: Nullability = NonNullable,
17    OffsetItem: Offset = i32,
18    Buffer: BufferType = VecBuffer,
19>(pub VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>);
20
21/// Array with string values, using `i32` offset values.
22pub type Utf8Array<Nullable = NonNullable, Buffer = VecBuffer> = StringArray<Nullable, i32, Buffer>;
23
24/// Array with string values, using `i64` offset values.
25pub type LargeUtf8Array<Nullable = NonNullable, Buffer = VecBuffer> =
26    StringArray<Nullable, i64, Buffer>;
27
28impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType>
29    StringArray<Nullable, OffsetItem, Buffer>
30where
31    StringArray<Nullable, OffsetItem, Buffer>: Index + Length,
32{
33    /// Returns an iterator over the items in this [`StringArray`].
34    pub fn iter(&self) -> StringIter<'_, Nullable, OffsetItem, Buffer> {
35        <&Self as IntoIterator>::into_iter(self)
36    }
37}
38
39impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Array
40    for StringArray<Nullable, OffsetItem, Buffer>
41{
42    type Item = Nullable::Item<String>;
43}
44
45impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Clone
46    for StringArray<Nullable, OffsetItem, Buffer>
47where
48    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Clone,
49{
50    fn clone(&self) -> Self {
51        Self(self.0.clone())
52    }
53}
54
55impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Default
56    for StringArray<Nullable, OffsetItem, Buffer>
57where
58    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Default,
59{
60    fn default() -> Self {
61        Self(VariableSizeBinaryArray::default())
62    }
63}
64
65impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> Extend<&'a T>
66    for StringArray<NonNullable, OffsetItem, Buffer>
67where
68    T: AsRef<str>,
69    VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: Extend<&'a [u8]>,
70{
71    fn extend<I: IntoIterator<Item = &'a T>>(&mut self, iter: I) {
72        self.0
73            .extend(iter.into_iter().map(|item| item.as_ref().as_bytes()));
74    }
75}
76
77impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> Extend<Option<&'a T>>
78    for StringArray<Nullable, OffsetItem, Buffer>
79where
80    T: AsRef<str>,
81    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Extend<Option<&'a [u8]>>,
82{
83    fn extend<I: IntoIterator<Item = Option<&'a T>>>(&mut self, iter: I) {
84        self.0.extend(
85            iter.into_iter()
86                .map(|opt| opt.map(|item| item.as_ref().as_bytes())),
87        );
88    }
89}
90
91impl<OffsetItem: Offset, Buffer: BufferType> Extend<String>
92    for StringArray<NonNullable, OffsetItem, Buffer>
93where
94    VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: Extend<Vec<u8>>,
95{
96    fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
97        self.0.extend(iter.into_iter().map(String::into_bytes));
98    }
99}
100
101impl<OffsetItem: Offset, Buffer: BufferType> Extend<Option<String>>
102    for StringArray<Nullable, OffsetItem, Buffer>
103where
104    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Extend<Option<Vec<u8>>>,
105{
106    fn extend<I: IntoIterator<Item = Option<String>>>(&mut self, iter: I) {
107        self.0
108            .extend(iter.into_iter().map(|opt| opt.map(String::into_bytes)));
109    }
110}
111
112impl<OffsetItem: Offset, Buffer: BufferType> From<StringArray<NonNullable, OffsetItem, Buffer>>
113    for StringArray<Nullable, OffsetItem, Buffer>
114where
115    VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>:
116        Into<VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>>,
117{
118    fn from(value: StringArray<NonNullable, OffsetItem, Buffer>) -> Self {
119        Self(value.0.into())
120    }
121}
122
123impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> FromIterator<&'a T>
124    for StringArray<NonNullable, OffsetItem, Buffer>
125where
126    T: AsRef<str>,
127    VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: FromIterator<&'a [u8]>,
128{
129    fn from_iter<I: IntoIterator<Item = &'a T>>(iter: I) -> Self {
130        Self(
131            iter.into_iter()
132                .map(|item| item.as_ref().as_bytes())
133                .collect(),
134        )
135    }
136}
137
138impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> FromIterator<Option<&'a T>>
139    for StringArray<Nullable, OffsetItem, Buffer>
140where
141    T: AsRef<str>,
142    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: FromIterator<Option<&'a [u8]>>,
143{
144    fn from_iter<I: IntoIterator<Item = Option<&'a T>>>(iter: I) -> Self {
145        Self(
146            iter.into_iter()
147                .map(|x| x.map(|item| item.as_ref().as_bytes()))
148                .collect(),
149        )
150    }
151}
152
153impl<OffsetItem: Offset, Buffer: BufferType> FromIterator<String>
154    for StringArray<NonNullable, OffsetItem, Buffer>
155where
156    VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: FromIterator<Vec<u8>>,
157{
158    fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> Self {
159        Self(iter.into_iter().map(String::into_bytes).collect())
160    }
161}
162
163impl<OffsetItem: Offset, Buffer: BufferType> FromIterator<Option<String>>
164    for StringArray<Nullable, OffsetItem, Buffer>
165where
166    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: FromIterator<Option<Vec<u8>>>,
167{
168    fn from_iter<I: IntoIterator<Item = Option<String>>>(iter: I) -> Self {
169        Self(
170            iter.into_iter()
171                .map(|x| x.map(String::into_bytes))
172                .collect(),
173        )
174    }
175}
176
177impl<OffsetItem: Offset, Buffer: BufferType> Index
178    for StringArray<NonNullable, OffsetItem, Buffer>
179{
180    type Item<'a>
181        = &'a str
182    where
183        Self: 'a;
184
185    unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> {
186        str::from_utf8_unchecked(self.0.index_unchecked(index))
187    }
188}
189
190impl<OffsetItem: Offset, Buffer: BufferType> Index for StringArray<Nullable, OffsetItem, Buffer> {
191    type Item<'a>
192        = Option<&'a str>
193    where
194        Self: 'a;
195
196    unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> {
197        self.0
198            .index_unchecked(index)
199            .map(|bytes| str::from_utf8_unchecked(bytes))
200    }
201}
202
203/// An iterator over strings in a [`StringArray`].
204pub struct StringIter<'a, Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> {
205    /// Reference to the array.
206    array: &'a StringArray<Nullable, OffsetItem, Buffer>,
207    /// Current index.
208    index: usize,
209}
210
211impl<'a, Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Iterator
212    for StringIter<'a, Nullable, OffsetItem, Buffer>
213where
214    StringArray<Nullable, OffsetItem, Buffer>: Length + Index,
215{
216    type Item = <StringArray<Nullable, OffsetItem, Buffer> as Index>::Item<'a>;
217
218    fn next(&mut self) -> Option<Self::Item> {
219        self.array
220            .index(self.index)
221            .into_iter()
222            .inspect(|_| {
223                self.index += 1;
224            })
225            .next()
226    }
227}
228
229impl<'a, Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> IntoIterator
230    for &'a StringArray<Nullable, OffsetItem, Buffer>
231where
232    StringArray<Nullable, OffsetItem, Buffer>: Index + Length,
233{
234    type Item = <StringArray<Nullable, OffsetItem, Buffer> as Index>::Item<'a>;
235    type IntoIter = StringIter<'a, Nullable, OffsetItem, Buffer>;
236
237    fn into_iter(self) -> Self::IntoIter {
238        StringIter {
239            array: self,
240            index: 0,
241        }
242    }
243}
244
245impl<OffsetItem: Offset, Buffer: BufferType> IntoIterator
246    for StringArray<NonNullable, OffsetItem, Buffer>
247where
248    VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: IntoIterator<Item = Vec<u8>>,
249{
250    type Item = String;
251    type IntoIter = Map<
252        <VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer> as IntoIterator>::IntoIter,
253        fn(
254            <VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer> as IntoIterator>::Item,
255        ) -> String,
256    >;
257
258    fn into_iter(self) -> Self::IntoIter {
259        self.0.into_iter().map(|bytes| {
260            // SAFETY:
261            // - String arrays contain valid UTF8.
262            unsafe { String::from_utf8_unchecked(bytes) }
263        })
264    }
265}
266
267impl<OffsetItem: Offset, Buffer: BufferType> IntoIterator
268    for StringArray<Nullable, OffsetItem, Buffer>
269where
270    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: IntoIterator<Item = Option<Vec<u8>>>,
271{
272    type Item = Option<String>;
273    type IntoIter = Map<
274        <VariableSizeBinaryArray<Nullable, OffsetItem, Buffer> as IntoIterator>::IntoIter,
275        fn(
276            <VariableSizeBinaryArray<Nullable, OffsetItem, Buffer> as IntoIterator>::Item,
277        ) -> Option<String>,
278    >;
279
280    fn into_iter(self) -> Self::IntoIter {
281        self.0.into_iter().map(|opt| {
282            opt.map(|bytes| {
283                // SAFETY:
284                // - String arrays contain valid UTF8.
285                unsafe { String::from_utf8_unchecked(bytes) }
286            })
287        })
288    }
289}
290
291impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Length
292    for StringArray<Nullable, OffsetItem, Buffer>
293where
294    VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Length,
295{
296    fn len(&self) -> usize {
297        self.0.len()
298    }
299}
300
301impl<OffsetItem: Offset, Buffer: BufferType> BitmapRef
302    for StringArray<Nullable, OffsetItem, Buffer>
303{
304    type Buffer = Buffer;
305
306    fn bitmap_ref(&self) -> &Bitmap<Self::Buffer> {
307        self.0.bitmap_ref()
308    }
309}
310
311impl<OffsetItem: Offset, Buffer: BufferType> BitmapRefMut
312    for StringArray<Nullable, OffsetItem, Buffer>
313{
314    fn bitmap_ref_mut(&mut self) -> &mut Bitmap<Self::Buffer> {
315        self.0.bitmap_ref_mut()
316    }
317}
318
319impl<OffsetItem: Offset, Buffer: BufferType> ValidityBitmap
320    for StringArray<Nullable, OffsetItem, Buffer>
321{
322}
323
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{array::ArrayTypeOf, buffer::BufferRef};

    /// Collecting borrowed and owned strings stores the concatenated bytes
    /// and the matching offsets.
    #[test]
    fn from_iter() {
        // Borrowed string slices.
        let borrowed = ["1", "23", "456", "7890"];
        let from_borrowed: ArrayTypeOf<String> = borrowed.into_iter().collect();
        assert_eq!(from_borrowed.len(), 4);
        assert_eq!(from_borrowed.0 .0.data.0, b"1234567890");

        // Owned strings.
        let owned = vec![String::from("a"), String::from("sd"), String::from("f")];
        let from_owned: StringArray = owned.into_iter().collect();
        assert_eq!(from_owned.len(), 3);
        assert_eq!(from_owned.0 .0.data.0, &[97, 115, 100, 102]);
        assert_eq!(from_owned.0 .0.offsets, &[0, 1, 3, 4]);
    }

    /// Collecting optional strings tracks validity and leaves null items
    /// without bytes.
    #[test]
    fn from_iter_nullable() {
        let items = vec![Some("a"), None, Some("sd"), Some("f"), None];
        let array: StringArray<Nullable> = items.into_iter().collect();
        assert_eq!(array.len(), 5);

        // One expectation per index, including the first index past the end.
        let expected_validity = [
            Some(true),
            Some(false),
            Some(true),
            Some(true),
            Some(false),
            None,
        ];
        for (index, expected) in expected_validity.into_iter().enumerate() {
            assert_eq!(array.is_valid(index), expected);
        }

        assert_eq!(array.0 .0.data.0, "asdf".as_bytes());
        assert_eq!(array.0 .0.offsets.as_ref(), &[0, 1, 1, 3, 4, 4]);
        assert_eq!(
            array.bitmap_ref().into_iter().collect::<Vec<_>>(),
            &[true, false, true, true, false]
        );
    }

    /// Consuming an array yields the items it was built from.
    #[test]
    fn into_iter() {
        let expected = ["1", "23", "456", "7890"];
        let array: StringArray = expected.into_iter().collect();
        let collected: Vec<_> = array.into_iter().collect();
        assert_eq!(collected, expected);

        let expected_nullable = vec![
            Some(String::from("a")),
            None,
            Some(String::from("sd")),
            Some(String::from("f")),
            None,
        ];
        let array_nullable: StringArray<Nullable> =
            expected_nullable.clone().into_iter().collect();
        let collected_nullable: Vec<_> = array_nullable.into_iter().collect();
        assert_eq!(collected_nullable, expected_nullable);
    }

    /// Converting a non-nullable array into a nullable one marks every item
    /// as valid.
    #[test]
    fn convert_nullable() {
        let items = ["hello", " ", "world"];
        let array: StringArray = items.into_iter().map(String::from).collect();
        let nullable = StringArray::<Nullable>::from(array);
        assert_eq!(nullable.bitmap_ref().buffer_ref(), &[0b0000_0111]);
    }
}