1use std::{iter::Map, str};
4
5use super::{Array, VariableSizeBinaryArray};
6use crate::{
7 bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
8 buffer::{BufferType, VecBuffer},
9 nullability::{NonNullable, Nullability, Nullable},
10 offset::Offset,
11 Index, Length,
12};
13
14pub struct StringArray<
16 Nullable: Nullability = NonNullable,
17 OffsetItem: Offset = i32,
18 Buffer: BufferType = VecBuffer,
19>(pub VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>);
20
21pub type Utf8Array<Nullable = NonNullable, Buffer = VecBuffer> = StringArray<Nullable, i32, Buffer>;
23
24pub type LargeUtf8Array<Nullable = NonNullable, Buffer = VecBuffer> =
26 StringArray<Nullable, i64, Buffer>;
27
28impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType>
29 StringArray<Nullable, OffsetItem, Buffer>
30where
31 StringArray<Nullable, OffsetItem, Buffer>: Index + Length,
32{
33 pub fn iter(&self) -> StringIter<'_, Nullable, OffsetItem, Buffer> {
35 <&Self as IntoIterator>::into_iter(self)
36 }
37}
38
39impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Array
40 for StringArray<Nullable, OffsetItem, Buffer>
41{
42 type Item = Nullable::Item<String>;
43}
44
45impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Clone
46 for StringArray<Nullable, OffsetItem, Buffer>
47where
48 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Clone,
49{
50 fn clone(&self) -> Self {
51 Self(self.0.clone())
52 }
53}
54
55impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Default
56 for StringArray<Nullable, OffsetItem, Buffer>
57where
58 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Default,
59{
60 fn default() -> Self {
61 Self(VariableSizeBinaryArray::default())
62 }
63}
64
65impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> Extend<&'a T>
66 for StringArray<NonNullable, OffsetItem, Buffer>
67where
68 T: AsRef<str>,
69 VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: Extend<&'a [u8]>,
70{
71 fn extend<I: IntoIterator<Item = &'a T>>(&mut self, iter: I) {
72 self.0
73 .extend(iter.into_iter().map(|item| item.as_ref().as_bytes()));
74 }
75}
76
77impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> Extend<Option<&'a T>>
78 for StringArray<Nullable, OffsetItem, Buffer>
79where
80 T: AsRef<str>,
81 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Extend<Option<&'a [u8]>>,
82{
83 fn extend<I: IntoIterator<Item = Option<&'a T>>>(&mut self, iter: I) {
84 self.0.extend(
85 iter.into_iter()
86 .map(|opt| opt.map(|item| item.as_ref().as_bytes())),
87 );
88 }
89}
90
91impl<OffsetItem: Offset, Buffer: BufferType> Extend<String>
92 for StringArray<NonNullable, OffsetItem, Buffer>
93where
94 VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: Extend<Vec<u8>>,
95{
96 fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
97 self.0.extend(iter.into_iter().map(String::into_bytes));
98 }
99}
100
101impl<OffsetItem: Offset, Buffer: BufferType> Extend<Option<String>>
102 for StringArray<Nullable, OffsetItem, Buffer>
103where
104 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Extend<Option<Vec<u8>>>,
105{
106 fn extend<I: IntoIterator<Item = Option<String>>>(&mut self, iter: I) {
107 self.0
108 .extend(iter.into_iter().map(|opt| opt.map(String::into_bytes)));
109 }
110}
111
112impl<OffsetItem: Offset, Buffer: BufferType> From<StringArray<NonNullable, OffsetItem, Buffer>>
113 for StringArray<Nullable, OffsetItem, Buffer>
114where
115 VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>:
116 Into<VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>>,
117{
118 fn from(value: StringArray<NonNullable, OffsetItem, Buffer>) -> Self {
119 Self(value.0.into())
120 }
121}
122
123impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> FromIterator<&'a T>
124 for StringArray<NonNullable, OffsetItem, Buffer>
125where
126 T: AsRef<str>,
127 VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: FromIterator<&'a [u8]>,
128{
129 fn from_iter<I: IntoIterator<Item = &'a T>>(iter: I) -> Self {
130 Self(
131 iter.into_iter()
132 .map(|item| item.as_ref().as_bytes())
133 .collect(),
134 )
135 }
136}
137
138impl<'a, T: ?Sized, OffsetItem: Offset, Buffer: BufferType> FromIterator<Option<&'a T>>
139 for StringArray<Nullable, OffsetItem, Buffer>
140where
141 T: AsRef<str>,
142 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: FromIterator<Option<&'a [u8]>>,
143{
144 fn from_iter<I: IntoIterator<Item = Option<&'a T>>>(iter: I) -> Self {
145 Self(
146 iter.into_iter()
147 .map(|x| x.map(|item| item.as_ref().as_bytes()))
148 .collect(),
149 )
150 }
151}
152
153impl<OffsetItem: Offset, Buffer: BufferType> FromIterator<String>
154 for StringArray<NonNullable, OffsetItem, Buffer>
155where
156 VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: FromIterator<Vec<u8>>,
157{
158 fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> Self {
159 Self(iter.into_iter().map(String::into_bytes).collect())
160 }
161}
162
163impl<OffsetItem: Offset, Buffer: BufferType> FromIterator<Option<String>>
164 for StringArray<Nullable, OffsetItem, Buffer>
165where
166 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: FromIterator<Option<Vec<u8>>>,
167{
168 fn from_iter<I: IntoIterator<Item = Option<String>>>(iter: I) -> Self {
169 Self(
170 iter.into_iter()
171 .map(|x| x.map(String::into_bytes))
172 .collect(),
173 )
174 }
175}
176
177impl<OffsetItem: Offset, Buffer: BufferType> Index
178 for StringArray<NonNullable, OffsetItem, Buffer>
179{
180 type Item<'a>
181 = &'a str
182 where
183 Self: 'a;
184
185 unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> {
186 str::from_utf8_unchecked(self.0.index_unchecked(index))
187 }
188}
189
190impl<OffsetItem: Offset, Buffer: BufferType> Index for StringArray<Nullable, OffsetItem, Buffer> {
191 type Item<'a>
192 = Option<&'a str>
193 where
194 Self: 'a;
195
196 unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> {
197 self.0
198 .index_unchecked(index)
199 .map(|bytes| str::from_utf8_unchecked(bytes))
200 }
201}
202
203pub struct StringIter<'a, Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> {
205 array: &'a StringArray<Nullable, OffsetItem, Buffer>,
207 index: usize,
209}
210
211impl<'a, Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Iterator
212 for StringIter<'a, Nullable, OffsetItem, Buffer>
213where
214 StringArray<Nullable, OffsetItem, Buffer>: Length + Index,
215{
216 type Item = <StringArray<Nullable, OffsetItem, Buffer> as Index>::Item<'a>;
217
218 fn next(&mut self) -> Option<Self::Item> {
219 self.array
220 .index(self.index)
221 .into_iter()
222 .inspect(|_| {
223 self.index += 1;
224 })
225 .next()
226 }
227}
228
229impl<'a, Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> IntoIterator
230 for &'a StringArray<Nullable, OffsetItem, Buffer>
231where
232 StringArray<Nullable, OffsetItem, Buffer>: Index + Length,
233{
234 type Item = <StringArray<Nullable, OffsetItem, Buffer> as Index>::Item<'a>;
235 type IntoIter = StringIter<'a, Nullable, OffsetItem, Buffer>;
236
237 fn into_iter(self) -> Self::IntoIter {
238 StringIter {
239 array: self,
240 index: 0,
241 }
242 }
243}
244
245impl<OffsetItem: Offset, Buffer: BufferType> IntoIterator
246 for StringArray<NonNullable, OffsetItem, Buffer>
247where
248 VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer>: IntoIterator<Item = Vec<u8>>,
249{
250 type Item = String;
251 type IntoIter = Map<
252 <VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer> as IntoIterator>::IntoIter,
253 fn(
254 <VariableSizeBinaryArray<NonNullable, OffsetItem, Buffer> as IntoIterator>::Item,
255 ) -> String,
256 >;
257
258 fn into_iter(self) -> Self::IntoIter {
259 self.0.into_iter().map(|bytes| {
260 unsafe { String::from_utf8_unchecked(bytes) }
263 })
264 }
265}
266
267impl<OffsetItem: Offset, Buffer: BufferType> IntoIterator
268 for StringArray<Nullable, OffsetItem, Buffer>
269where
270 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: IntoIterator<Item = Option<Vec<u8>>>,
271{
272 type Item = Option<String>;
273 type IntoIter = Map<
274 <VariableSizeBinaryArray<Nullable, OffsetItem, Buffer> as IntoIterator>::IntoIter,
275 fn(
276 <VariableSizeBinaryArray<Nullable, OffsetItem, Buffer> as IntoIterator>::Item,
277 ) -> Option<String>,
278 >;
279
280 fn into_iter(self) -> Self::IntoIter {
281 self.0.into_iter().map(|opt| {
282 opt.map(|bytes| {
283 unsafe { String::from_utf8_unchecked(bytes) }
286 })
287 })
288 }
289}
290
291impl<Nullable: Nullability, OffsetItem: Offset, Buffer: BufferType> Length
292 for StringArray<Nullable, OffsetItem, Buffer>
293where
294 VariableSizeBinaryArray<Nullable, OffsetItem, Buffer>: Length,
295{
296 fn len(&self) -> usize {
297 self.0.len()
298 }
299}
300
301impl<OffsetItem: Offset, Buffer: BufferType> BitmapRef
302 for StringArray<Nullable, OffsetItem, Buffer>
303{
304 type Buffer = Buffer;
305
306 fn bitmap_ref(&self) -> &Bitmap<Self::Buffer> {
307 self.0.bitmap_ref()
308 }
309}
310
311impl<OffsetItem: Offset, Buffer: BufferType> BitmapRefMut
312 for StringArray<Nullable, OffsetItem, Buffer>
313{
314 fn bitmap_ref_mut(&mut self) -> &mut Bitmap<Self::Buffer> {
315 self.0.bitmap_ref_mut()
316 }
317}
318
319impl<OffsetItem: Offset, Buffer: BufferType> ValidityBitmap
320 for StringArray<Nullable, OffsetItem, Buffer>
321{
322}
323
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{array::ArrayTypeOf, buffer::BufferRef};

    /// Collecting `&str` and `String` items produces non-nullable arrays
    /// whose data buffer is the concatenation of the inputs.
    #[test]
    fn from_iter() {
        let strs = ["1", "23", "456", "7890"];
        let from_strs = strs.into_iter().collect::<ArrayTypeOf<String>>();
        assert_eq!(from_strs.len(), 4);
        assert_eq!(from_strs.0 .0.data.0, b"1234567890");

        let strings = vec!["a".to_owned(), "sd".to_owned(), "f".to_owned()];
        let from_strings = strings.into_iter().collect::<StringArray>();
        assert_eq!(from_strings.len(), 3);
        // b"asdf" spelled out as byte values.
        assert_eq!(from_strings.0 .0.data.0, &[97, 115, 100, 102]);
        assert_eq!(from_strings.0 .0.offsets, &[0, 1, 3, 4]);
    }

    /// Collecting optional items records validity per item and keeps the
    /// offset unchanged across `None` entries.
    #[test]
    fn from_iter_nullable() {
        let items = vec![Some("a"), None, Some("sd"), Some("f"), None];
        let nullable = items.into_iter().collect::<StringArray<Nullable>>();
        assert_eq!(nullable.len(), 5);

        assert_eq!(nullable.is_valid(0), Some(true));
        assert_eq!(nullable.is_valid(1), Some(false));
        assert_eq!(nullable.is_valid(2), Some(true));
        assert_eq!(nullable.is_valid(3), Some(true));
        assert_eq!(nullable.is_valid(4), Some(false));
        // One past the end.
        assert_eq!(nullable.is_valid(5), None);

        assert_eq!(nullable.0 .0.data.0, "asdf".as_bytes());
        assert_eq!(nullable.0 .0.offsets.as_ref(), &[0, 1, 1, 3, 4, 4]);

        let validity = nullable.bitmap_ref().into_iter().collect::<Vec<_>>();
        assert_eq!(validity, &[true, false, true, true, false]);
    }

    /// Consuming an array yields the items it was built from.
    #[test]
    fn into_iter() {
        let strs = ["1", "23", "456", "7890"];
        let non_nullable = strs.into_iter().collect::<StringArray>();
        assert_eq!(non_nullable.into_iter().collect::<Vec<_>>(), strs);

        let optional_strings = vec![
            Some("a".to_owned()),
            None,
            Some("sd".to_owned()),
            Some("f".to_owned()),
            None,
        ];
        let nullable = optional_strings
            .clone()
            .into_iter()
            .collect::<StringArray<Nullable>>();
        let round_tripped = nullable.into_iter().collect::<Vec<_>>();
        assert_eq!(round_tripped, optional_strings);
    }

    /// Converting a non-nullable array into a nullable one marks every item
    /// as valid.
    #[test]
    fn convert_nullable() {
        let strs = ["hello", " ", "world"];
        let non_nullable = strs
            .into_iter()
            .map(ToOwned::to_owned)
            .collect::<StringArray>();
        let nullable: StringArray<Nullable> = non_nullable.into();
        // Three items: the three lowest bits are set.
        assert_eq!(nullable.bitmap_ref().buffer_ref(), &[0b0000_0111]);
    }
}