polars_arrow/legacy/array/
utf8.rs1use crate::array::{BinaryArray, Utf8Array};
2use crate::datatypes::ArrowDataType;
3use crate::legacy::trusted_len::TrustedLenPush;
4use crate::offset::Offsets;
5
6#[inline]
7unsafe fn extend_from_trusted_len_values_iter<I, P>(
8 offsets: &mut Vec<i64>,
9 values: &mut Vec<u8>,
10 iterator: I,
11) where
12 P: AsRef<[u8]>,
13 I: Iterator<Item = P>,
14{
15 let mut total_length = 0;
16 offsets.push(total_length);
17 iterator.for_each(|item| {
18 let s = item.as_ref();
19 values.extend_from_slice(s);
21
22 total_length += s.len() as i64;
23 offsets.push_unchecked(total_length);
24 });
25}
26
27#[inline]
30unsafe fn fill_offsets_and_values<I, P>(
31 iterator: I,
32 value_capacity: usize,
33 len: usize,
34) -> (Offsets<i64>, Vec<u8>)
35where
36 P: AsRef<[u8]>,
37 I: Iterator<Item = P>,
38{
39 let mut offsets = Vec::with_capacity(len + 1);
40 let mut values = Vec::<u8>::with_capacity(value_capacity);
41
42 extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);
43
44 (Offsets::new_unchecked(offsets), values)
45}
46
/// Adapter that exposes a string-like value as its underlying bytes.
///
/// Wrapping an `AsRef<str>` value lets it be passed where an `AsRef<[u8]>`
/// is expected.
struct StrAsBytes<P>(P);

impl<S> AsRef<[u8]> for StrAsBytes<S>
where
    S: AsRef<str>,
{
    /// Returns the bytes of the wrapped string.
    #[inline(always)]
    fn as_ref(&self) -> &[u8] {
        let text: &str = self.0.as_ref();
        text.as_bytes()
    }
}
54
55pub trait Utf8FromIter {
56 #[inline]
57 fn from_values_iter<I, S>(iter: I, len: usize, size_hint: usize) -> Utf8Array<i64>
58 where
59 S: AsRef<str>,
60 I: Iterator<Item = S>,
61 {
62 let iter = iter.map(StrAsBytes);
63 let (offsets, values) = unsafe { fill_offsets_and_values(iter, size_hint, len) };
64 unsafe {
65 Utf8Array::new_unchecked(
66 ArrowDataType::LargeUtf8,
67 offsets.into(),
68 values.into(),
69 None,
70 )
71 }
72 }
73}
74
75impl Utf8FromIter for Utf8Array<i64> {}
76
77pub trait BinaryFromIter {
78 #[inline]
79 fn from_values_iter<I, S>(iter: I, len: usize, value_cap: usize) -> BinaryArray<i64>
80 where
81 S: AsRef<[u8]>,
82 I: Iterator<Item = S>,
83 {
84 let (offsets, values) = unsafe { fill_offsets_and_values(iter, value_cap, len) };
85 BinaryArray::new(
86 ArrowDataType::LargeBinary,
87 offsets.into(),
88 values.into(),
89 None,
90 )
91 }
92}
93
94impl BinaryFromIter for BinaryArray<i64> {}