polars_arrow/legacy/array/
utf8.rs

1use crate::array::{BinaryArray, Utf8Array};
2use crate::datatypes::ArrowDataType;
3use crate::legacy::trusted_len::TrustedLenPush;
4use crate::offset::Offsets;
5
6#[inline]
7unsafe fn extend_from_trusted_len_values_iter<I, P>(
8    offsets: &mut Vec<i64>,
9    values: &mut Vec<u8>,
10    iterator: I,
11) where
12    P: AsRef<[u8]>,
13    I: Iterator<Item = P>,
14{
15    let mut total_length = 0;
16    offsets.push(total_length);
17    iterator.for_each(|item| {
18        let s = item.as_ref();
19        // Push new entries for both `values` and `offsets` buffer
20        values.extend_from_slice(s);
21
22        total_length += s.len() as i64;
23        offsets.push_unchecked(total_length);
24    });
25}
26
27/// # Safety
28/// reported `len` must be correct.
29#[inline]
30unsafe fn fill_offsets_and_values<I, P>(
31    iterator: I,
32    value_capacity: usize,
33    len: usize,
34) -> (Offsets<i64>, Vec<u8>)
35where
36    P: AsRef<[u8]>,
37    I: Iterator<Item = P>,
38{
39    let mut offsets = Vec::with_capacity(len + 1);
40    let mut values = Vec::<u8>::with_capacity(value_capacity);
41
42    extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);
43
44    (Offsets::new_unchecked(offsets), values)
45}
46
/// Wrapper that exposes a string-like value as its underlying bytes, so that
/// `AsRef<str>` items can be fed to helpers that are generic over
/// `AsRef<[u8]>`.
struct StrAsBytes<P>(P);

impl<T> AsRef<[u8]> for StrAsBytes<T>
where
    T: AsRef<str>,
{
    /// Returns the bytes of the wrapped string.
    #[inline(always)]
    fn as_ref(&self) -> &[u8] {
        let Self(inner) = self;
        let text: &str = inner.as_ref();
        text.as_bytes()
    }
}
54
55pub trait Utf8FromIter {
56    #[inline]
57    fn from_values_iter<I, S>(iter: I, len: usize, size_hint: usize) -> Utf8Array<i64>
58    where
59        S: AsRef<str>,
60        I: Iterator<Item = S>,
61    {
62        let iter = iter.map(StrAsBytes);
63        let (offsets, values) = unsafe { fill_offsets_and_values(iter, size_hint, len) };
64        unsafe {
65            Utf8Array::new_unchecked(
66                ArrowDataType::LargeUtf8,
67                offsets.into(),
68                values.into(),
69                None,
70            )
71        }
72    }
73}
74
75impl Utf8FromIter for Utf8Array<i64> {}
76
77pub trait BinaryFromIter {
78    #[inline]
79    fn from_values_iter<I, S>(iter: I, len: usize, value_cap: usize) -> BinaryArray<i64>
80    where
81        S: AsRef<[u8]>,
82        I: Iterator<Item = S>,
83    {
84        let (offsets, values) = unsafe { fill_offsets_and_values(iter, value_cap, len) };
85        BinaryArray::new(
86            ArrowDataType::LargeBinary,
87            offsets.into(),
88            values.into(),
89            None,
90        )
91    }
92}
93
94impl BinaryFromIter for BinaryArray<i64> {}