polars_arrow/array/
physical_binary.rs

1use crate::bitmap::{BitmapBuilder, MutableBitmap};
2use crate::offset::{Offset, Offsets};
3
4/// # Safety
5/// The caller must ensure that `iterator` is `TrustedLen`.
6#[inline]
7#[allow(clippy::type_complexity)]
8pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, O>(
9    iterator: I,
10) -> std::result::Result<(Option<MutableBitmap>, Offsets<O>, Vec<u8>), E>
11where
12    O: Offset,
13    P: AsRef<[u8]>,
14    I: Iterator<Item = std::result::Result<Option<P>, E>>,
15{
16    let (_, upper) = iterator.size_hint();
17    let len = upper.expect("trusted_len_unzip requires an upper limit");
18
19    let mut null = BitmapBuilder::with_capacity(len);
20    let mut offsets = Vec::<O>::with_capacity(len + 1);
21    let mut values = Vec::<u8>::new();
22
23    let mut length = O::default();
24    let mut dst = offsets.as_mut_ptr();
25    std::ptr::write(dst, length);
26    dst = dst.add(1);
27    for item in iterator {
28        if let Some(item) = item? {
29            null.push_unchecked(true);
30            let s = item.as_ref();
31            length += O::from_as_usize(s.len());
32            values.extend_from_slice(s);
33        } else {
34            null.push_unchecked(false);
35        };
36
37        std::ptr::write(dst, length);
38        dst = dst.add(1);
39    }
40    assert_eq!(
41        dst.offset_from(offsets.as_ptr()) as usize,
42        len + 1,
43        "Trusted iterator length was not accurately reported"
44    );
45    offsets.set_len(len + 1);
46
47    Ok((
48        null.into_opt_mut_validity(),
49        Offsets::new_unchecked(offsets),
50        values,
51    ))
52}
53
54/// Creates [`MutableBitmap`] and two [`Vec`]s from an iterator of `Option`.
55/// The first buffer corresponds to a offset buffer, the second one
56/// corresponds to a values buffer.
57/// # Safety
58/// The caller must ensure that `iterator` is `TrustedLen`.
59#[inline]
60pub(crate) unsafe fn trusted_len_unzip<O, I, P>(
61    iterator: I,
62) -> (Option<MutableBitmap>, Offsets<O>, Vec<u8>)
63where
64    O: Offset,
65    P: AsRef<[u8]>,
66    I: Iterator<Item = Option<P>>,
67{
68    let (_, upper) = iterator.size_hint();
69    let len = upper.expect("trusted_len_unzip requires an upper limit");
70
71    let mut offsets = Offsets::<O>::with_capacity(len);
72    let mut values = Vec::<u8>::new();
73    let mut validity = MutableBitmap::new();
74
75    extend_from_trusted_len_iter(&mut offsets, &mut values, &mut validity, iterator);
76
77    let validity = if validity.unset_bits() > 0 {
78        Some(validity)
79    } else {
80        None
81    };
82
83    (validity, offsets, values)
84}
85
86/// Creates two [`Buffer`]s from an iterator of `&[u8]`.
87/// The first buffer corresponds to a offset buffer, the second to a values buffer.
88/// # Safety
89/// The caller must ensure that `iterator` is [`TrustedLen`].
90#[inline]
91pub(crate) unsafe fn trusted_len_values_iter<O, I, P>(iterator: I) -> (Offsets<O>, Vec<u8>)
92where
93    O: Offset,
94    P: AsRef<[u8]>,
95    I: Iterator<Item = P>,
96{
97    let (_, upper) = iterator.size_hint();
98    let len = upper.expect("trusted_len_unzip requires an upper limit");
99
100    let mut offsets = Offsets::<O>::with_capacity(len);
101    let mut values = Vec::<u8>::new();
102
103    extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator);
104
105    (offsets, values)
106}
107
108// Populates `offsets` and `values` [`Vec`]s with information extracted
109// from the incoming `iterator`.
110// # Safety
111// The caller must ensure the `iterator` is [`TrustedLen`]
112#[inline]
113pub(crate) unsafe fn extend_from_trusted_len_values_iter<I, P, O>(
114    offsets: &mut Offsets<O>,
115    values: &mut Vec<u8>,
116    iterator: I,
117) where
118    O: Offset,
119    P: AsRef<[u8]>,
120    I: Iterator<Item = P>,
121{
122    let lengths = iterator.map(|item| {
123        let s = item.as_ref();
124        // Push new entries for both `values` and `offsets` buffer
125        values.extend_from_slice(s);
126        s.len()
127    });
128    offsets.try_extend_from_lengths(lengths).unwrap();
129}
130
131// Populates `offsets` and `values` [`Vec`]s with information extracted
132// from the incoming `iterator`.
133// the return value indicates how many items were added.
134#[inline]
135pub(crate) fn extend_from_values_iter<I, P, O>(
136    offsets: &mut Offsets<O>,
137    values: &mut Vec<u8>,
138    iterator: I,
139) -> usize
140where
141    O: Offset,
142    P: AsRef<[u8]>,
143    I: Iterator<Item = P>,
144{
145    let (size_hint, _) = iterator.size_hint();
146
147    offsets.reserve(size_hint);
148
149    let start_index = offsets.len_proxy();
150
151    for item in iterator {
152        let bytes = item.as_ref();
153        values.extend_from_slice(bytes);
154        offsets.try_push(bytes.len()).unwrap();
155    }
156    offsets.len_proxy() - start_index
157}
158
159// Populates `offsets`, `values`, and `validity` [`Vec`]s with
160// information extracted from the incoming `iterator`.
161//
162// # Safety
163// The caller must ensure that `iterator` is [`TrustedLen`]
164#[inline]
165pub(crate) unsafe fn extend_from_trusted_len_iter<O, I, P>(
166    offsets: &mut Offsets<O>,
167    values: &mut Vec<u8>,
168    validity: &mut MutableBitmap,
169    iterator: I,
170) where
171    O: Offset,
172    P: AsRef<[u8]>,
173    I: Iterator<Item = Option<P>>,
174{
175    let (_, upper) = iterator.size_hint();
176    let additional = upper.expect("extend_from_trusted_len_iter requires an upper limit");
177
178    offsets.reserve(additional);
179    validity.reserve(additional);
180
181    let lengths = iterator.map(|item| {
182        if let Some(item) = item {
183            let bytes = item.as_ref();
184            values.extend_from_slice(bytes);
185            validity.push_unchecked(true);
186            bytes.len()
187        } else {
188            validity.push_unchecked(false);
189            0
190        }
191    });
192    offsets.try_extend_from_lengths(lengths).unwrap();
193}
194
195/// Creates two [`Vec`]s from an iterator of `&[u8]`.
196/// The first buffer corresponds to a offset buffer, the second to a values buffer.
197#[inline]
198pub(crate) fn values_iter<O, I, P>(iterator: I) -> (Offsets<O>, Vec<u8>)
199where
200    O: Offset,
201    P: AsRef<[u8]>,
202    I: Iterator<Item = P>,
203{
204    let (lower, _) = iterator.size_hint();
205
206    let mut offsets = Offsets::<O>::with_capacity(lower);
207    let mut values = Vec::<u8>::new();
208
209    for item in iterator {
210        let s = item.as_ref();
211        values.extend_from_slice(s);
212        offsets.try_push(s.len()).unwrap();
213    }
214    (offsets, values)
215}
216
217/// Extends `validity` with all items from `other`
218pub(crate) fn extend_validity(
219    length: usize,
220    validity: &mut Option<MutableBitmap>,
221    other: &Option<MutableBitmap>,
222) {
223    if let Some(other) = other {
224        if let Some(validity) = validity {
225            let slice = other.as_slice();
226            // SAFETY: invariant offset + length <= slice.len()
227            unsafe { validity.extend_from_slice_unchecked(slice, 0, other.len()) }
228        } else {
229            let mut new_validity = MutableBitmap::from_len_set(length);
230            new_validity.extend_from_slice(other.as_slice(), 0, other.len());
231            *validity = Some(new_validity);
232        }
233    }
234}