arrow2/io/parquet/read/deserialize/binary/
utils.rs

1use crate::offset::{Offset, Offsets};
2
3use super::super::utils::Pushable;
4
5/// [`Pushable`] for variable length binary data.
6#[derive(Debug)]
7pub struct Binary<O: Offset> {
8    pub offsets: Offsets<O>,
9    pub values: Vec<u8>,
10}
11
12impl<O: Offset> Pushable<usize> for Offsets<O> {
13    fn reserve(&mut self, additional: usize) {
14        self.reserve(additional)
15    }
16    #[inline]
17    fn len(&self) -> usize {
18        self.len_proxy()
19    }
20
21    #[inline]
22    fn push(&mut self, value: usize) {
23        self.try_push_usize(value).unwrap()
24    }
25
26    #[inline]
27    fn push_null(&mut self) {
28        self.extend_constant(1);
29    }
30
31    #[inline]
32    fn extend_constant(&mut self, additional: usize, _: usize) {
33        self.extend_constant(additional)
34    }
35}
36
37impl<O: Offset> Binary<O> {
38    #[inline]
39    pub fn with_capacity(capacity: usize) -> Self {
40        Self {
41            offsets: Offsets::with_capacity(capacity),
42            values: Vec::with_capacity(capacity.min(100) * 24),
43        }
44    }
45
46    #[inline]
47    pub fn push(&mut self, v: &[u8]) {
48        if self.offsets.len_proxy() == 100 && self.offsets.capacity() > 100 {
49            let bytes_per_row = self.values.len() / 100 + 1;
50            let bytes_estimate = bytes_per_row * self.offsets.capacity();
51            if bytes_estimate > self.values.capacity() {
52                self.values.reserve(bytes_estimate - self.values.capacity());
53            }
54        }
55
56        self.values.extend(v);
57        self.offsets.try_push_usize(v.len()).unwrap()
58    }
59
60    #[inline]
61    pub fn extend_constant(&mut self, additional: usize) {
62        self.offsets.extend_constant(additional);
63    }
64
65    #[inline]
66    pub fn len(&self) -> usize {
67        self.offsets.len_proxy()
68    }
69
70    #[inline]
71    pub fn extend_lengths<I: Iterator<Item = usize>>(&mut self, lengths: I, values: &mut &[u8]) {
72        let current_offset = *self.offsets.last();
73        self.offsets.try_extend_from_lengths(lengths).unwrap();
74        let new_offset = *self.offsets.last();
75        let length = new_offset.to_usize() - current_offset.to_usize();
76        let (consumed, remaining) = values.split_at(length);
77        *values = remaining;
78        self.values.extend_from_slice(consumed);
79    }
80}
81
82impl<'a, O: Offset> Pushable<&'a [u8]> for Binary<O> {
83    #[inline]
84    fn reserve(&mut self, additional: usize) {
85        let avg_len = self.values.len() / std::cmp::max(self.offsets.last().to_usize(), 1);
86        self.values.reserve(additional * avg_len);
87        self.offsets.reserve(additional);
88    }
89    #[inline]
90    fn len(&self) -> usize {
91        self.len()
92    }
93
94    #[inline]
95    fn push_null(&mut self) {
96        self.push(&[])
97    }
98
99    #[inline]
100    fn push(&mut self, value: &[u8]) {
101        self.push(value)
102    }
103
104    #[inline]
105    fn extend_constant(&mut self, additional: usize, value: &[u8]) {
106        assert_eq!(value.len(), 0);
107        self.extend_constant(additional)
108    }
109}
110
/// Iterator over length-prefixed binary items laid out back to back in a byte slice.
///
/// Each item is a 4-byte little-endian `u32` length followed by that many
/// payload bytes. The yielded slices borrow from the input and exclude the
/// length prefix.
#[derive(Debug)]
pub struct BinaryIter<'a> {
    values: &'a [u8],
}

impl<'a> BinaryIter<'a> {
    /// Creates an iterator that decodes items from the start of `values`.
    pub fn new(values: &'a [u8]) -> Self {
        Self { values }
    }
}

impl<'a> Iterator for BinaryIter<'a> {
    type Item = &'a [u8];

    /// Decodes and returns the next item.
    ///
    /// Returns `None` once the input is exhausted. Input that ends in the
    /// middle of a length prefix or a payload also ends the iteration (and
    /// keeps it ended) instead of panicking, because the item type has no way
    /// to carry an error.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let data = self.values;
        if let [b0, b1, b2, b3, rest @ ..] = data {
            let length = u32::from_le_bytes([*b0, *b1, *b2, *b3]) as usize;
            if length <= rest.len() {
                let (item, remaining) = rest.split_at(length);
                self.values = remaining;
                return Some(item);
            }
        }
        // Either nothing is left or the tail is truncated: drop the tail so
        // every later call keeps returning `None`.
        self.values = &[];
        None
    }
}
137
138#[derive(Debug)]
139pub struct SizedBinaryIter<'a> {
140    iter: BinaryIter<'a>,
141    remaining: usize,
142}
143
144impl<'a> SizedBinaryIter<'a> {
145    pub fn new(values: &'a [u8], size: usize) -> Self {
146        let iter = BinaryIter::new(values);
147        Self {
148            iter,
149            remaining: size,
150        }
151    }
152}
153
154impl<'a> Iterator for SizedBinaryIter<'a> {
155    type Item = &'a [u8];
156
157    #[inline]
158    fn next(&mut self) -> Option<Self::Item> {
159        if self.remaining == 0 {
160            return None;
161        } else {
162            self.remaining -= 1
163        };
164        self.iter.next()
165    }
166
167    fn size_hint(&self) -> (usize, Option<usize>) {
168        (self.remaining, Some(self.remaining))
169    }
170}