arrow2/io/parquet/read/deserialize/binary/
utils.rs1use crate::offset::{Offset, Offsets};
2
3use super::super::utils::Pushable;
4
5#[derive(Debug)]
7pub struct Binary<O: Offset> {
8 pub offsets: Offsets<O>,
9 pub values: Vec<u8>,
10}
11
12impl<O: Offset> Pushable<usize> for Offsets<O> {
13 fn reserve(&mut self, additional: usize) {
14 self.reserve(additional)
15 }
16 #[inline]
17 fn len(&self) -> usize {
18 self.len_proxy()
19 }
20
21 #[inline]
22 fn push(&mut self, value: usize) {
23 self.try_push_usize(value).unwrap()
24 }
25
26 #[inline]
27 fn push_null(&mut self) {
28 self.extend_constant(1);
29 }
30
31 #[inline]
32 fn extend_constant(&mut self, additional: usize, _: usize) {
33 self.extend_constant(additional)
34 }
35}
36
37impl<O: Offset> Binary<O> {
38 #[inline]
39 pub fn with_capacity(capacity: usize) -> Self {
40 Self {
41 offsets: Offsets::with_capacity(capacity),
42 values: Vec::with_capacity(capacity.min(100) * 24),
43 }
44 }
45
46 #[inline]
47 pub fn push(&mut self, v: &[u8]) {
48 if self.offsets.len_proxy() == 100 && self.offsets.capacity() > 100 {
49 let bytes_per_row = self.values.len() / 100 + 1;
50 let bytes_estimate = bytes_per_row * self.offsets.capacity();
51 if bytes_estimate > self.values.capacity() {
52 self.values.reserve(bytes_estimate - self.values.capacity());
53 }
54 }
55
56 self.values.extend(v);
57 self.offsets.try_push_usize(v.len()).unwrap()
58 }
59
60 #[inline]
61 pub fn extend_constant(&mut self, additional: usize) {
62 self.offsets.extend_constant(additional);
63 }
64
65 #[inline]
66 pub fn len(&self) -> usize {
67 self.offsets.len_proxy()
68 }
69
70 #[inline]
71 pub fn extend_lengths<I: Iterator<Item = usize>>(&mut self, lengths: I, values: &mut &[u8]) {
72 let current_offset = *self.offsets.last();
73 self.offsets.try_extend_from_lengths(lengths).unwrap();
74 let new_offset = *self.offsets.last();
75 let length = new_offset.to_usize() - current_offset.to_usize();
76 let (consumed, remaining) = values.split_at(length);
77 *values = remaining;
78 self.values.extend_from_slice(consumed);
79 }
80}
81
82impl<'a, O: Offset> Pushable<&'a [u8]> for Binary<O> {
83 #[inline]
84 fn reserve(&mut self, additional: usize) {
85 let avg_len = self.values.len() / std::cmp::max(self.offsets.last().to_usize(), 1);
86 self.values.reserve(additional * avg_len);
87 self.offsets.reserve(additional);
88 }
89 #[inline]
90 fn len(&self) -> usize {
91 self.len()
92 }
93
94 #[inline]
95 fn push_null(&mut self) {
96 self.push(&[])
97 }
98
99 #[inline]
100 fn push(&mut self, value: &[u8]) {
101 self.push(value)
102 }
103
104 #[inline]
105 fn extend_constant(&mut self, additional: usize, value: &[u8]) {
106 assert_eq!(value.len(), 0);
107 self.extend_constant(additional)
108 }
109}
110
/// Iterator over consecutive length-prefixed byte strings stored in one buffer.
///
/// Each item is encoded as a 4-byte little-endian `u32` length followed by that many
/// payload bytes.
#[derive(Debug)]
pub struct BinaryIter<'a> {
    /// Bytes not yet consumed by the iterator.
    values: &'a [u8],
}

impl<'a> BinaryIter<'a> {
    /// Creates an iterator that decodes items starting at the beginning of `values`.
    pub fn new(values: &'a [u8]) -> Self {
        Self { values }
    }
}

impl<'a> Iterator for BinaryIter<'a> {
    type Item = &'a [u8];

    /// Decodes the next item, or returns `None` when the buffer is exhausted.
    ///
    /// A truncated buffer (fewer than 4 bytes left for the length prefix, or fewer
    /// payload bytes than the prefix announces) ends the iteration instead of
    /// panicking; every later call keeps returning `None`.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let buffer: &'a [u8] = self.values;

        // Split off the length prefix; anything shorter than 4 bytes (including an
        // empty buffer) cannot hold another item.
        let (length, payload) = match buffer {
            [b0, b1, b2, b3, rest @ ..] => {
                (u32::from_le_bytes([*b0, *b1, *b2, *b3]) as usize, rest)
            }
            _ => {
                self.values = &[];
                return None;
            }
        };

        // The prefix announces more bytes than are left: drop the unusable tail so
        // the iterator stays exhausted.
        if payload.len() < length {
            self.values = &[];
            return None;
        }

        let (item, remaining) = payload.split_at(length);
        self.values = remaining;
        Some(item)
    }
}
137
138#[derive(Debug)]
139pub struct SizedBinaryIter<'a> {
140 iter: BinaryIter<'a>,
141 remaining: usize,
142}
143
144impl<'a> SizedBinaryIter<'a> {
145 pub fn new(values: &'a [u8], size: usize) -> Self {
146 let iter = BinaryIter::new(values);
147 Self {
148 iter,
149 remaining: size,
150 }
151 }
152}
153
154impl<'a> Iterator for SizedBinaryIter<'a> {
155 type Item = &'a [u8];
156
157 #[inline]
158 fn next(&mut self) -> Option<Self::Item> {
159 if self.remaining == 0 {
160 return None;
161 } else {
162 self.remaining -= 1
163 };
164 self.iter.next()
165 }
166
167 fn size_hint(&self) -> (usize, Option<usize>) {
168 (self.remaining, Some(self.remaining))
169 }
170}