1use crate::{format::pb, rowids::bitmap::Bitmap};
5use lance_core::{Error, Result};
6use snafu::location;
7
8use super::{encoded_array::EncodedU64Array, RowIdSequence, U64Segment};
9use prost::Message;
10
11impl TryFrom<pb::RowIdSequence> for RowIdSequence {
12 type Error = Error;
13
14 fn try_from(pb: pb::RowIdSequence) -> Result<Self> {
15 Ok(Self(
16 pb.segments
17 .into_iter()
18 .map(U64Segment::try_from)
19 .collect::<Result<Vec<_>>>()?,
20 ))
21 }
22}
23
24impl TryFrom<pb::U64Segment> for U64Segment {
25 type Error = Error;
26
27 fn try_from(pb: pb::U64Segment) -> Result<Self> {
28 use pb::u64_segment as pb_seg;
29 use pb::u64_segment::Segment::*;
30 match pb.segment {
31 Some(Range(pb_seg::Range { start, end })) => Ok(Self::Range(start..end)),
32 Some(RangeWithHoles(pb_seg::RangeWithHoles { start, end, holes })) => {
33 let holes = holes
34 .ok_or_else(|| Error::invalid_input("missing hole", location!()))?
35 .try_into()?;
36 Ok(Self::RangeWithHoles {
37 range: start..end,
38 holes,
39 })
40 }
41 Some(RangeWithBitmap(pb_seg::RangeWithBitmap { start, end, bitmap })) => {
42 Ok(Self::RangeWithBitmap {
43 range: start..end,
44 bitmap: Bitmap {
45 data: bitmap,
46 len: (end - start) as usize,
47 },
48 })
49 }
50 Some(SortedArray(array)) => Ok(Self::SortedArray(EncodedU64Array::try_from(array)?)),
51 Some(Array(array)) => Ok(Self::Array(EncodedU64Array::try_from(array)?)),
52 None => Err(Error::invalid_input("missing segment type", location!())),
55 }
56 }
57}
58
59impl TryFrom<pb::EncodedU64Array> for EncodedU64Array {
60 type Error = Error;
61
62 fn try_from(pb: pb::EncodedU64Array) -> Result<Self> {
63 use pb::encoded_u64_array as pb_arr;
64 use pb::encoded_u64_array::Array::*;
65 match pb.array {
66 Some(U16Array(pb_arr::U16Array { base, offsets })) => {
67 assert!(
68 offsets.len() % 2 == 0,
69 "Must have even number of bytes to store u16 array"
70 );
71 let offsets = offsets
72 .chunks_exact(2)
73 .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
74 .collect();
75 Ok(Self::U16 { base, offsets })
76 }
77 Some(U32Array(pb_arr::U32Array { base, offsets })) => {
78 assert!(
79 offsets.len() % 4 == 0,
80 "Must have even number of bytes to store u32 array"
81 );
82 let offsets = offsets
83 .chunks_exact(4)
84 .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
85 .collect();
86 Ok(Self::U32 { base, offsets })
87 }
88 Some(U64Array(pb_arr::U64Array { values })) => {
89 assert!(
90 values.len() % 8 == 0,
91 "Must have even number of bytes to store u64 array"
92 );
93 let values = values
94 .chunks_exact(8)
95 .map(|chunk| {
96 u64::from_le_bytes([
97 chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6],
98 chunk[7],
99 ])
100 })
101 .collect();
102 Ok(Self::U64(values))
103 }
104 None => Err(Error::invalid_input("missing array type", location!())),
107 }
108 }
109}
110
111impl From<RowIdSequence> for pb::RowIdSequence {
112 fn from(sequence: RowIdSequence) -> Self {
113 Self {
114 segments: sequence.0.into_iter().map(pb::U64Segment::from).collect(),
115 }
116 }
117}
118
119impl From<U64Segment> for pb::U64Segment {
120 fn from(segment: U64Segment) -> Self {
121 match segment {
122 U64Segment::Range(range) => Self {
123 segment: Some(pb::u64_segment::Segment::Range(pb::u64_segment::Range {
124 start: range.start,
125 end: range.end,
126 })),
127 },
128 U64Segment::RangeWithHoles { range, holes } => Self {
129 segment: Some(pb::u64_segment::Segment::RangeWithHoles(
130 pb::u64_segment::RangeWithHoles {
131 start: range.start,
132 end: range.end,
133 holes: Some(holes.into()),
134 },
135 )),
136 },
137 U64Segment::RangeWithBitmap { range, bitmap } => Self {
138 segment: Some(pb::u64_segment::Segment::RangeWithBitmap(
139 pb::u64_segment::RangeWithBitmap {
140 start: range.start,
141 end: range.end,
142 bitmap: bitmap.data,
143 },
144 )),
145 },
146 U64Segment::SortedArray(array) => Self {
147 segment: Some(pb::u64_segment::Segment::SortedArray(array.into())),
148 },
149 U64Segment::Array(array) => Self {
150 segment: Some(pb::u64_segment::Segment::Array(array.into())),
151 },
152 }
153 }
154}
155
156impl From<EncodedU64Array> for pb::EncodedU64Array {
157 fn from(array: EncodedU64Array) -> Self {
158 match array {
159 EncodedU64Array::U16 { base, offsets } => Self {
160 array: Some(pb::encoded_u64_array::Array::U16Array(
161 pb::encoded_u64_array::U16Array {
162 base,
163 offsets: offsets
164 .iter()
165 .flat_map(|&offset| offset.to_le_bytes().to_vec())
166 .collect(),
167 },
168 )),
169 },
170 EncodedU64Array::U32 { base, offsets } => Self {
171 array: Some(pb::encoded_u64_array::Array::U32Array(
172 pb::encoded_u64_array::U32Array {
173 base,
174 offsets: offsets
175 .iter()
176 .flat_map(|&offset| offset.to_le_bytes().to_vec())
177 .collect(),
178 },
179 )),
180 },
181 EncodedU64Array::U64(values) => Self {
182 array: Some(pb::encoded_u64_array::Array::U64Array(
183 pb::encoded_u64_array::U64Array {
184 values: values
185 .iter()
186 .flat_map(|&value| value.to_le_bytes().to_vec())
187 .collect(),
188 },
189 )),
190 },
191 }
192 }
193}
194
195pub fn write_row_ids(sequence: &RowIdSequence) -> Vec<u8> {
197 let pb_sequence = pb::RowIdSequence::from(sequence.clone());
198 pb_sequence.encode_to_vec()
199}
200
201pub fn read_row_ids(reader: &[u8]) -> Result<RowIdSequence> {
203 let pb_sequence = pb::RowIdSequence::decode(reader)?;
204 RowIdSequence::try_from(pb_sequence)
205}
206
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Round-trips a sequence containing every segment variant through
    /// `write_row_ids` and `read_row_ids`, and checks the segments are unchanged.
    #[test]
    fn test_write_read_row_ids() {
        let mut original = RowIdSequence::from(0..20);
        original.0.extend([
            U64Segment::Range(30..100),
            U64Segment::RangeWithHoles {
                range: 100..200,
                holes: EncodedU64Array::U64(vec![104, 108, 150]),
            },
            U64Segment::RangeWithBitmap {
                range: 200..300,
                bitmap: Bitmap::new_empty(100),
            },
            U64Segment::SortedArray(EncodedU64Array::U16 {
                base: 200,
                offsets: vec![1, 2, 3],
            }),
            U64Segment::Array(EncodedU64Array::U64(vec![1, 2, 3])),
        ]);

        let bytes = write_row_ids(&original);
        let decoded = read_row_ids(&bytes).unwrap();

        assert_eq!(original.0, decoded.0);
    }
}