1use crate::{format::pb, rowids::bitmap::Bitmap};
5use lance_core::{Error, Result};
6
7use super::{RowIdSequence, U64Segment, encoded_array::EncodedU64Array};
8use prost::Message;
9
10impl TryFrom<pb::RowIdSequence> for RowIdSequence {
11 type Error = Error;
12
13 fn try_from(pb: pb::RowIdSequence) -> Result<Self> {
14 Ok(Self(
15 pb.segments
16 .into_iter()
17 .map(U64Segment::try_from)
18 .collect::<Result<Vec<_>>>()?,
19 ))
20 }
21}
22
23impl TryFrom<pb::U64Segment> for U64Segment {
24 type Error = Error;
25
26 fn try_from(pb: pb::U64Segment) -> Result<Self> {
27 use pb::u64_segment as pb_seg;
28 use pb::u64_segment::Segment::*;
29 match pb.segment {
30 Some(Range(pb_seg::Range { start, end })) => Ok(Self::Range(start..end)),
31 Some(RangeWithHoles(pb_seg::RangeWithHoles { start, end, holes })) => {
32 let holes = holes
33 .ok_or_else(|| Error::invalid_input("missing hole"))?
34 .try_into()?;
35 Ok(Self::RangeWithHoles {
36 range: start..end,
37 holes,
38 })
39 }
40 Some(RangeWithBitmap(pb_seg::RangeWithBitmap { start, end, bitmap })) => {
41 Ok(Self::RangeWithBitmap {
42 range: start..end,
43 bitmap: Bitmap {
44 data: bitmap,
45 len: (end - start) as usize,
46 },
47 })
48 }
49 Some(SortedArray(array)) => Ok(Self::SortedArray(EncodedU64Array::try_from(array)?)),
50 Some(Array(array)) => Ok(Self::Array(EncodedU64Array::try_from(array)?)),
51 None => Err(Error::invalid_input("missing segment type")),
54 }
55 }
56}
57
58impl TryFrom<pb::EncodedU64Array> for EncodedU64Array {
59 type Error = Error;
60
61 fn try_from(pb: pb::EncodedU64Array) -> Result<Self> {
62 use pb::encoded_u64_array as pb_arr;
63 use pb::encoded_u64_array::Array::*;
64 match pb.array {
65 Some(U16Array(pb_arr::U16Array { base, offsets })) => {
66 assert!(
67 offsets.len() % 2 == 0,
68 "Must have even number of bytes to store u16 array"
69 );
70 let offsets = offsets
71 .chunks_exact(2)
72 .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
73 .collect();
74 Ok(Self::U16 { base, offsets })
75 }
76 Some(U32Array(pb_arr::U32Array { base, offsets })) => {
77 assert!(
78 offsets.len() % 4 == 0,
79 "Must have even number of bytes to store u32 array"
80 );
81 let offsets = offsets
82 .chunks_exact(4)
83 .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
84 .collect();
85 Ok(Self::U32 { base, offsets })
86 }
87 Some(U64Array(pb_arr::U64Array { values })) => {
88 assert!(
89 values.len() % 8 == 0,
90 "Must have even number of bytes to store u64 array"
91 );
92 let values = values
93 .chunks_exact(8)
94 .map(|chunk| {
95 u64::from_le_bytes([
96 chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6],
97 chunk[7],
98 ])
99 })
100 .collect();
101 Ok(Self::U64(values))
102 }
103 None => Err(Error::invalid_input("missing array type")),
106 }
107 }
108}
109
110impl From<RowIdSequence> for pb::RowIdSequence {
111 fn from(sequence: RowIdSequence) -> Self {
112 Self {
113 segments: sequence.0.into_iter().map(pb::U64Segment::from).collect(),
114 }
115 }
116}
117
118impl From<U64Segment> for pb::U64Segment {
119 fn from(segment: U64Segment) -> Self {
120 match segment {
121 U64Segment::Range(range) => Self {
122 segment: Some(pb::u64_segment::Segment::Range(pb::u64_segment::Range {
123 start: range.start,
124 end: range.end,
125 })),
126 },
127 U64Segment::RangeWithHoles { range, holes } => Self {
128 segment: Some(pb::u64_segment::Segment::RangeWithHoles(
129 pb::u64_segment::RangeWithHoles {
130 start: range.start,
131 end: range.end,
132 holes: Some(holes.into()),
133 },
134 )),
135 },
136 U64Segment::RangeWithBitmap { range, bitmap } => Self {
137 segment: Some(pb::u64_segment::Segment::RangeWithBitmap(
138 pb::u64_segment::RangeWithBitmap {
139 start: range.start,
140 end: range.end,
141 bitmap: bitmap.data,
142 },
143 )),
144 },
145 U64Segment::SortedArray(array) => Self {
146 segment: Some(pb::u64_segment::Segment::SortedArray(array.into())),
147 },
148 U64Segment::Array(array) => Self {
149 segment: Some(pb::u64_segment::Segment::Array(array.into())),
150 },
151 }
152 }
153}
154
155impl From<EncodedU64Array> for pb::EncodedU64Array {
156 fn from(array: EncodedU64Array) -> Self {
157 match array {
158 EncodedU64Array::U16 { base, offsets } => Self {
159 array: Some(pb::encoded_u64_array::Array::U16Array(
160 pb::encoded_u64_array::U16Array {
161 base,
162 offsets: offsets
163 .iter()
164 .flat_map(|&offset| offset.to_le_bytes().to_vec())
165 .collect(),
166 },
167 )),
168 },
169 EncodedU64Array::U32 { base, offsets } => Self {
170 array: Some(pb::encoded_u64_array::Array::U32Array(
171 pb::encoded_u64_array::U32Array {
172 base,
173 offsets: offsets
174 .iter()
175 .flat_map(|&offset| offset.to_le_bytes().to_vec())
176 .collect(),
177 },
178 )),
179 },
180 EncodedU64Array::U64(values) => Self {
181 array: Some(pb::encoded_u64_array::Array::U64Array(
182 pb::encoded_u64_array::U64Array {
183 values: values
184 .iter()
185 .flat_map(|&value| value.to_le_bytes().to_vec())
186 .collect(),
187 },
188 )),
189 },
190 }
191 }
192}
193
194pub fn write_row_ids(sequence: &RowIdSequence) -> Vec<u8> {
196 let pb_sequence = pb::RowIdSequence::from(sequence.clone());
197 pb_sequence.encode_to_vec()
198}
199
200pub fn read_row_ids(reader: &[u8]) -> Result<RowIdSequence> {
202 let pb_sequence = pb::RowIdSequence::decode(reader)?;
203 RowIdSequence::try_from(pb_sequence)
204}
205
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A sequence holding one segment of every variant must come back
    /// unchanged after being written and read again.
    #[test]
    fn test_write_read_row_ids() {
        let mut original = RowIdSequence::from(0..20);
        original.0.extend([
            U64Segment::Range(30..100),
            U64Segment::RangeWithHoles {
                range: 100..200,
                holes: EncodedU64Array::U64(vec![104, 108, 150]),
            },
            U64Segment::RangeWithBitmap {
                range: 200..300,
                bitmap: Bitmap::new_empty(100),
            },
            U64Segment::SortedArray(EncodedU64Array::U16 {
                base: 200,
                offsets: vec![1, 2, 3],
            }),
            U64Segment::Array(EncodedU64Array::U64(vec![1, 2, 3])),
        ]);

        let bytes = write_row_ids(&original);
        let decoded = read_row_ids(&bytes).unwrap();

        assert_eq!(original.0, decoded.0);
    }
}