lance_table/rowids/
serde.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use crate::{format::pb, rowids::bitmap::Bitmap};
5use lance_core::{Error, Result};
6use snafu::location;
7
8use super::{encoded_array::EncodedU64Array, RowIdSequence, U64Segment};
9use prost::Message;
10
11impl TryFrom<pb::RowIdSequence> for RowIdSequence {
12    type Error = Error;
13
14    fn try_from(pb: pb::RowIdSequence) -> Result<Self> {
15        Ok(Self(
16            pb.segments
17                .into_iter()
18                .map(U64Segment::try_from)
19                .collect::<Result<Vec<_>>>()?,
20        ))
21    }
22}
23
24impl TryFrom<pb::U64Segment> for U64Segment {
25    type Error = Error;
26
27    fn try_from(pb: pb::U64Segment) -> Result<Self> {
28        use pb::u64_segment as pb_seg;
29        use pb::u64_segment::Segment::*;
30        match pb.segment {
31            Some(Range(pb_seg::Range { start, end })) => Ok(Self::Range(start..end)),
32            Some(RangeWithHoles(pb_seg::RangeWithHoles { start, end, holes })) => {
33                let holes = holes
34                    .ok_or_else(|| Error::invalid_input("missing hole", location!()))?
35                    .try_into()?;
36                Ok(Self::RangeWithHoles {
37                    range: start..end,
38                    holes,
39                })
40            }
41            Some(RangeWithBitmap(pb_seg::RangeWithBitmap { start, end, bitmap })) => {
42                Ok(Self::RangeWithBitmap {
43                    range: start..end,
44                    bitmap: Bitmap {
45                        data: bitmap,
46                        len: (end - start) as usize,
47                    },
48                })
49            }
50            Some(SortedArray(array)) => Ok(Self::SortedArray(EncodedU64Array::try_from(array)?)),
51            Some(Array(array)) => Ok(Self::Array(EncodedU64Array::try_from(array)?)),
52            // TODO: why non-exhaustive?
53            // Some(_) => Err(Error::invalid_input("unknown segment type", location!())),
54            None => Err(Error::invalid_input("missing segment type", location!())),
55        }
56    }
57}
58
59impl TryFrom<pb::EncodedU64Array> for EncodedU64Array {
60    type Error = Error;
61
62    fn try_from(pb: pb::EncodedU64Array) -> Result<Self> {
63        use pb::encoded_u64_array as pb_arr;
64        use pb::encoded_u64_array::Array::*;
65        match pb.array {
66            Some(U16Array(pb_arr::U16Array { base, offsets })) => {
67                assert!(
68                    offsets.len() % 2 == 0,
69                    "Must have even number of bytes to store u16 array"
70                );
71                let offsets = offsets
72                    .chunks_exact(2)
73                    .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
74                    .collect();
75                Ok(Self::U16 { base, offsets })
76            }
77            Some(U32Array(pb_arr::U32Array { base, offsets })) => {
78                assert!(
79                    offsets.len() % 4 == 0,
80                    "Must have even number of bytes to store u32 array"
81                );
82                let offsets = offsets
83                    .chunks_exact(4)
84                    .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
85                    .collect();
86                Ok(Self::U32 { base, offsets })
87            }
88            Some(U64Array(pb_arr::U64Array { values })) => {
89                assert!(
90                    values.len() % 8 == 0,
91                    "Must have even number of bytes to store u64 array"
92                );
93                let values = values
94                    .chunks_exact(8)
95                    .map(|chunk| {
96                        u64::from_le_bytes([
97                            chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6],
98                            chunk[7],
99                        ])
100                    })
101                    .collect();
102                Ok(Self::U64(values))
103            }
104            // TODO: shouldn't this enum be non-exhaustive?
105            // Some(_) => Err(Error::invalid_input("unknown array type", location!())),
106            None => Err(Error::invalid_input("missing array type", location!())),
107        }
108    }
109}
110
111impl From<RowIdSequence> for pb::RowIdSequence {
112    fn from(sequence: RowIdSequence) -> Self {
113        Self {
114            segments: sequence.0.into_iter().map(pb::U64Segment::from).collect(),
115        }
116    }
117}
118
119impl From<U64Segment> for pb::U64Segment {
120    fn from(segment: U64Segment) -> Self {
121        match segment {
122            U64Segment::Range(range) => Self {
123                segment: Some(pb::u64_segment::Segment::Range(pb::u64_segment::Range {
124                    start: range.start,
125                    end: range.end,
126                })),
127            },
128            U64Segment::RangeWithHoles { range, holes } => Self {
129                segment: Some(pb::u64_segment::Segment::RangeWithHoles(
130                    pb::u64_segment::RangeWithHoles {
131                        start: range.start,
132                        end: range.end,
133                        holes: Some(holes.into()),
134                    },
135                )),
136            },
137            U64Segment::RangeWithBitmap { range, bitmap } => Self {
138                segment: Some(pb::u64_segment::Segment::RangeWithBitmap(
139                    pb::u64_segment::RangeWithBitmap {
140                        start: range.start,
141                        end: range.end,
142                        bitmap: bitmap.data,
143                    },
144                )),
145            },
146            U64Segment::SortedArray(array) => Self {
147                segment: Some(pb::u64_segment::Segment::SortedArray(array.into())),
148            },
149            U64Segment::Array(array) => Self {
150                segment: Some(pb::u64_segment::Segment::Array(array.into())),
151            },
152        }
153    }
154}
155
156impl From<EncodedU64Array> for pb::EncodedU64Array {
157    fn from(array: EncodedU64Array) -> Self {
158        match array {
159            EncodedU64Array::U16 { base, offsets } => Self {
160                array: Some(pb::encoded_u64_array::Array::U16Array(
161                    pb::encoded_u64_array::U16Array {
162                        base,
163                        offsets: offsets
164                            .iter()
165                            .flat_map(|&offset| offset.to_le_bytes().to_vec())
166                            .collect(),
167                    },
168                )),
169            },
170            EncodedU64Array::U32 { base, offsets } => Self {
171                array: Some(pb::encoded_u64_array::Array::U32Array(
172                    pb::encoded_u64_array::U32Array {
173                        base,
174                        offsets: offsets
175                            .iter()
176                            .flat_map(|&offset| offset.to_le_bytes().to_vec())
177                            .collect(),
178                    },
179                )),
180            },
181            EncodedU64Array::U64(values) => Self {
182                array: Some(pb::encoded_u64_array::Array::U64Array(
183                    pb::encoded_u64_array::U64Array {
184                        values: values
185                            .iter()
186                            .flat_map(|&value| value.to_le_bytes().to_vec())
187                            .collect(),
188                    },
189                )),
190            },
191        }
192    }
193}
194
195/// Serialize a rowid sequence to a buffer.
196pub fn write_row_ids(sequence: &RowIdSequence) -> Vec<u8> {
197    let pb_sequence = pb::RowIdSequence::from(sequence.clone());
198    pb_sequence.encode_to_vec()
199}
200
201/// Deserialize a rowid sequence from some bytes.
202pub fn read_row_ids(reader: &[u8]) -> Result<RowIdSequence> {
203    let pb_sequence = pb::RowIdSequence::decode(reader)?;
204    RowIdSequence::try_from(pb_sequence)
205}
206
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Round-trips a sequence through `write_row_ids` and `read_row_ids` and
    /// expects the decoded segments to equal the originals. The sequence holds
    /// one explicitly added segment of each `U64Segment` variant.
    #[test]
    fn test_write_read_row_ids() {
        let mut sequence = RowIdSequence::from(0..20);
        sequence.0.extend([
            U64Segment::Range(30..100),
            U64Segment::RangeWithHoles {
                range: 100..200,
                holes: EncodedU64Array::U64(vec![104, 108, 150]),
            },
            U64Segment::RangeWithBitmap {
                range: 200..300,
                bitmap: Bitmap::new_empty(100),
            },
            U64Segment::SortedArray(EncodedU64Array::U16 {
                base: 200,
                offsets: vec![1, 2, 3],
            }),
            U64Segment::Array(EncodedU64Array::U64(vec![1, 2, 3])),
        ]);

        let bytes = write_row_ids(&sequence);
        let round_tripped = read_row_ids(&bytes).unwrap();

        assert_eq!(sequence.0, round_tripped.0);
    }
}