Skip to main content

lance_table/rowids/
serde.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use crate::{format::pb, rowids::bitmap::Bitmap};
5use lance_core::{Error, Result};
6
7use super::{RowIdSequence, U64Segment, encoded_array::EncodedU64Array};
8use prost::Message;
9
10impl TryFrom<pb::RowIdSequence> for RowIdSequence {
11    type Error = Error;
12
13    fn try_from(pb: pb::RowIdSequence) -> Result<Self> {
14        Ok(Self(
15            pb.segments
16                .into_iter()
17                .map(U64Segment::try_from)
18                .collect::<Result<Vec<_>>>()?,
19        ))
20    }
21}
22
23impl TryFrom<pb::U64Segment> for U64Segment {
24    type Error = Error;
25
26    fn try_from(pb: pb::U64Segment) -> Result<Self> {
27        use pb::u64_segment as pb_seg;
28        use pb::u64_segment::Segment::*;
29        match pb.segment {
30            Some(Range(pb_seg::Range { start, end })) => Ok(Self::Range(start..end)),
31            Some(RangeWithHoles(pb_seg::RangeWithHoles { start, end, holes })) => {
32                let holes = holes
33                    .ok_or_else(|| Error::invalid_input("missing hole"))?
34                    .try_into()?;
35                Ok(Self::RangeWithHoles {
36                    range: start..end,
37                    holes,
38                })
39            }
40            Some(RangeWithBitmap(pb_seg::RangeWithBitmap { start, end, bitmap })) => {
41                Ok(Self::RangeWithBitmap {
42                    range: start..end,
43                    bitmap: Bitmap {
44                        data: bitmap,
45                        len: (end - start) as usize,
46                    },
47                })
48            }
49            Some(SortedArray(array)) => Ok(Self::SortedArray(EncodedU64Array::try_from(array)?)),
50            Some(Array(array)) => Ok(Self::Array(EncodedU64Array::try_from(array)?)),
51            // TODO: why non-exhaustive?
52            // Some(_) => Err(Error::invalid_input("unknown segment type")),
53            None => Err(Error::invalid_input("missing segment type")),
54        }
55    }
56}
57
58impl TryFrom<pb::EncodedU64Array> for EncodedU64Array {
59    type Error = Error;
60
61    fn try_from(pb: pb::EncodedU64Array) -> Result<Self> {
62        use pb::encoded_u64_array as pb_arr;
63        use pb::encoded_u64_array::Array::*;
64        match pb.array {
65            Some(U16Array(pb_arr::U16Array { base, offsets })) => {
66                assert!(
67                    offsets.len() % 2 == 0,
68                    "Must have even number of bytes to store u16 array"
69                );
70                let offsets = offsets
71                    .chunks_exact(2)
72                    .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
73                    .collect();
74                Ok(Self::U16 { base, offsets })
75            }
76            Some(U32Array(pb_arr::U32Array { base, offsets })) => {
77                assert!(
78                    offsets.len() % 4 == 0,
79                    "Must have even number of bytes to store u32 array"
80                );
81                let offsets = offsets
82                    .chunks_exact(4)
83                    .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
84                    .collect();
85                Ok(Self::U32 { base, offsets })
86            }
87            Some(U64Array(pb_arr::U64Array { values })) => {
88                assert!(
89                    values.len() % 8 == 0,
90                    "Must have even number of bytes to store u64 array"
91                );
92                let values = values
93                    .chunks_exact(8)
94                    .map(|chunk| {
95                        u64::from_le_bytes([
96                            chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6],
97                            chunk[7],
98                        ])
99                    })
100                    .collect();
101                Ok(Self::U64(values))
102            }
103            // TODO: shouldn't this enum be non-exhaustive?
104            // Some(_) => Err(Error::invalid_input("unknown array type")),
105            None => Err(Error::invalid_input("missing array type")),
106        }
107    }
108}
109
110impl From<RowIdSequence> for pb::RowIdSequence {
111    fn from(sequence: RowIdSequence) -> Self {
112        Self {
113            segments: sequence.0.into_iter().map(pb::U64Segment::from).collect(),
114        }
115    }
116}
117
118impl From<U64Segment> for pb::U64Segment {
119    fn from(segment: U64Segment) -> Self {
120        match segment {
121            U64Segment::Range(range) => Self {
122                segment: Some(pb::u64_segment::Segment::Range(pb::u64_segment::Range {
123                    start: range.start,
124                    end: range.end,
125                })),
126            },
127            U64Segment::RangeWithHoles { range, holes } => Self {
128                segment: Some(pb::u64_segment::Segment::RangeWithHoles(
129                    pb::u64_segment::RangeWithHoles {
130                        start: range.start,
131                        end: range.end,
132                        holes: Some(holes.into()),
133                    },
134                )),
135            },
136            U64Segment::RangeWithBitmap { range, bitmap } => Self {
137                segment: Some(pb::u64_segment::Segment::RangeWithBitmap(
138                    pb::u64_segment::RangeWithBitmap {
139                        start: range.start,
140                        end: range.end,
141                        bitmap: bitmap.data,
142                    },
143                )),
144            },
145            U64Segment::SortedArray(array) => Self {
146                segment: Some(pb::u64_segment::Segment::SortedArray(array.into())),
147            },
148            U64Segment::Array(array) => Self {
149                segment: Some(pb::u64_segment::Segment::Array(array.into())),
150            },
151        }
152    }
153}
154
155impl From<EncodedU64Array> for pb::EncodedU64Array {
156    fn from(array: EncodedU64Array) -> Self {
157        match array {
158            EncodedU64Array::U16 { base, offsets } => Self {
159                array: Some(pb::encoded_u64_array::Array::U16Array(
160                    pb::encoded_u64_array::U16Array {
161                        base,
162                        offsets: offsets
163                            .iter()
164                            .flat_map(|&offset| offset.to_le_bytes().to_vec())
165                            .collect(),
166                    },
167                )),
168            },
169            EncodedU64Array::U32 { base, offsets } => Self {
170                array: Some(pb::encoded_u64_array::Array::U32Array(
171                    pb::encoded_u64_array::U32Array {
172                        base,
173                        offsets: offsets
174                            .iter()
175                            .flat_map(|&offset| offset.to_le_bytes().to_vec())
176                            .collect(),
177                    },
178                )),
179            },
180            EncodedU64Array::U64(values) => Self {
181                array: Some(pb::encoded_u64_array::Array::U64Array(
182                    pb::encoded_u64_array::U64Array {
183                        values: values
184                            .iter()
185                            .flat_map(|&value| value.to_le_bytes().to_vec())
186                            .collect(),
187                    },
188                )),
189            },
190        }
191    }
192}
193
194/// Serialize a rowid sequence to a buffer.
195pub fn write_row_ids(sequence: &RowIdSequence) -> Vec<u8> {
196    let pb_sequence = pb::RowIdSequence::from(sequence.clone());
197    pb_sequence.encode_to_vec()
198}
199
200/// Deserialize a rowid sequence from some bytes.
201pub fn read_row_ids(reader: &[u8]) -> Result<RowIdSequence> {
202    let pb_sequence = pb::RowIdSequence::decode(reader)?;
203    RowIdSequence::try_from(pb_sequence)
204}
205
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Serializes a sequence holding each `U64Segment` variant and checks that
    /// reading the bytes back yields the same segments.
    #[test]
    fn test_write_read_row_ids() {
        let mut original = RowIdSequence::from(0..20);
        original.0.extend([
            U64Segment::Range(30..100),
            U64Segment::RangeWithHoles {
                range: 100..200,
                holes: EncodedU64Array::U64(vec![104, 108, 150]),
            },
            U64Segment::RangeWithBitmap {
                range: 200..300,
                bitmap: Bitmap::new_empty(100),
            },
            U64Segment::SortedArray(EncodedU64Array::U16 {
                base: 200,
                offsets: vec![1, 2, 3],
            }),
            U64Segment::Array(EncodedU64Array::U64(vec![1, 2, 3])),
        ]);

        let bytes = write_row_ids(&original);
        let restored = read_row_ids(&bytes).unwrap();

        assert_eq!(original.0, restored.0);
    }
}