1use crate::bitmap::container::{Container, ARRAY_LIMIT};
2use crate::bitmap::store::{ArrayStore, BitmapStore, Store, BITMAP_LENGTH};
3use crate::RoaringBitmap;
4use bytemuck::cast_slice_mut;
5use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
6use core::convert::Infallible;
7use core::ops::RangeInclusive;
8use std::error::Error;
9use std::io;
10
11pub(crate) const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
12pub(crate) const SERIAL_COOKIE: u16 = 12347;
13pub(crate) const NO_OFFSET_THRESHOLD: usize = 4;
14
15pub(crate) const DESCRIPTION_BYTES: usize = 4;
17pub(crate) const OFFSET_BYTES: usize = 4;
18
19impl RoaringBitmap {
20 pub fn serialized_size(&self) -> usize {
36 let container_sizes: usize = self
37 .containers
38 .iter()
39 .map(|container| match container.store {
40 Store::Array(ref values) => 8 + values.len() as usize * 2,
41 Store::Bitmap(..) => 8 + 8 * 1024,
42 })
43 .sum();
44
45 8 + container_sizes
47 }
48
49 pub fn serialize_into<W: io::Write>(&self, mut writer: W) -> io::Result<()> {
67 writer.write_u32::<LittleEndian>(SERIAL_COOKIE_NO_RUNCONTAINER)?;
68 writer.write_u32::<LittleEndian>(self.containers.len() as u32)?;
69
70 for container in &self.containers {
71 writer.write_u16::<LittleEndian>(container.key)?;
72 writer.write_u16::<LittleEndian>((container.len() - 1) as u16)?;
73 }
74
75 let mut offset = 8 + 8 * self.containers.len() as u32;
76 for container in &self.containers {
77 writer.write_u32::<LittleEndian>(offset)?;
78 match container.store {
79 Store::Array(ref values) => {
80 offset += values.len() as u32 * 2;
81 }
82 Store::Bitmap(..) => {
83 offset += 8 * 1024;
84 }
85 }
86 }
87
88 for container in &self.containers {
89 match container.store {
90 Store::Array(ref values) => {
91 for &value in values.iter() {
92 writer.write_u16::<LittleEndian>(value)?;
93 }
94 }
95 Store::Bitmap(ref bits) => {
96 for &value in bits.as_array() {
97 writer.write_u64::<LittleEndian>(value)?;
98 }
99 }
100 }
101 }
102
103 Ok(())
104 }
105
106 pub fn deserialize_from<R: io::Read>(reader: R) -> io::Result<RoaringBitmap> {
127 RoaringBitmap::deserialize_from_impl(reader, ArrayStore::try_from, BitmapStore::try_from)
128 }
129
130 pub fn deserialize_unchecked_from<R: io::Read>(reader: R) -> io::Result<RoaringBitmap> {
150 RoaringBitmap::deserialize_from_impl::<R, _, Infallible, _, Infallible>(
151 reader,
152 |values| Ok(ArrayStore::from_vec_unchecked(values)),
153 |len, values| Ok(BitmapStore::from_unchecked(len, values)),
154 )
155 }
156
157 fn deserialize_from_impl<R, A, AErr, B, BErr>(
158 mut reader: R,
159 a: A,
160 b: B,
161 ) -> io::Result<RoaringBitmap>
162 where
163 R: io::Read,
164 A: Fn(Vec<u16>) -> Result<ArrayStore, AErr>,
165 AErr: Error + Send + Sync + 'static,
166 B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
167 BErr: Error + Send + Sync + 'static,
168 {
169 let (size, has_offsets, has_run_containers) = {
171 let cookie = reader.read_u32::<LittleEndian>()?;
172 if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
173 (reader.read_u32::<LittleEndian>()? as usize, true, false)
174 } else if (cookie as u16) == SERIAL_COOKIE {
175 let size = ((cookie >> 16) + 1) as usize;
176 (size, size >= NO_OFFSET_THRESHOLD, true)
177 } else {
178 return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value"));
179 }
180 };
181
182 let run_container_bitmap = if has_run_containers {
184 let mut bitmap = vec![0u8; (size + 7) / 8];
185 reader.read_exact(&mut bitmap)?;
186 Some(bitmap)
187 } else {
188 None
189 };
190
191 if size > u16::MAX as usize + 1 {
192 return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
193 }
194
195 let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES];
197 reader.read_exact(&mut description_bytes)?;
198 let mut description_bytes = &description_bytes[..];
199
200 if has_offsets {
201 let mut offsets = vec![0u8; size * OFFSET_BYTES];
202 reader.read_exact(&mut offsets)?;
203 drop(offsets); }
205
206 let mut containers = Vec::with_capacity(size);
207
208 for i in 0..size {
210 let key = description_bytes.read_u16::<LittleEndian>()?;
211 let cardinality = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
212
213 let is_run_container =
215 run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);
216
217 let store = if is_run_container {
218 let runs = reader.read_u16::<LittleEndian>()?;
219 let mut intervals = vec![[0, 0]; runs as usize];
220 reader.read_exact(cast_slice_mut(&mut intervals))?;
221 intervals.iter_mut().for_each(|[s, len]| {
222 *s = u16::from_le(*s);
223 *len = u16::from_le(*len);
224 });
225
226 let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
227 let mut store = Store::with_capacity(cardinality);
228 intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> {
229 let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
230 store.insert_range(RangeInclusive::new(s, end));
231 Ok(())
232 })?;
233 store
234 } else if cardinality <= ARRAY_LIMIT {
235 let mut values = vec![0; cardinality as usize];
236 reader.read_exact(cast_slice_mut(&mut values))?;
237 values.iter_mut().for_each(|n| *n = u16::from_le(*n));
238 let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
239 Store::Array(array)
240 } else {
241 let mut values = Box::new([0; BITMAP_LENGTH]);
242 reader.read_exact(cast_slice_mut(&mut values[..]))?;
243 values.iter_mut().for_each(|n| *n = u64::from_le(*n));
244 let bitmap = b(cardinality, values)
245 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
246 Store::Bitmap(bitmap)
247 };
248
249 containers.push(Container { key, store });
250 }
251
252 Ok(RoaringBitmap { containers })
253 }
254}
255
256#[cfg(test)]
257mod test {
258 use crate::{bitmap::store::BITMAP_LENGTH, RoaringBitmap};
259 use proptest::prelude::*;
260
261 proptest! {
262 #[test]
263 fn test_serialization(
264 bitmap in RoaringBitmap::arbitrary(),
265 ) {
266 let mut buffer = Vec::new();
267 bitmap.serialize_into(&mut buffer).unwrap();
268 prop_assert_eq!(bitmap, RoaringBitmap::deserialize_from(buffer.as_slice()).unwrap());
269 }
270 }
271
272 #[test]
273 fn test_from_lsb0_bytes() {
274 const CONTAINER_OFFSET: u32 = u64::BITS * BITMAP_LENGTH as u32;
275 const CONTAINER_OFFSET_IN_BYTES: u32 = CONTAINER_OFFSET / 8;
276 let mut bytes = vec![0xff; CONTAINER_OFFSET_IN_BYTES as usize];
277 bytes.extend([0x00; CONTAINER_OFFSET_IN_BYTES as usize]);
278 bytes.extend([0b00000001, 0b00000010, 0b00000011, 0b00000100]);
279
280 let offset = 32;
281 let rb = RoaringBitmap::from_lsb0_bytes(offset, &bytes);
282 for i in 0..offset {
283 assert!(!rb.contains(i), "{i} should not be in the bitmap");
284 }
285 for i in offset..offset + CONTAINER_OFFSET {
286 assert!(rb.contains(i), "{i} should be in the bitmap");
287 }
288 for i in offset + CONTAINER_OFFSET..offset + CONTAINER_OFFSET * 2 {
289 assert!(!rb.contains(i), "{i} should not be in the bitmap");
290 }
291 for bit in [0, 9, 16, 17, 26] {
292 let i = bit + offset + CONTAINER_OFFSET * 2;
293 assert!(rb.contains(i), "{i} should be in the bitmap");
294 }
295
296 assert_eq!(rb.len(), CONTAINER_OFFSET as u64 + 5);
297
298 let mut bytes = vec![0x00u8; CONTAINER_OFFSET_IN_BYTES as usize];
300 bytes.extend([0xff]);
301 let rb = RoaringBitmap::from_lsb0_bytes(0, &bytes);
302 assert_eq!(rb.min(), Some(CONTAINER_OFFSET));
303
304 let rb = RoaringBitmap::from_lsb0_bytes(8, &bytes);
305 assert_eq!(rb.min(), Some(CONTAINER_OFFSET + 8));
306
307 let bytes = [0x80];
309 let rb = RoaringBitmap::from_lsb0_bytes(0xFFFFFFF8, &bytes);
310 assert_eq!(rb.len(), 1);
311 assert!(rb.contains(u32::MAX));
312
313 let bytes = vec![0xFF; 0x1_0000 / 8];
315 let rb = RoaringBitmap::from_lsb0_bytes(0xFFFF0000, &bytes);
316 assert_eq!(rb.len(), 0x1_0000);
317 assert!(rb.contains(u32::MAX));
318 }
319
320 #[test]
321 fn test_from_lsb0_bytes_not_multiple_of_8() {
322 const CONTAINER_OFFSET: u32 = u64::BITS * BITMAP_LENGTH as u32;
323 const CONTAINER_OFFSET_IN_BYTES: u32 = CONTAINER_OFFSET / 8;
324
325 let mut bytes = vec![0b0101_1001];
326 bytes.extend([0x00; CONTAINER_OFFSET_IN_BYTES as usize]);
327 bytes.extend([0b00000001, 0b00000010, 0b00000011, 0b00000100]);
328
329 let mut indices = vec![0, 3, 4, 6];
330 indices.extend([0, 9, 16, 17, 26].map(|i| 8 + CONTAINER_OFFSET + i));
331
332 for offset in 0..8 {
333 let rb = RoaringBitmap::from_lsb0_bytes(offset, &bytes);
334 for i in indices.iter().map(|&i| i + offset) {
335 assert!(rb.contains(i), "{i} should be in the bitmap");
336 }
337 }
338 }
339
340 #[test]
341 #[should_panic(expected = "<= 2^32")]
342 fn test_from_lsb0_bytes_overflow() {
343 let bytes = [0x01, 0x01];
344 RoaringBitmap::from_lsb0_bytes(u32::MAX - 7, &bytes);
345 }
346
347 #[test]
348 fn test_deserialize_overflow_s_plus_len() {
349 let data = vec![59, 48, 0, 0, 255, 130, 254, 59, 48, 2, 0, 41, 255, 255, 166, 197, 4, 0, 2];
350 let res = RoaringBitmap::deserialize_from(data.as_slice());
351 assert!(res.is_err());
352 }
353}