iroh_blobs/protocol/
range_spec.rs

1//! Specifications for ranges selection in blobs and sequences of blobs.
2//!
3//! The [`RangeSpec`] allows specifying which BAO chunks inside a single blob should be
4//! selected.
5//!
6//! The [`RangeSpecSeq`] builds on top of this to select blob chunks in an entire
7//! collection.
8use std::fmt;
9
10use bao_tree::{ChunkNum, ChunkRanges, ChunkRangesRef};
11use serde::{Deserialize, Serialize};
12use smallvec::{smallvec, SmallVec};
13
14/// A chunk range specification as a sequence of chunk offsets.
15///
16/// Offsets encode alternating spans starting on 0, where the first span is always
17/// deselected.
18///
19/// ## Examples:
20///
21/// - `[2, 5, 3, 1]` encodes five spans, of which two are selected:
22///   - `[0, 0+2) = [0, 2)` is not selected.
23///   - `[2, 2+5) = [2, 7)` is selected.
24///   - `[7, 7+3) = [7, 10)` is not selected.
25///   - `[10, 10+1) = [10, 11)` is selected.
26///   - `[11, inf)` is deselected.
27///
28///   Such a [`RangeSpec`] can be converted to a [`ChunkRanges`] using containing just the
29///   selected ranges: `ChunkRanges{2..7, 10..11}` using [`RangeSpec::to_chunk_ranges`].
30///
31/// - An empty range selects no spans, encoded as `[]`. This means nothing of the blob is
32///   selected.
33///
34/// - To select an entire blob create a single half-open span starting at the first chunk:
35///   `[0]`.
36///
37/// - To select the tail of a blob, create a single half-open span: `[15]`.
38///
39/// This is a SmallVec so we can avoid allocations for the very common case of a single
40/// chunk range.
41#[derive(Deserialize, Serialize, PartialEq, Eq, Clone, Hash)]
42#[repr(transparent)]
43pub struct RangeSpec(SmallVec<[u64; 2]>);
44
45impl RangeSpec {
46    /// Creates a new [`RangeSpec`] from a range set.
47    pub fn new(ranges: impl AsRef<ChunkRangesRef>) -> Self {
48        let ranges = ranges.as_ref().boundaries();
49        let mut res = SmallVec::new();
50        if let Some((start, rest)) = ranges.split_first() {
51            let mut prev = start.0;
52            res.push(prev);
53            for v in rest {
54                res.push(v.0 - prev);
55                prev = v.0;
56            }
57        }
58        Self(res)
59    }
60
61    /// A [`RangeSpec`] selecting nothing from the blob.
62    ///
63    /// This is called "empty" because the representation is an empty set.
64    pub const EMPTY: Self = Self(SmallVec::new_const());
65
66    /// Creates a [`RangeSpec`] selecting the entire blob.
67    pub fn all() -> Self {
68        Self(smallvec![0])
69    }
70
71    /// Checks if this [`RangeSpec`] does not select any chunks in the blob.
72    pub fn is_empty(&self) -> bool {
73        self.0.is_empty()
74    }
75
76    /// Checks if this [`RangeSpec`] selects all chunks in the blob.
77    pub fn is_all(&self) -> bool {
78        self.0.len() == 1 && self.0[0] == 0
79    }
80
81    /// Creates a [`ChunkRanges`] from this [`RangeSpec`].
82    pub fn to_chunk_ranges(&self) -> ChunkRanges {
83        // this is zero allocation for single ranges
84        // todo: optimize this in range collections
85        let mut ranges = ChunkRanges::empty();
86        let mut current = ChunkNum(0);
87        let mut on = false;
88        for &width in self.0.iter() {
89            let next = current + width;
90            if on {
91                ranges |= ChunkRanges::from(current..next);
92            }
93            current = next;
94            on = !on;
95        }
96        if on {
97            ranges |= ChunkRanges::from(current..);
98        }
99        ranges
100    }
101}
102
103impl fmt::Debug for RangeSpec {
104    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105        if f.alternate() {
106            f.debug_list()
107                .entries(self.to_chunk_ranges().iter())
108                .finish()
109        } else if self.is_all() {
110            write!(f, "all")
111        } else if self.is_empty() {
112            write!(f, "empty")
113        } else {
114            f.debug_list().entries(self.0.iter()).finish()
115        }
116    }
117}
118
119/// A chunk range specification for a sequence of blobs.
120///
121/// To select chunks in a sequence of blobs this is encoded as a sequence of `(blob_offset,
122/// range_spec)` tuples. Offsets are interpreted in an accumulating fashion.
123///
124/// ## Example:
125///
126/// Suppose two [`RangeSpec`]s `range_a` and `range_b`.
127///
128/// - `[(0, range_a), (2, empty), (3, range_b), (1, empty)]` encodes:
129///   - Select `range_a` for children in the range `[0, 2)`
130///   - do no selection (empty) for children in the range `[2, 2+3) = [2, 5)` (3 children)
131///   - Select `range_b` for children in the range `[5, 5+1) = [5, 6)` (1 children)
132///   - do no selection (empty) for children in the open range `[6, inf)`
133///
134/// Another way to understand this is that offsets represent the number of times the
135/// previous range appears.
136///
137/// Other examples:
138///
139/// - Select `range_a` from all blobs after the 5th one in the sequence: `[(5, range_a)]`.
140///
141/// - Select `range_a` from all blobs in the sequence: `[(0, range_a)]`.
142///
143/// - Select `range_a` from blob 1234: `[(1234, range_a), (1, empty)]`.
144///
145/// - Select nothing: `[]`.
146///
147/// This is a smallvec so that we can avoid allocations in the common case of a single child
148/// range.
149#[derive(Deserialize, Serialize, Debug, PartialEq, Eq, Clone, Hash)]
150#[repr(transparent)]
151pub struct RangeSpecSeq(SmallVec<[(u64, RangeSpec); 2]>);
152
153impl RangeSpecSeq {
154    #[allow(dead_code)]
155    /// A [`RangeSpecSeq`] containing no chunks from any blobs in the sequence.
156    ///
157    /// [`RangeSpecSeq::iter`], will return an empty range forever.
158    pub const fn empty() -> Self {
159        Self(SmallVec::new_const())
160    }
161
162    /// If this range seq describes a range for a single item, returns the offset
163    /// and range spec for that item
164    pub fn as_single(&self) -> Option<(u64, &RangeSpec)> {
165        // we got two elements,
166        // the first element starts at offset 0,
167        // and the second element is empty
168        if self.0.len() != 2 {
169            return None;
170        }
171        let (fst_ofs, fst_val) = &self.0[0];
172        let (snd_ofs, snd_val) = &self.0[1];
173        if *snd_ofs == 1 && snd_val.is_empty() {
174            Some((*fst_ofs, fst_val))
175        } else {
176            None
177        }
178    }
179
180    /// A [`RangeSpecSeq`] containing all chunks from all blobs.
181    ///
182    /// [`RangeSpecSeq::iter`], will return a full range forever.
183    pub fn all() -> Self {
184        Self(smallvec![(0, RangeSpec::all())])
185    }
186
187    /// Convenience function to create a [`RangeSpecSeq`] from a finite sequence of range sets.
188    pub fn from_ranges(ranges: impl IntoIterator<Item = impl AsRef<ChunkRangesRef>>) -> Self {
189        Self::new(
190            ranges
191                .into_iter()
192                .map(RangeSpec::new)
193                .chain(std::iter::once(RangeSpec::EMPTY)),
194        )
195    }
196
197    /// Convenience function to create a [`RangeSpecSeq`] from a sequence of range sets.
198    ///
199    /// Compared to [`RangeSpecSeq::from_ranges`], this will not add an empty range spec at the end, so the final
200    /// range spec will repeat forever.
201    pub fn from_ranges_infinite(
202        ranges: impl IntoIterator<Item = impl AsRef<ChunkRangesRef>>,
203    ) -> Self {
204        Self::new(ranges.into_iter().map(RangeSpec::new))
205    }
206
207    /// Creates a new range spec sequence from a sequence of range specs.
208    ///
209    /// This will merge adjacent range specs with the same value and thus make
210    /// sure that the resulting sequence is as compact as possible.
211    pub fn new(children: impl IntoIterator<Item = RangeSpec>) -> Self {
212        let mut count = 0;
213        let mut res = SmallVec::new();
214        let before_all = RangeSpec::EMPTY;
215        for v in children.into_iter() {
216            let prev = res.last().map(|(_count, spec)| spec).unwrap_or(&before_all);
217            if &v == prev {
218                count += 1;
219            } else {
220                res.push((count, v.clone()));
221                count = 1;
222            }
223        }
224        Self(res)
225    }
226
227    /// An infinite iterator of range specs for blobs in the sequence.
228    ///
229    /// Each item yielded by the iterator is the [`RangeSpec`] for a blob in the sequence.
230    /// Thus the first call to `.next()` returns the range spec for the first blob, the next
231    /// call returns the range spec of the second blob, etc.
232    pub fn iter(&self) -> RequestRangeSpecIter<'_> {
233        let before_first = self.0.first().map(|(c, _)| *c).unwrap_or_default();
234        RequestRangeSpecIter {
235            current: &EMPTY_RANGE_SPEC,
236            count: before_first,
237            remaining: &self.0,
238        }
239    }
240
241    /// An iterator over blobs in the sequence with a non-empty range spec.
242    ///
243    /// This iterator will only yield items for blobs which have at least one chunk
244    /// selected.
245    ///
246    /// This iterator is infinite if the [`RangeSpecSeq`] ends on a non-empty [`RangeSpec`],
247    /// that is all further blobs have selected chunks spans.
248    pub fn iter_non_empty(&self) -> NonEmptyRequestRangeSpecIter<'_> {
249        NonEmptyRequestRangeSpecIter::new(self.iter())
250    }
251}
252
253static EMPTY_RANGE_SPEC: RangeSpec = RangeSpec::EMPTY;
254
255/// An infinite iterator yielding [`RangeSpec`]s for each blob in a sequence.
256///
257/// The first item yielded is the [`RangeSpec`] for the first blob in the sequence, the
258/// next item is the [`RangeSpec`] for the next blob, etc.
259#[derive(Debug)]
260pub struct RequestRangeSpecIter<'a> {
261    /// current value
262    current: &'a RangeSpec,
263    /// number of times to emit current before grabbing next value
264    /// if remaining is empty, this is ignored and current is emitted forever
265    count: u64,
266    /// remaining ranges
267    remaining: &'a [(u64, RangeSpec)],
268}
269
270impl<'a> RequestRangeSpecIter<'a> {
271    pub fn new(ranges: &'a [(u64, RangeSpec)]) -> Self {
272        let before_first = ranges.first().map(|(c, _)| *c).unwrap_or_default();
273        RequestRangeSpecIter {
274            current: &EMPTY_RANGE_SPEC,
275            count: before_first,
276            remaining: ranges,
277        }
278    }
279
280    /// True if we are at the end of the iterator.
281    ///
282    /// This does not mean that the iterator is terminated, it just means that
283    /// it will repeat the same value forever.
284    pub fn is_at_end(&self) -> bool {
285        self.count == 0 && self.remaining.is_empty()
286    }
287}
288
289impl<'a> Iterator for RequestRangeSpecIter<'a> {
290    type Item = &'a RangeSpec;
291
292    fn next(&mut self) -> Option<Self::Item> {
293        Some(loop {
294            break if self.count > 0 {
295                // emit current value count times
296                self.count -= 1;
297                self.current
298            } else if let Some(((_, new), rest)) = self.remaining.split_first() {
299                // get next current value, new count, and set remaining
300                self.current = new;
301                self.count = rest.first().map(|(c, _)| *c).unwrap_or_default();
302                self.remaining = rest;
303                continue;
304            } else {
305                // no more values, just repeat current forever
306                self.current
307            };
308        })
309    }
310}
311
312/// An iterator over blobs in the sequence with a non-empty range specs.
313///
314/// default is what to use if the children of this RequestRangeSpec are empty.
315#[derive(Debug)]
316pub struct NonEmptyRequestRangeSpecIter<'a> {
317    inner: RequestRangeSpecIter<'a>,
318    count: u64,
319}
320
321impl<'a> NonEmptyRequestRangeSpecIter<'a> {
322    fn new(inner: RequestRangeSpecIter<'a>) -> Self {
323        Self { inner, count: 0 }
324    }
325
326    pub(crate) fn offset(&self) -> u64 {
327        self.count
328    }
329}
330
331impl<'a> Iterator for NonEmptyRequestRangeSpecIter<'a> {
332    type Item = (u64, &'a RangeSpec);
333
334    fn next(&mut self) -> Option<Self::Item> {
335        loop {
336            // unwrapping is safe because we know that the inner iterator will never terminate
337            let curr = self.inner.next().unwrap();
338            let count = self.count;
339            // increase count in any case until we are at the end of possible u64 values
340            // we are unlikely to ever reach this limit.
341            self.count = self.count.checked_add(1)?;
342            // yield only if the current value is non-empty
343            if !curr.is_empty() {
344                break Some((count, curr));
345            } else if self.inner.is_at_end() {
346                // terminate instead of looping until we run out of u64 values
347                break None;
348            }
349        }
350    }
351}
352
353#[cfg(test)]
354mod tests {
355    use std::ops::Range;
356
357    use proptest::prelude::*;
358
359    use super::*;
360    use crate::{assert_eq_hex, util::hexdump::parse_hexdump};
361
362    fn ranges(value_range: Range<u64>) -> impl Strategy<Value = ChunkRanges> {
363        prop::collection::vec((value_range.clone(), value_range), 0..16).prop_map(|v| {
364            let mut res = ChunkRanges::empty();
365            for (a, b) in v {
366                let start = a.min(b);
367                let end = a.max(b);
368                res |= ChunkRanges::from(ChunkNum(start)..ChunkNum(end));
369            }
370            res
371        })
372    }
373
374    fn range_spec_seq_roundtrip_impl(ranges: &[ChunkRanges]) -> Vec<ChunkRanges> {
375        let spec = RangeSpecSeq::from_ranges(ranges.iter().cloned());
376        spec.iter()
377            .map(|x| x.to_chunk_ranges())
378            .take(ranges.len())
379            .collect::<Vec<_>>()
380    }
381
382    fn range_spec_seq_bytes_roundtrip_impl(ranges: &[ChunkRanges]) -> Vec<ChunkRanges> {
383        let spec = RangeSpecSeq::from_ranges(ranges.iter().cloned());
384        let bytes = postcard::to_allocvec(&spec).unwrap();
385        let spec2: RangeSpecSeq = postcard::from_bytes(&bytes).unwrap();
386        spec2
387            .iter()
388            .map(|x| x.to_chunk_ranges())
389            .take(ranges.len())
390            .collect::<Vec<_>>()
391    }
392
393    fn mk_case(case: Vec<Range<u64>>) -> Vec<ChunkRanges> {
394        case.iter()
395            .map(|x| ChunkRanges::from(ChunkNum(x.start)..ChunkNum(x.end)))
396            .collect::<Vec<_>>()
397    }
398
399    #[test]
400    fn range_spec_wire_format() {
401        // a list of commented hex dumps and the corresponding range spec
402        let cases = [
403            (RangeSpec::EMPTY, "00"),
404            (
405                RangeSpec::all(),
406                r"
407                    01 # length prefix - 1 element
408                    00 # span width - 0. everything stating from 0 is included
409                ",
410            ),
411            (
412                RangeSpec::new(ChunkRanges::from(ChunkNum(64)..)),
413                r"
414                    01 # length prefix - 1 element
415                    40 # span width - 64. everything starting from 64 is included
416                ",
417            ),
418            (
419                RangeSpec::new(ChunkRanges::from(ChunkNum(10000)..)),
420                r"
421                    01 # length prefix - 1 element
422                    904E # span width - 10000, 904E in postcard varint encoding. everything starting from 10000 is included
423                ",
424            ),
425            (
426                RangeSpec::new(ChunkRanges::from(..ChunkNum(64))),
427                r"
428                    02 # length prefix - 2 elements
429                    00 # span width - 0. everything stating from 0 is included
430                    40 # span width - 64. everything starting from 64 is excluded
431                ",
432            ),
433            (
434                RangeSpec::new(
435                    &ChunkRanges::from(ChunkNum(1)..ChunkNum(3))
436                        | &ChunkRanges::from(ChunkNum(9)..ChunkNum(13)),
437                ),
438                r"
439                    04 # length prefix - 4 elements
440                    01 # span width - 1
441                    02 # span width - 2 (3 - 1)
442                    06 # span width - 6 (9 - 3)
443                    04 # span width - 4 (13 - 9)
444                ",
445            ),
446        ];
447        for (case, expected_hex) in cases {
448            let expected = parse_hexdump(expected_hex).unwrap();
449            assert_eq_hex!(expected, postcard::to_stdvec(&case).unwrap());
450        }
451    }
452
453    #[test]
454    fn range_spec_seq_wire_format() {
455        let cases = [
456            (RangeSpecSeq::empty(), "00"),
457            (
458                RangeSpecSeq::all(),
459                r"
460                    01 # 1 tuple in total
461                    # first tuple
462                    00 # span 0 until start
463                    0100 # 1 element, RangeSpec::all()
464            ",
465            ),
466            (
467                RangeSpecSeq::from_ranges([
468                    ChunkRanges::from(ChunkNum(1)..ChunkNum(3)),
469                    ChunkRanges::from(ChunkNum(7)..ChunkNum(13)),
470                ]),
471                r"
472                    03 # 3 tuples in total
473                    # first tuple
474                    00 # span 0 until start
475                    020102 # range 1..3
476                    # second tuple
477                    01 # span 1 until next
478                    020706 # range 7..13
479                    # third tuple
480                    01 # span 1 until next
481                    00 # empty range forever from now
482                ",
483            ),
484            (
485                RangeSpecSeq::from_ranges_infinite([
486                    ChunkRanges::empty(),
487                    ChunkRanges::empty(),
488                    ChunkRanges::empty(),
489                    ChunkRanges::from(ChunkNum(7)..),
490                    ChunkRanges::all(),
491                ]),
492                r"
493                    02 # 2 tuples in total
494                    # first tuple
495                    03 # span 3 until start (first 3 elements are empty)
496                    01 07 # range 7..
497                    # second tuple
498                    01 # span 1 until next (1 element is 7..)
499                    01 00 # ChunkRanges::all() forever from now
500                ",
501            ),
502        ];
503        for (case, expected_hex) in cases {
504            let expected = parse_hexdump(expected_hex).unwrap();
505            assert_eq_hex!(expected, postcard::to_stdvec(&case).unwrap());
506        }
507    }
508
509    /// Test that the roundtrip from [`Vec<ChunkRanges>`] via [`RangeSpec`] to [`RangeSpecSeq`]  and back works.
510    #[test]
511    fn range_spec_seq_roundtrip_cases() {
512        for case in [
513            vec![0..1, 0..0],
514            vec![1..2, 1..2, 1..2],
515            vec![1..2, 1..2, 2..3, 2..3],
516        ] {
517            let case = mk_case(case);
518            let expected = case.clone();
519            let actual = range_spec_seq_roundtrip_impl(&case);
520            assert_eq!(expected, actual);
521        }
522    }
523
524    /// Test that the creation of a [`RangeSpecSeq`] from a sequence of [`ChunkRanges`]s canonicalizes the result.
525    #[test]
526    fn range_spec_seq_canonical() {
527        for (case, expected_count) in [
528            (vec![0..1, 0..0], 2),
529            (vec![1..2, 1..2, 1..2], 2),
530            (vec![1..2, 1..2, 2..3, 2..3], 3),
531        ] {
532            let case = mk_case(case);
533            let spec = RangeSpecSeq::from_ranges(case);
534            assert_eq!(spec.0.len(), expected_count);
535        }
536    }
537
538    proptest! {
539        #[test]
540        fn range_spec_roundtrip(ranges in ranges(0..1000)) {
541            let spec = RangeSpec::new(&ranges);
542            let ranges2 = spec.to_chunk_ranges();
543            prop_assert_eq!(ranges, ranges2);
544        }
545
546        #[test]
547        fn range_spec_seq_roundtrip(ranges in proptest::collection::vec(ranges(0..100), 0..10)) {
548            let expected = ranges.clone();
549            let actual = range_spec_seq_roundtrip_impl(&ranges);
550            prop_assert_eq!(expected, actual);
551        }
552
553        #[test]
554        fn range_spec_seq_bytes_roundtrip(ranges in proptest::collection::vec(ranges(0..100), 0..10)) {
555            let expected = ranges.clone();
556            let actual = range_spec_seq_bytes_roundtrip_impl(&ranges);
557            prop_assert_eq!(expected, actual);
558        }
559    }
560}