iroh_blobs/protocol/
range_spec.rs

1//! Specifications for ranges selection in blobs and sequences of blobs.
2//!
3//! The [`ChunkRanges`] allows specifying which BAO chunks inside a single blob should be
4//! selected.
5//!
6//! The [`ChunkRangesSeq`] builds on top of this to select blob chunks in an entire
7//! collection.
8use std::{fmt, sync::OnceLock};
9
10pub use bao_tree::ChunkRanges;
11use bao_tree::{ChunkNum, ChunkRangesRef};
12use serde::{Deserialize, Serialize};
13use smallvec::{smallvec, SmallVec};
14
15pub use crate::util::ChunkRangesExt;
16
17static CHUNK_RANGES_EMPTY: OnceLock<ChunkRanges> = OnceLock::new();
18
19fn chunk_ranges_empty() -> &'static ChunkRanges {
20    CHUNK_RANGES_EMPTY.get_or_init(ChunkRanges::empty)
21}
22
23#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
24#[serde(from = "wire::RangeSpecSeq", into = "wire::RangeSpecSeq")]
25pub struct ChunkRangesSeq(pub(crate) SmallVec<[(u64, ChunkRanges); 2]>);
26
27impl std::hash::Hash for ChunkRangesSeq {
28    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
29        for (i, r) in &self.0 {
30            i.hash(state);
31            r.boundaries().hash(state);
32        }
33    }
34}
35
36impl std::ops::Index<u64> for ChunkRangesSeq {
37    type Output = ChunkRanges;
38
39    fn index(&self, index: u64) -> &Self::Output {
40        match self.0.binary_search_by(|(o, _)| o.cmp(&index)) {
41            Ok(i) => &self.0[i].1,
42            Err(i) => {
43                if i == 0 {
44                    chunk_ranges_empty()
45                } else {
46                    &self.0[i - 1].1
47                }
48            }
49        }
50    }
51}
52
53impl ChunkRangesSeq {
54    pub const fn empty() -> Self {
55        Self(SmallVec::new_const())
56    }
57
58    /// Request just the first blob.
59    pub fn root() -> Self {
60        let mut inner = SmallVec::new();
61        inner.push((0, ChunkRanges::all()));
62        inner.push((1, ChunkRanges::empty()));
63        Self(inner)
64    }
65
66    /// A [`ChunkRangesSeq`] containing all chunks from all blobs.
67    ///
68    /// [`ChunkRangesSeq::iter`], will return a full range forever.
69    pub fn all() -> Self {
70        let mut inner = SmallVec::new();
71        inner.push((0, ChunkRanges::all()));
72        Self(inner)
73    }
74
75    /// A [`ChunkRangesSeq`] getting the verified size for the first blob.
76    pub fn verified_size() -> Self {
77        let mut inner = SmallVec::new();
78        inner.push((0, ChunkRanges::last_chunk()));
79        inner.push((1, ChunkRanges::empty()));
80        Self(inner)
81    }
82
83    /// A [`ChunkRangesSeq`] getting the entire first blob and verified sizes for all others.
84    pub fn verified_child_sizes() -> Self {
85        let mut inner = SmallVec::new();
86        inner.push((0, ChunkRanges::all()));
87        inner.push((1, ChunkRanges::last_chunk()));
88        Self(inner)
89    }
90
91    /// Checks if this [`ChunkRangesSeq`] does not select any chunks in the blob.
92    pub fn is_empty(&self) -> bool {
93        self.0.is_empty()
94    }
95
96    /// Checks if this [`ChunkRangesSeq`] selects all chunks in the blob.
97    pub fn is_all(&self) -> bool {
98        if self.0.len() != 1 {
99            return false;
100        }
101        let Some((_, ranges)) = self.0.iter().next() else {
102            return false;
103        };
104        ranges.is_all()
105    }
106
107    /// If this range seq describes a range for a single item, returns the offset
108    /// and range spec for that item
109    pub fn as_single(&self) -> Option<(u64, &ChunkRanges)> {
110        // we got two elements,
111        // the first element starts at offset 0,
112        // and the second element is empty
113        if self.0.len() != 2 {
114            return None;
115        }
116        let (o1, v1) = self.0.iter().next().unwrap();
117        let (o2, v2) = self.0.iter().next_back().unwrap();
118        if *o1 == (o2 - 1) && v2.is_empty() {
119            Some((*o1, v1))
120        } else {
121            None
122        }
123    }
124
125    pub fn is_blob(&self) -> bool {
126        #[allow(clippy::match_like_matches_macro)]
127        match self.as_single() {
128            Some((0, _)) => true,
129            _ => false,
130        }
131    }
132
133    /// Convenience function to create a [`ChunkRangesSeq`] from an iterator of
134    /// chunk ranges. If the last element is non-empty, it will be repeated
135    /// forever.
136    pub fn from_ranges_infinite(ranges: impl IntoIterator<Item = ChunkRanges>) -> Self {
137        let (ranges, _) = from_ranges_inner(ranges);
138        Self(ranges)
139    }
140
141    /// Convenience function to create a [`ChunkRangesSeq`] from an iterator of
142    /// chunk ranges. If the last element is non-empty, an empty range will be
143    /// added immediately after it to terminate the sequence.
144    pub fn from_ranges(ranges: impl IntoIterator<Item = ChunkRanges>) -> Self {
145        let (mut res, next) = from_ranges_inner(ranges);
146        if let Some((_, r)) = res.iter().next_back() {
147            if !r.is_empty() {
148                res.push((next, ChunkRanges::empty()));
149            }
150        }
151        Self(res)
152    }
153
154    /// An iterator over blobs in the sequence with a non-empty range spec.
155    ///
156    /// This iterator will only yield items for blobs which have at least one chunk
157    /// selected.
158    ///
159    /// This iterator is infinite if the [`ChunkRangesSeq`] ends on a non-empty [`ChunkRanges`],
160    /// that is all further blobs have selected chunks spans.
161    pub fn iter_non_empty_infinite(&self) -> NonEmptyRequestRangeSpecIter<'_> {
162        NonEmptyRequestRangeSpecIter::new(self.iter_infinite())
163    }
164
165    /// True if this range spec sequence repeats the last range spec forever.
166    pub fn is_infinite(&self) -> bool {
167        self.0
168            .iter()
169            .next_back()
170            .map(|(_, v)| !v.is_empty())
171            .unwrap_or_default()
172    }
173
174    pub fn iter_infinite(&self) -> ChunkRangesSeqIterInfinite<'_> {
175        ChunkRangesSeqIterInfinite {
176            current: chunk_ranges_empty(),
177            offset: 0,
178            remaining: self.0.iter().peekable(),
179        }
180    }
181
182    pub fn iter(&self) -> ChunkRangesSeqIter<'_> {
183        ChunkRangesSeqIter {
184            current: chunk_ranges_empty(),
185            offset: 0,
186            remaining: self.0.iter().peekable(),
187        }
188    }
189}
190
191fn from_ranges_inner(
192    ranges: impl IntoIterator<Item = ChunkRanges>,
193) -> (SmallVec<[(u64, ChunkRanges); 2]>, u64) {
194    let mut res = SmallVec::new();
195    let mut i = 0;
196    for range in ranges.into_iter() {
197        if range
198            != res
199                .iter()
200                .next_back()
201                .map(|(_, v)| v)
202                .unwrap_or(&ChunkRanges::empty())
203        {
204            res.push((i, range));
205        }
206        i += 1;
207    }
208    (res, i)
209}
210
211/// An infinite iterator yielding [`RangeSpec`]s for each blob in a sequence.
212///
213/// The first item yielded is the [`RangeSpec`] for the first blob in the sequence, the
214/// next item is the [`RangeSpec`] for the next blob, etc.
215#[derive(Debug)]
216pub struct ChunkRangesSeqIterInfinite<'a> {
217    /// current value
218    current: &'a ChunkRanges,
219    /// current offset
220    offset: u64,
221    /// remaining ranges
222    remaining: std::iter::Peekable<std::slice::Iter<'a, (u64, ChunkRanges)>>,
223}
224
225impl<'a> ChunkRangesSeqIterInfinite<'a> {
226    /// True if we are at the end of the iterator.
227    ///
228    /// This does not mean that the iterator is terminated, it just means that
229    /// it will repeat the same value forever.
230    pub fn is_at_end(&mut self) -> bool {
231        self.remaining.peek().is_none()
232    }
233}
234
235impl<'a> Iterator for ChunkRangesSeqIterInfinite<'a> {
236    type Item = &'a ChunkRanges;
237
238    fn next(&mut self) -> Option<Self::Item> {
239        loop {
240            match self.remaining.peek() {
241                Some((offset, _)) if self.offset < *offset => {
242                    // emit current value until we reach the next offset
243                    self.offset += 1;
244                    return Some(self.current);
245                }
246                None => {
247                    // no more values, just repeat current forever
248                    self.offset += 1;
249                    return Some(self.current);
250                }
251                Some((_, ranges)) => {
252                    // get next current value, new count, and set remaining
253                    self.current = ranges;
254                    self.remaining.next();
255                }
256            }
257        }
258    }
259}
260
261/// An infinite iterator yielding [`RangeSpec`]s for each blob in a sequence.
262///
263/// The first item yielded is the [`RangeSpec`] for the first blob in the sequence, the
264/// next item is the [`RangeSpec`] for the next blob, etc.
265#[derive(Debug)]
266pub struct ChunkRangesSeqIter<'a> {
267    /// current value
268    current: &'a ChunkRanges,
269    /// current offset
270    offset: u64,
271    /// remaining ranges
272    remaining: std::iter::Peekable<std::slice::Iter<'a, (u64, ChunkRanges)>>,
273}
274
275impl<'a> Iterator for ChunkRangesSeqIter<'a> {
276    type Item = &'a ChunkRanges;
277
278    fn next(&mut self) -> Option<Self::Item> {
279        match self.remaining.peek()? {
280            (offset, _) if self.offset < *offset => {
281                // emit current value until we reach the next offset
282                self.offset += 1;
283                Some(self.current)
284            }
285            (_, ranges) => {
286                // get next current value, new count, and set remaining
287                self.current = ranges;
288                self.remaining.next();
289                self.offset += 1;
290                Some(self.current)
291            }
292        }
293    }
294}
295
296/// An iterator over blobs in the sequence with a non-empty range specs.
297///
298/// default is what to use if the children of this RequestRangeSpec are empty.
299#[derive(Debug)]
300pub struct NonEmptyRequestRangeSpecIter<'a> {
301    inner: ChunkRangesSeqIterInfinite<'a>,
302    count: u64,
303}
304
305impl<'a> NonEmptyRequestRangeSpecIter<'a> {
306    fn new(inner: ChunkRangesSeqIterInfinite<'a>) -> Self {
307        Self { inner, count: 0 }
308    }
309
310    pub(crate) fn offset(&self) -> u64 {
311        self.count
312    }
313
314    pub fn is_at_end(&mut self) -> bool {
315        self.inner.is_at_end()
316    }
317}
318
319impl<'a> Iterator for NonEmptyRequestRangeSpecIter<'a> {
320    type Item = (u64, &'a ChunkRanges);
321
322    fn next(&mut self) -> Option<Self::Item> {
323        loop {
324            // unwrapping is safe because we know that the inner iterator will never terminate
325            let curr = self.inner.next().unwrap();
326            let count = self.count;
327            // increase count in any case until we are at the end of possible u64 values
328            // we are unlikely to ever reach this limit.
329            self.count = self.count.checked_add(1)?;
330            // yield only if the current value is non-empty
331            if !curr.is_empty() {
332                break Some((count, curr));
333            } else if self.inner.is_at_end() {
334                // terminate instead of looping until we run out of u64 values
335                break None;
336            }
337        }
338    }
339}
340
341/// A chunk range specification as a sequence of chunk offsets.
342///
343/// This is just the wire encoding of a [`ChunkRanges`]. You should rarely have to
344/// interact with this directly.
345///
346/// Offsets encode alternating spans starting on 0, where the first span is always
347/// deselected.
348///
349/// ## Examples:
350///
351/// - `[2, 5, 3, 1]` encodes five spans, of which two are selected:
352///   - `[0, 0+2) = [0, 2)` is not selected.
353///   - `[2, 2+5) = [2, 7)` is selected.
354///   - `[7, 7+3) = [7, 10)` is not selected.
355///   - `[10, 10+1) = [10, 11)` is selected.
356///   - `[11, inf)` is deselected.
357///
358///   Such a [`RangeSpec`] can be converted to a [`ChunkRanges`] using containing just the
359///   selected ranges: `ChunkRanges{2..7, 10..11}` using [`RangeSpec::to_chunk_ranges`].
360///
361/// - An empty range selects no spans, encoded as `[]`. This means nothing of the blob is
362///   selected.
363///
364/// - To select an entire blob create a single half-open span starting at the first chunk:
365///   `[0]`.
366///
367/// - To select the tail of a blob, create a single half-open span: `[15]`.
368///
369/// This is a SmallVec so we can avoid allocations for the very common case of a single
370/// chunk range.
371#[derive(Deserialize, Serialize, PartialEq, Eq, Clone, Hash)]
372#[repr(transparent)]
373pub struct RangeSpec(SmallVec<[u64; 2]>);
374
375impl RangeSpec {
376    /// Creates a new [`RangeSpec`] from a range set.
377    pub fn new(ranges: impl AsRef<ChunkRangesRef>) -> Self {
378        let ranges = ranges.as_ref().boundaries();
379        let mut res = SmallVec::new();
380        if let Some((start, rest)) = ranges.split_first() {
381            let mut prev = start.0;
382            res.push(prev);
383            for v in rest {
384                res.push(v.0 - prev);
385                prev = v.0;
386            }
387        }
388        Self(res)
389    }
390
391    /// A [`RangeSpec`] selecting nothing from the blob.
392    ///
393    /// This is called "empty" because the representation is an empty set.
394    pub const EMPTY: Self = Self(SmallVec::new_const());
395
396    /// Creates a [`RangeSpec`] selecting the entire blob.
397    pub fn all() -> Self {
398        Self(smallvec![0])
399    }
400
401    /// Creates a [`RangeSpec`] selecting the last chunk, which is also a size proof.
402    pub fn verified_size() -> Self {
403        Self(smallvec![u64::MAX])
404    }
405
406    /// Checks if this [`RangeSpec`] does not select any chunks in the blob.
407    pub fn is_empty(&self) -> bool {
408        self.0.is_empty()
409    }
410
411    /// Checks if this [`RangeSpec`] selects all chunks in the blob.
412    pub fn is_all(&self) -> bool {
413        self.0.len() == 1 && self.0[0] == 0
414    }
415
416    /// Returns the number of chunks selected by this [`RangeSpec`], as a tuple
417    /// with the minimum and maximum number of chunks.
418    pub fn chunks(&self) -> (u64, Option<u64>) {
419        let mut min = 0;
420        for i in 0..self.0.len() / 2 {
421            min += self.0[2 * i + 1];
422        }
423        let max = if self.0.len() % 2 != 0 {
424            // spec is open ended
425            None
426        } else {
427            Some(min)
428        };
429        (min, max)
430    }
431
432    /// Creates a [`ChunkRanges`] from this [`RangeSpec`].
433    pub fn to_chunk_ranges(&self) -> ChunkRanges {
434        // this is zero allocation for single ranges
435        // todo: optimize this in range collections
436        let mut ranges = ChunkRanges::empty();
437        let mut current = ChunkNum(0);
438        let mut on = false;
439        for &width in self.0.iter() {
440            let next = current + width;
441            if on {
442                ranges |= ChunkRanges::from(current..next);
443            }
444            current = next;
445            on = !on;
446        }
447        if on {
448            ranges |= ChunkRanges::from(current..);
449        }
450        ranges
451    }
452}
453
454impl fmt::Debug for RangeSpec {
455    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
456        if self.is_all() {
457            write!(f, "all")
458        } else if self.is_empty() {
459            write!(f, "empty")
460        } else if !f.alternate() {
461            f.debug_list()
462                .entries(self.to_chunk_ranges().iter())
463                .finish()
464        } else {
465            f.debug_list().entries(self.0.iter()).finish()
466        }
467    }
468}
469
470mod wire {
471
472    use serde::{Deserialize, Serialize};
473    use smallvec::SmallVec;
474
475    use super::{ChunkRangesSeq, RangeSpec};
476
477    #[derive(Deserialize, Serialize)]
478    pub struct RangeSpecSeq(SmallVec<[(u64, RangeSpec); 2]>);
479
480    impl From<RangeSpecSeq> for ChunkRangesSeq {
481        fn from(wire: RangeSpecSeq) -> Self {
482            let mut offset = 0;
483            let mut res = SmallVec::new();
484            for (delta, spec) in wire.0.iter() {
485                offset += *delta;
486                res.push((offset, spec.to_chunk_ranges()));
487            }
488            Self(res)
489        }
490    }
491
492    impl From<ChunkRangesSeq> for RangeSpecSeq {
493        fn from(value: ChunkRangesSeq) -> Self {
494            let mut res = SmallVec::new();
495            let mut offset = 0;
496            for (i, r) in value.0.iter() {
497                let delta = *i - offset;
498                res.push((delta, RangeSpec::new(r)));
499                offset = *i;
500            }
501            Self(res)
502        }
503    }
504}
505
506#[cfg(test)]
507mod tests {
508    use std::ops::Range;
509
510    use iroh_test::{assert_eq_hex, hexdump::parse_hexdump};
511    use proptest::prelude::*;
512
513    use super::*;
514    use crate::util::ChunkRangesExt;
515
516    fn ranges(value_range: Range<u64>) -> impl Strategy<Value = ChunkRanges> {
517        prop::collection::vec((value_range.clone(), value_range), 0..16).prop_map(|v| {
518            let mut res = ChunkRanges::empty();
519            for (a, b) in v {
520                let start = a.min(b);
521                let end = a.max(b);
522                res |= ChunkRanges::chunks(start..end);
523            }
524            res
525        })
526    }
527
528    fn range_spec_seq_roundtrip_impl(ranges: &[ChunkRanges]) -> Vec<ChunkRanges> {
529        let spec = ChunkRangesSeq::from_ranges(ranges.iter().cloned());
530        spec.iter_infinite()
531            .take(ranges.len())
532            .cloned()
533            .collect::<Vec<_>>()
534    }
535
536    fn range_spec_seq_bytes_roundtrip_impl(ranges: &[ChunkRanges]) -> Vec<ChunkRanges> {
537        let spec = ChunkRangesSeq::from_ranges(ranges.iter().cloned());
538        let bytes = postcard::to_allocvec(&spec).unwrap();
539        let spec2: ChunkRangesSeq = postcard::from_bytes(&bytes).unwrap();
540        spec2
541            .iter_infinite()
542            .take(ranges.len())
543            .cloned()
544            .collect::<Vec<_>>()
545    }
546
547    fn mk_case(case: Vec<Range<u64>>) -> Vec<ChunkRanges> {
548        case.iter()
549            .map(|x| ChunkRanges::chunks(x.start..x.end))
550            .collect::<Vec<_>>()
551    }
552
553    #[test]
554    fn range_spec_wire_format() {
555        // a list of commented hex dumps and the corresponding range spec
556        let cases = [
557            (RangeSpec::EMPTY, "00"),
558            (
559                RangeSpec::all(),
560                r"
561                    01 # length prefix - 1 element
562                    00 # span width - 0. everything stating from 0 is included
563                ",
564            ),
565            (
566                RangeSpec::new(ChunkRanges::chunks(64..)),
567                r"
568                    01 # length prefix - 1 element
569                    40 # span width - 64. everything starting from 64 is included
570                ",
571            ),
572            (
573                RangeSpec::new(ChunkRanges::chunks(10000..)),
574                r"
575                    01 # length prefix - 1 element
576                    904E # span width - 10000, 904E in postcard varint encoding. everything starting from 10000 is included
577                ",
578            ),
579            (
580                RangeSpec::new(ChunkRanges::chunks(..64)),
581                r"
582                    02 # length prefix - 2 elements
583                    00 # span width - 0. everything stating from 0 is included
584                    40 # span width - 64. everything starting from 64 is excluded
585                ",
586            ),
587            (
588                RangeSpec::new(&ChunkRanges::chunks(1..3) | &ChunkRanges::chunks(9..13)),
589                r"
590                    04 # length prefix - 4 elements
591                    01 # span width - 1
592                    02 # span width - 2 (3 - 1)
593                    06 # span width - 6 (9 - 3)
594                    04 # span width - 4 (13 - 9)
595                ",
596            ),
597        ];
598        for (case, expected_hex) in cases {
599            let expected = parse_hexdump(expected_hex).unwrap();
600            assert_eq_hex!(expected, postcard::to_stdvec(&case).unwrap());
601        }
602    }
603
604    #[test]
605    fn range_spec_seq_wire_format() {
606        let cases = [
607            (ChunkRangesSeq::empty(), "00"),
608            (
609                ChunkRangesSeq::all(),
610                r"
611                    01 # 1 tuple in total
612                    # first tuple
613                    00 # span 0 until start
614                    0100 # 1 element, RangeSpec::all()
615            ",
616            ),
617            (
618                ChunkRangesSeq::from_ranges([
619                    ChunkRanges::chunks(1..3),
620                    ChunkRanges::chunks(7..13),
621                ]),
622                r"
623                    03 # 3 tuples in total
624                    # first tuple
625                    00 # span 0 until start
626                    020102 # range 1..3
627                    # second tuple
628                    01 # span 1 until next
629                    020706 # range 7..13
630                    # third tuple
631                    01 # span 1 until next
632                    00 # empty range forever from now
633                ",
634            ),
635            (
636                ChunkRangesSeq::from_ranges_infinite([
637                    ChunkRanges::empty(),
638                    ChunkRanges::empty(),
639                    ChunkRanges::empty(),
640                    ChunkRanges::chunks(7..),
641                    ChunkRanges::all(),
642                ]),
643                r"
644                    02 # 2 tuples in total
645                    # first tuple
646                    03 # span 3 until start (first 3 elements are empty)
647                    01 07 # range 7..
648                    # second tuple
649                    01 # span 1 until next (1 element is 7..)
650                    01 00 # ChunkRanges::all() forever from now
651                ",
652            ),
653        ];
654        for (case, expected_hex) in cases {
655            let expected = parse_hexdump(expected_hex).unwrap();
656            assert_eq_hex!(expected, postcard::to_stdvec(&case).unwrap());
657        }
658    }
659
660    /// Test that the roundtrip from [`Vec<ChunkRanges>`] via [`RangeSpec`] to [`RangeSpecSeq`]  and back works.
661    #[test]
662    fn range_spec_seq_roundtrip_cases() {
663        for case in [
664            vec![0..1, 0..0],
665            vec![1..2, 1..2, 1..2],
666            vec![1..2, 1..2, 2..3, 2..3],
667        ] {
668            let case = mk_case(case);
669            let expected = case.clone();
670            let actual = range_spec_seq_roundtrip_impl(&case);
671            assert_eq!(expected, actual);
672        }
673    }
674
675    /// Test that the creation of a [`RangeSpecSeq`] from a sequence of [`ChunkRanges`]s canonicalizes the result.
676    #[test]
677    fn range_spec_seq_canonical() {
678        for (case, expected_count) in [
679            (vec![0..1, 0..0], 2),
680            (vec![1..2, 1..2, 1..2], 2),
681            (vec![1..2, 1..2, 2..3, 2..3], 3),
682        ] {
683            let case = mk_case(case);
684            let spec = ChunkRangesSeq::from_ranges(case);
685            assert_eq!(spec.0.len(), expected_count);
686        }
687    }
688
689    proptest! {
690
691        #[test]
692        fn range_spec_roundtrip(ranges in ranges(0..1000)) {
693            let spec = RangeSpec::new(&ranges);
694            let ranges2 = spec.to_chunk_ranges();
695            prop_assert_eq!(ranges, ranges2);
696        }
697
698        #[test]
699        fn range_spec_seq_roundtrip(ranges in proptest::collection::vec(ranges(0..100), 0..10)) {
700            let expected = ranges.clone();
701            let actual = range_spec_seq_roundtrip_impl(&ranges);
702            prop_assert_eq!(expected, actual);
703        }
704
705        #[test]
706        fn range_spec_seq_bytes_roundtrip(ranges in proptest::collection::vec(ranges(0..100), 0..10)) {
707            let expected = ranges.clone();
708            let actual = range_spec_seq_bytes_roundtrip_impl(&ranges);
709            prop_assert_eq!(expected, actual);
710        }
711    }
712}