iroh_blobs/protocol/
range_spec.rs

1//! Specifications for ranges selection in blobs and sequences of blobs.
2//!
3//! The [`ChunkRanges`] allows specifying which BAO chunks inside a single blob should be
4//! selected.
5//!
6//! The [`ChunkRangesSeq`] builds on top of this to select blob chunks in an entire
7//! collection.
8use std::{fmt, sync::OnceLock};
9
10use bao_tree::{ChunkNum, ChunkRanges, ChunkRangesRef};
11use serde::{Deserialize, Serialize};
12use smallvec::{smallvec, SmallVec};
13
14static CHUNK_RANGES_EMPTY: OnceLock<ChunkRanges> = OnceLock::new();
15
16fn chunk_ranges_empty() -> &'static ChunkRanges {
17    CHUNK_RANGES_EMPTY.get_or_init(ChunkRanges::empty)
18}
19
20use crate::util::ChunkRangesExt;
21
22#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
23#[serde(from = "wire::RangeSpecSeq", into = "wire::RangeSpecSeq")]
24pub struct ChunkRangesSeq(pub(crate) SmallVec<[(u64, ChunkRanges); 2]>);
25
26impl std::hash::Hash for ChunkRangesSeq {
27    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
28        for (i, r) in &self.0 {
29            i.hash(state);
30            r.boundaries().hash(state);
31        }
32    }
33}
34
35impl std::ops::Index<u64> for ChunkRangesSeq {
36    type Output = ChunkRanges;
37
38    fn index(&self, index: u64) -> &Self::Output {
39        match self.0.binary_search_by(|(o, _)| o.cmp(&index)) {
40            Ok(i) => &self.0[i].1,
41            Err(i) => {
42                if i == 0 {
43                    chunk_ranges_empty()
44                } else {
45                    &self.0[i - 1].1
46                }
47            }
48        }
49    }
50}
51
52impl ChunkRangesSeq {
53    pub const fn empty() -> Self {
54        Self(SmallVec::new_const())
55    }
56
57    /// Request just the first blob.
58    pub fn root() -> Self {
59        let mut inner = SmallVec::new();
60        inner.push((0, ChunkRanges::all()));
61        inner.push((1, ChunkRanges::empty()));
62        Self(inner)
63    }
64
65    /// A [`ChunkRangesSeq`] containing all chunks from all blobs.
66    ///
67    /// [`ChunkRangesSeq::iter`], will return a full range forever.
68    pub fn all() -> Self {
69        let mut inner = SmallVec::new();
70        inner.push((0, ChunkRanges::all()));
71        Self(inner)
72    }
73
74    /// A [`ChunkRangesSeq`] getting the verified size for the first blob.
75    pub fn verified_size() -> Self {
76        let mut inner = SmallVec::new();
77        inner.push((0, ChunkRanges::last_chunk()));
78        inner.push((1, ChunkRanges::empty()));
79        Self(inner)
80    }
81
82    /// A [`ChunkRangesSeq`] getting the entire first blob and verified sizes for all others.
83    pub fn verified_child_sizes() -> Self {
84        let mut inner = SmallVec::new();
85        inner.push((0, ChunkRanges::all()));
86        inner.push((1, ChunkRanges::last_chunk()));
87        Self(inner)
88    }
89
90    /// Checks if this [`ChunkRangesSeq`] does not select any chunks in the blob.
91    pub fn is_empty(&self) -> bool {
92        self.0.is_empty()
93    }
94
95    /// Checks if this [`ChunkRangesSeq`] selects all chunks in the blob.
96    pub fn is_all(&self) -> bool {
97        if self.0.len() != 1 {
98            return false;
99        }
100        let Some((_, ranges)) = self.0.iter().next() else {
101            return false;
102        };
103        ranges.is_all()
104    }
105
106    /// If this range seq describes a range for a single item, returns the offset
107    /// and range spec for that item
108    pub fn as_single(&self) -> Option<(u64, &ChunkRanges)> {
109        // we got two elements,
110        // the first element starts at offset 0,
111        // and the second element is empty
112        if self.0.len() != 2 {
113            return None;
114        }
115        let (o1, v1) = self.0.iter().next().unwrap();
116        let (o2, v2) = self.0.iter().next_back().unwrap();
117        if *o1 == (o2 - 1) && v2.is_empty() {
118            Some((*o1, v1))
119        } else {
120            None
121        }
122    }
123
124    pub fn is_blob(&self) -> bool {
125        #[allow(clippy::match_like_matches_macro)]
126        match self.as_single() {
127            Some((0, _)) => true,
128            _ => false,
129        }
130    }
131
132    /// Convenience function to create a [`ChunkRangesSeq`] from an iterator of
133    /// chunk ranges. If the last element is non-empty, it will be repeated
134    /// forever.
135    pub fn from_ranges_infinite(ranges: impl IntoIterator<Item = ChunkRanges>) -> Self {
136        let (ranges, _) = from_ranges_inner(ranges);
137        Self(ranges)
138    }
139
140    /// Convenience function to create a [`ChunkRangesSeq`] from an iterator of
141    /// chunk ranges. If the last element is non-empty, an empty range will be
142    /// added immediately after it to terminate the sequence.
143    pub fn from_ranges(ranges: impl IntoIterator<Item = ChunkRanges>) -> Self {
144        let (mut res, next) = from_ranges_inner(ranges);
145        if let Some((_, r)) = res.iter().next_back() {
146            if !r.is_empty() {
147                res.push((next, ChunkRanges::empty()));
148            }
149        }
150        Self(res)
151    }
152
153    /// An iterator over blobs in the sequence with a non-empty range spec.
154    ///
155    /// This iterator will only yield items for blobs which have at least one chunk
156    /// selected.
157    ///
158    /// This iterator is infinite if the [`ChunkRangesSeq`] ends on a non-empty [`ChunkRanges`],
159    /// that is all further blobs have selected chunks spans.
160    pub fn iter_non_empty_infinite(&self) -> NonEmptyRequestRangeSpecIter<'_> {
161        NonEmptyRequestRangeSpecIter::new(self.iter_infinite())
162    }
163
164    /// True if this range spec sequence repeats the last range spec forever.
165    pub fn is_infinite(&self) -> bool {
166        self.0
167            .iter()
168            .next_back()
169            .map(|(_, v)| !v.is_empty())
170            .unwrap_or_default()
171    }
172
173    pub fn iter_infinite(&self) -> ChunkRangesSeqIterInfinite<'_> {
174        ChunkRangesSeqIterInfinite {
175            current: chunk_ranges_empty(),
176            offset: 0,
177            remaining: self.0.iter().peekable(),
178        }
179    }
180
181    pub fn iter(&self) -> ChunkRangesSeqIter<'_> {
182        ChunkRangesSeqIter {
183            current: chunk_ranges_empty(),
184            offset: 0,
185            remaining: self.0.iter().peekable(),
186        }
187    }
188}
189
190fn from_ranges_inner(
191    ranges: impl IntoIterator<Item = ChunkRanges>,
192) -> (SmallVec<[(u64, ChunkRanges); 2]>, u64) {
193    let mut res = SmallVec::new();
194    let mut i = 0;
195    for range in ranges.into_iter() {
196        if range
197            != res
198                .iter()
199                .next_back()
200                .map(|(_, v)| v)
201                .unwrap_or(&ChunkRanges::empty())
202        {
203            res.push((i, range));
204        }
205        i += 1;
206    }
207    (res, i)
208}
209
210/// An infinite iterator yielding [`RangeSpec`]s for each blob in a sequence.
211///
212/// The first item yielded is the [`RangeSpec`] for the first blob in the sequence, the
213/// next item is the [`RangeSpec`] for the next blob, etc.
214#[derive(Debug)]
215pub struct ChunkRangesSeqIterInfinite<'a> {
216    /// current value
217    current: &'a ChunkRanges,
218    /// current offset
219    offset: u64,
220    /// remaining ranges
221    remaining: std::iter::Peekable<std::slice::Iter<'a, (u64, ChunkRanges)>>,
222}
223
224impl<'a> ChunkRangesSeqIterInfinite<'a> {
225    /// True if we are at the end of the iterator.
226    ///
227    /// This does not mean that the iterator is terminated, it just means that
228    /// it will repeat the same value forever.
229    pub fn is_at_end(&mut self) -> bool {
230        self.remaining.peek().is_none()
231    }
232}
233
234impl<'a> Iterator for ChunkRangesSeqIterInfinite<'a> {
235    type Item = &'a ChunkRanges;
236
237    fn next(&mut self) -> Option<Self::Item> {
238        loop {
239            match self.remaining.peek() {
240                Some((offset, _)) if self.offset < *offset => {
241                    // emit current value until we reach the next offset
242                    self.offset += 1;
243                    return Some(self.current);
244                }
245                None => {
246                    // no more values, just repeat current forever
247                    self.offset += 1;
248                    return Some(self.current);
249                }
250                Some((_, ranges)) => {
251                    // get next current value, new count, and set remaining
252                    self.current = ranges;
253                    self.remaining.next();
254                }
255            }
256        }
257    }
258}
259
260/// An infinite iterator yielding [`RangeSpec`]s for each blob in a sequence.
261///
262/// The first item yielded is the [`RangeSpec`] for the first blob in the sequence, the
263/// next item is the [`RangeSpec`] for the next blob, etc.
264#[derive(Debug)]
265pub struct ChunkRangesSeqIter<'a> {
266    /// current value
267    current: &'a ChunkRanges,
268    /// current offset
269    offset: u64,
270    /// remaining ranges
271    remaining: std::iter::Peekable<std::slice::Iter<'a, (u64, ChunkRanges)>>,
272}
273
274impl<'a> Iterator for ChunkRangesSeqIter<'a> {
275    type Item = &'a ChunkRanges;
276
277    fn next(&mut self) -> Option<Self::Item> {
278        match self.remaining.peek()? {
279            (offset, _) if self.offset < *offset => {
280                // emit current value until we reach the next offset
281                self.offset += 1;
282                Some(self.current)
283            }
284            (_, ranges) => {
285                // get next current value, new count, and set remaining
286                self.current = ranges;
287                self.remaining.next();
288                self.offset += 1;
289                Some(self.current)
290            }
291        }
292    }
293}
294
295/// An iterator over blobs in the sequence with a non-empty range specs.
296///
297/// default is what to use if the children of this RequestRangeSpec are empty.
298#[derive(Debug)]
299pub struct NonEmptyRequestRangeSpecIter<'a> {
300    inner: ChunkRangesSeqIterInfinite<'a>,
301    count: u64,
302}
303
304impl<'a> NonEmptyRequestRangeSpecIter<'a> {
305    fn new(inner: ChunkRangesSeqIterInfinite<'a>) -> Self {
306        Self { inner, count: 0 }
307    }
308
309    pub(crate) fn offset(&self) -> u64 {
310        self.count
311    }
312
313    pub fn is_at_end(&mut self) -> bool {
314        self.inner.is_at_end()
315    }
316}
317
318impl<'a> Iterator for NonEmptyRequestRangeSpecIter<'a> {
319    type Item = (u64, &'a ChunkRanges);
320
321    fn next(&mut self) -> Option<Self::Item> {
322        loop {
323            // unwrapping is safe because we know that the inner iterator will never terminate
324            let curr = self.inner.next().unwrap();
325            let count = self.count;
326            // increase count in any case until we are at the end of possible u64 values
327            // we are unlikely to ever reach this limit.
328            self.count = self.count.checked_add(1)?;
329            // yield only if the current value is non-empty
330            if !curr.is_empty() {
331                break Some((count, curr));
332            } else if self.inner.is_at_end() {
333                // terminate instead of looping until we run out of u64 values
334                break None;
335            }
336        }
337    }
338}
339
340/// A chunk range specification as a sequence of chunk offsets.
341///
342/// This is just the wire encoding of a [`ChunkRanges`]. You should rarely have to
343/// interact with this directly.
344///
345/// Offsets encode alternating spans starting on 0, where the first span is always
346/// deselected.
347///
348/// ## Examples:
349///
350/// - `[2, 5, 3, 1]` encodes five spans, of which two are selected:
351///   - `[0, 0+2) = [0, 2)` is not selected.
352///   - `[2, 2+5) = [2, 7)` is selected.
353///   - `[7, 7+3) = [7, 10)` is not selected.
354///   - `[10, 10+1) = [10, 11)` is selected.
355///   - `[11, inf)` is deselected.
356///
357///   Such a [`RangeSpec`] can be converted to a [`ChunkRanges`] using containing just the
358///   selected ranges: `ChunkRanges{2..7, 10..11}` using [`RangeSpec::to_chunk_ranges`].
359///
360/// - An empty range selects no spans, encoded as `[]`. This means nothing of the blob is
361///   selected.
362///
363/// - To select an entire blob create a single half-open span starting at the first chunk:
364///   `[0]`.
365///
366/// - To select the tail of a blob, create a single half-open span: `[15]`.
367///
368/// This is a SmallVec so we can avoid allocations for the very common case of a single
369/// chunk range.
370#[derive(Deserialize, Serialize, PartialEq, Eq, Clone, Hash)]
371#[repr(transparent)]
372pub struct RangeSpec(SmallVec<[u64; 2]>);
373
374impl RangeSpec {
375    /// Creates a new [`RangeSpec`] from a range set.
376    pub fn new(ranges: impl AsRef<ChunkRangesRef>) -> Self {
377        let ranges = ranges.as_ref().boundaries();
378        let mut res = SmallVec::new();
379        if let Some((start, rest)) = ranges.split_first() {
380            let mut prev = start.0;
381            res.push(prev);
382            for v in rest {
383                res.push(v.0 - prev);
384                prev = v.0;
385            }
386        }
387        Self(res)
388    }
389
390    /// A [`RangeSpec`] selecting nothing from the blob.
391    ///
392    /// This is called "empty" because the representation is an empty set.
393    pub const EMPTY: Self = Self(SmallVec::new_const());
394
395    /// Creates a [`RangeSpec`] selecting the entire blob.
396    pub fn all() -> Self {
397        Self(smallvec![0])
398    }
399
400    /// Creates a [`RangeSpec`] selecting the last chunk, which is also a size proof.
401    pub fn verified_size() -> Self {
402        Self(smallvec![u64::MAX])
403    }
404
405    /// Checks if this [`RangeSpec`] does not select any chunks in the blob.
406    pub fn is_empty(&self) -> bool {
407        self.0.is_empty()
408    }
409
410    /// Checks if this [`RangeSpec`] selects all chunks in the blob.
411    pub fn is_all(&self) -> bool {
412        self.0.len() == 1 && self.0[0] == 0
413    }
414
415    /// Returns the number of chunks selected by this [`RangeSpec`], as a tuple
416    /// with the minimum and maximum number of chunks.
417    pub fn chunks(&self) -> (u64, Option<u64>) {
418        let mut min = 0;
419        for i in 0..self.0.len() / 2 {
420            min += self.0[2 * i + 1];
421        }
422        let max = if self.0.len() % 2 != 0 {
423            // spec is open ended
424            None
425        } else {
426            Some(min)
427        };
428        (min, max)
429    }
430
431    /// Creates a [`ChunkRanges`] from this [`RangeSpec`].
432    pub fn to_chunk_ranges(&self) -> ChunkRanges {
433        // this is zero allocation for single ranges
434        // todo: optimize this in range collections
435        let mut ranges = ChunkRanges::empty();
436        let mut current = ChunkNum(0);
437        let mut on = false;
438        for &width in self.0.iter() {
439            let next = current + width;
440            if on {
441                ranges |= ChunkRanges::from(current..next);
442            }
443            current = next;
444            on = !on;
445        }
446        if on {
447            ranges |= ChunkRanges::from(current..);
448        }
449        ranges
450    }
451}
452
453impl fmt::Debug for RangeSpec {
454    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
455        if self.is_all() {
456            write!(f, "all")
457        } else if self.is_empty() {
458            write!(f, "empty")
459        } else if !f.alternate() {
460            f.debug_list()
461                .entries(self.to_chunk_ranges().iter())
462                .finish()
463        } else {
464            f.debug_list().entries(self.0.iter()).finish()
465        }
466    }
467}
468
469mod wire {
470
471    use serde::{Deserialize, Serialize};
472    use smallvec::SmallVec;
473
474    use super::{ChunkRangesSeq, RangeSpec};
475
476    #[derive(Deserialize, Serialize)]
477    pub struct RangeSpecSeq(SmallVec<[(u64, RangeSpec); 2]>);
478
479    impl From<RangeSpecSeq> for ChunkRangesSeq {
480        fn from(wire: RangeSpecSeq) -> Self {
481            let mut offset = 0;
482            let mut res = SmallVec::new();
483            for (delta, spec) in wire.0.iter() {
484                offset += *delta;
485                res.push((offset, spec.to_chunk_ranges()));
486            }
487            Self(res)
488        }
489    }
490
491    impl From<ChunkRangesSeq> for RangeSpecSeq {
492        fn from(value: ChunkRangesSeq) -> Self {
493            let mut res = SmallVec::new();
494            let mut offset = 0;
495            for (i, r) in value.0.iter() {
496                let delta = *i - offset;
497                res.push((delta, RangeSpec::new(r)));
498                offset = *i;
499            }
500            Self(res)
501        }
502    }
503}
504
505#[cfg(test)]
506mod tests {
507    use std::ops::Range;
508
509    use iroh_test::{assert_eq_hex, hexdump::parse_hexdump};
510    use proptest::prelude::*;
511
512    use super::*;
513    use crate::util::ChunkRangesExt;
514
515    fn ranges(value_range: Range<u64>) -> impl Strategy<Value = ChunkRanges> {
516        prop::collection::vec((value_range.clone(), value_range), 0..16).prop_map(|v| {
517            let mut res = ChunkRanges::empty();
518            for (a, b) in v {
519                let start = a.min(b);
520                let end = a.max(b);
521                res |= ChunkRanges::chunks(start..end);
522            }
523            res
524        })
525    }
526
527    fn range_spec_seq_roundtrip_impl(ranges: &[ChunkRanges]) -> Vec<ChunkRanges> {
528        let spec = ChunkRangesSeq::from_ranges(ranges.iter().cloned());
529        spec.iter_infinite()
530            .take(ranges.len())
531            .cloned()
532            .collect::<Vec<_>>()
533    }
534
535    fn range_spec_seq_bytes_roundtrip_impl(ranges: &[ChunkRanges]) -> Vec<ChunkRanges> {
536        let spec = ChunkRangesSeq::from_ranges(ranges.iter().cloned());
537        let bytes = postcard::to_allocvec(&spec).unwrap();
538        let spec2: ChunkRangesSeq = postcard::from_bytes(&bytes).unwrap();
539        spec2
540            .iter_infinite()
541            .take(ranges.len())
542            .cloned()
543            .collect::<Vec<_>>()
544    }
545
546    fn mk_case(case: Vec<Range<u64>>) -> Vec<ChunkRanges> {
547        case.iter()
548            .map(|x| ChunkRanges::chunks(x.start..x.end))
549            .collect::<Vec<_>>()
550    }
551
552    #[test]
553    fn range_spec_wire_format() {
554        // a list of commented hex dumps and the corresponding range spec
555        let cases = [
556            (RangeSpec::EMPTY, "00"),
557            (
558                RangeSpec::all(),
559                r"
560                    01 # length prefix - 1 element
561                    00 # span width - 0. everything stating from 0 is included
562                ",
563            ),
564            (
565                RangeSpec::new(ChunkRanges::chunks(64..)),
566                r"
567                    01 # length prefix - 1 element
568                    40 # span width - 64. everything starting from 64 is included
569                ",
570            ),
571            (
572                RangeSpec::new(ChunkRanges::chunks(10000..)),
573                r"
574                    01 # length prefix - 1 element
575                    904E # span width - 10000, 904E in postcard varint encoding. everything starting from 10000 is included
576                ",
577            ),
578            (
579                RangeSpec::new(ChunkRanges::chunks(..64)),
580                r"
581                    02 # length prefix - 2 elements
582                    00 # span width - 0. everything stating from 0 is included
583                    40 # span width - 64. everything starting from 64 is excluded
584                ",
585            ),
586            (
587                RangeSpec::new(&ChunkRanges::chunks(1..3) | &ChunkRanges::chunks(9..13)),
588                r"
589                    04 # length prefix - 4 elements
590                    01 # span width - 1
591                    02 # span width - 2 (3 - 1)
592                    06 # span width - 6 (9 - 3)
593                    04 # span width - 4 (13 - 9)
594                ",
595            ),
596        ];
597        for (case, expected_hex) in cases {
598            let expected = parse_hexdump(expected_hex).unwrap();
599            assert_eq_hex!(expected, postcard::to_stdvec(&case).unwrap());
600        }
601    }
602
603    #[test]
604    fn range_spec_seq_wire_format() {
605        let cases = [
606            (ChunkRangesSeq::empty(), "00"),
607            (
608                ChunkRangesSeq::all(),
609                r"
610                    01 # 1 tuple in total
611                    # first tuple
612                    00 # span 0 until start
613                    0100 # 1 element, RangeSpec::all()
614            ",
615            ),
616            (
617                ChunkRangesSeq::from_ranges([
618                    ChunkRanges::chunks(1..3),
619                    ChunkRanges::chunks(7..13),
620                ]),
621                r"
622                    03 # 3 tuples in total
623                    # first tuple
624                    00 # span 0 until start
625                    020102 # range 1..3
626                    # second tuple
627                    01 # span 1 until next
628                    020706 # range 7..13
629                    # third tuple
630                    01 # span 1 until next
631                    00 # empty range forever from now
632                ",
633            ),
634            (
635                ChunkRangesSeq::from_ranges_infinite([
636                    ChunkRanges::empty(),
637                    ChunkRanges::empty(),
638                    ChunkRanges::empty(),
639                    ChunkRanges::chunks(7..),
640                    ChunkRanges::all(),
641                ]),
642                r"
643                    02 # 2 tuples in total
644                    # first tuple
645                    03 # span 3 until start (first 3 elements are empty)
646                    01 07 # range 7..
647                    # second tuple
648                    01 # span 1 until next (1 element is 7..)
649                    01 00 # ChunkRanges::all() forever from now
650                ",
651            ),
652        ];
653        for (case, expected_hex) in cases {
654            let expected = parse_hexdump(expected_hex).unwrap();
655            assert_eq_hex!(expected, postcard::to_stdvec(&case).unwrap());
656        }
657    }
658
659    /// Test that the roundtrip from [`Vec<ChunkRanges>`] via [`RangeSpec`] to [`RangeSpecSeq`]  and back works.
660    #[test]
661    fn range_spec_seq_roundtrip_cases() {
662        for case in [
663            vec![0..1, 0..0],
664            vec![1..2, 1..2, 1..2],
665            vec![1..2, 1..2, 2..3, 2..3],
666        ] {
667            let case = mk_case(case);
668            let expected = case.clone();
669            let actual = range_spec_seq_roundtrip_impl(&case);
670            assert_eq!(expected, actual);
671        }
672    }
673
674    /// Test that the creation of a [`RangeSpecSeq`] from a sequence of [`ChunkRanges`]s canonicalizes the result.
675    #[test]
676    fn range_spec_seq_canonical() {
677        for (case, expected_count) in [
678            (vec![0..1, 0..0], 2),
679            (vec![1..2, 1..2, 1..2], 2),
680            (vec![1..2, 1..2, 2..3, 2..3], 3),
681        ] {
682            let case = mk_case(case);
683            let spec = ChunkRangesSeq::from_ranges(case);
684            assert_eq!(spec.0.len(), expected_count);
685        }
686    }
687
688    proptest! {
689
690        #[test]
691        fn range_spec_roundtrip(ranges in ranges(0..1000)) {
692            let spec = RangeSpec::new(&ranges);
693            let ranges2 = spec.to_chunk_ranges();
694            prop_assert_eq!(ranges, ranges2);
695        }
696
697        #[test]
698        fn range_spec_seq_roundtrip(ranges in proptest::collection::vec(ranges(0..100), 0..10)) {
699            let expected = ranges.clone();
700            let actual = range_spec_seq_roundtrip_impl(&ranges);
701            prop_assert_eq!(expected, actual);
702        }
703
704        #[test]
705        fn range_spec_seq_bytes_roundtrip(ranges in proptest::collection::vec(ranges(0..100), 0..10)) {
706            let expected = ranges.clone();
707            let actual = range_spec_seq_bytes_roundtrip_impl(&ranges);
708            prop_assert_eq!(expected, actual);
709        }
710    }
711}