iroh_bytes/
util.rs

1//! Utility functions and types.
2use bao_tree::{io::outboard::PreOrderMemOutboard, BaoTree, ChunkRanges};
3use bytes::Bytes;
4use derive_more::{Debug, Display, From, Into};
5use range_collections::range_set::RangeSetRange;
6use serde::{Deserialize, Serialize};
7use std::{borrow::Borrow, fmt, sync::Arc, time::SystemTime};
8
9use crate::{store::Store, BlobFormat, Hash, HashAndFormat, IROH_BLOCK_SIZE};
10
11pub mod io;
12mod mem_or_file;
13pub mod progress;
14pub use mem_or_file::MemOrFile;
15mod sparse_mem_file;
16pub use sparse_mem_file::SparseMemFile;
17
18/// A tag
19#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, From, Into)]
20pub struct Tag(pub Bytes);
21
/// `redb` key and value support for [`Tag`], compiled only with the `redb` feature.
#[cfg(feature = "redb")]
mod redb_support {
    use super::Tag;
    use bytes::Bytes;
    use redb::{Key as RedbKey, Value as RedbValue};

    /// A tag is stored as exactly the bytes it wraps.
    impl RedbValue for Tag {
        type SelfType<'a> = Tag;

        type AsBytes<'a> = Bytes;

        /// Tags are variable-length, so no fixed width is reported.
        fn fixed_width() -> Option<usize> {
            None
        }

        /// Builds an owned tag by copying `data`.
        fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a>
        where
            Self: 'a,
        {
            Tag(Bytes::copy_from_slice(data))
        }

        /// Returns a handle to the bytes wrapped by `value`.
        fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Self::AsBytes<'a>
        where
            Self: 'a,
            Self: 'b,
        {
            let Tag(bytes) = value;
            bytes.clone()
        }

        fn type_name() -> redb::TypeName {
            redb::TypeName::new("Tag")
        }
    }

    /// Keys are ordered by comparing their serialized bytes as slices.
    impl RedbKey for Tag {
        fn compare(data1: &[u8], data2: &[u8]) -> std::cmp::Ordering {
            Ord::cmp(data1, data2)
        }
    }
}
63
64impl Borrow<[u8]> for Tag {
65    fn borrow(&self) -> &[u8] {
66        self.0.as_ref()
67    }
68}
69
70impl From<String> for Tag {
71    fn from(value: String) -> Self {
72        Self(Bytes::from(value))
73    }
74}
75
76impl From<&str> for Tag {
77    fn from(value: &str) -> Self {
78        Self(Bytes::from(value.to_owned()))
79    }
80}
81
82impl Display for Tag {
83    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
84        let bytes = self.0.as_ref();
85        match std::str::from_utf8(bytes) {
86            Ok(s) => write!(f, "\"{}\"", s),
87            Err(_) => write!(f, "{}", hex::encode(bytes)),
88        }
89    }
90}
91
/// Adapter whose `Debug` output is the wrapped value's `Display` output.
struct DD<T>(T)
where
    T: fmt::Display;

impl<T> fmt::Debug for DD<T>
where
    T: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Forward the same formatter so that width/fill/flags reach the inner value.
        <T as fmt::Display>::fmt(&self.0, f)
    }
}
99
100impl Debug for Tag {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        f.debug_tuple("Tag").field(&DD(self)).finish()
103    }
104}
105
106impl Tag {
107    /// Create a new tag that does not exist yet.
108    pub fn auto(time: SystemTime, exists: impl Fn(&[u8]) -> bool) -> Self {
109        let now = chrono::DateTime::<chrono::Utc>::from(time);
110        let mut i = 0;
111        loop {
112            let mut text = format!("auto-{}", now.format("%Y-%m-%dT%H:%M:%S%.3fZ"));
113            if i != 0 {
114                text.push_str(&format!("-{}", i));
115            }
116            if !exists(text.as_bytes()) {
117                return Self::from(text);
118            }
119            i += 1;
120        }
121    }
122}
123
124/// A set of merged [`SetTagOption`]s for a blob.
125#[derive(Debug, Default)]
126pub struct TagSet {
127    auto: bool,
128    named: Vec<Tag>,
129}
130
131impl TagSet {
132    /// Insert a new tag into the set.
133    pub fn insert(&mut self, tag: SetTagOption) {
134        match tag {
135            SetTagOption::Auto => self.auto = true,
136            SetTagOption::Named(tag) => {
137                if !self.named.iter().any(|t| t == &tag) {
138                    self.named.push(tag)
139                }
140            }
141        }
142    }
143
144    /// Convert the [`TagSet`] into a list of [`SetTagOption`].
145    pub fn into_tags(self) -> impl Iterator<Item = SetTagOption> {
146        self.auto
147            .then_some(SetTagOption::Auto)
148            .into_iter()
149            .chain(self.named.into_iter().map(SetTagOption::Named))
150    }
151
152    /// Apply the tags in the [`TagSet`] to the database.
153    pub async fn apply<D: Store>(
154        self,
155        db: &D,
156        hash_and_format: HashAndFormat,
157    ) -> std::io::Result<()> {
158        let tags = self.into_tags();
159        for tag in tags {
160            match tag {
161                SetTagOption::Named(tag) => {
162                    db.set_tag(tag, Some(hash_and_format)).await?;
163                }
164                SetTagOption::Auto => {
165                    db.create_tag(hash_and_format).await?;
166                }
167            }
168        }
169        Ok(())
170    }
171}
172
173/// Option for commands that allow setting a tag
174#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
175pub enum SetTagOption {
176    /// A tag will be automatically generated
177    Auto,
178    /// The tag is explicitly named
179    Named(Tag),
180}
181
182/// A trait for things that can track liveness of blobs and collections.
183///
184/// This trait works together with [TempTag] to keep track of the liveness of a
185/// blob or collection.
186///
187/// It is important to include the format in the liveness tracking, since
188/// protecting a collection means protecting the blob and all its children,
189/// whereas protecting a raw blob only protects the blob itself.
190pub trait LivenessTracker: std::fmt::Debug + Send + Sync + 'static {
191    /// Called on clone
192    fn on_clone(&self, inner: &HashAndFormat);
193    /// Called on drop
194    fn on_drop(&self, inner: &HashAndFormat);
195}
196
197/// A hash and format pair that is protected from garbage collection.
198///
199/// If format is raw, this will protect just the blob
200/// If format is collection, this will protect the collection and all blobs in it
201#[derive(Debug)]
202pub struct TempTag {
203    /// The hash and format we are pinning
204    inner: HashAndFormat,
205    /// liveness tracker
206    liveness: Option<Arc<dyn LivenessTracker>>,
207}
208
209impl TempTag {
210    /// Create a new temp tag for the given hash and format
211    ///
212    /// This should only be used by store implementations.
213    ///
214    /// The caller is responsible for increasing the refcount on creation and to
215    /// make sure that temp tags that are created between a mark phase and a sweep
216    /// phase are protected.
217    pub fn new(inner: HashAndFormat, liveness: Option<Arc<dyn LivenessTracker>>) -> Self {
218        if let Some(liveness) = liveness.as_ref() {
219            liveness.on_clone(&inner);
220        }
221        Self { inner, liveness }
222    }
223
224    /// The hash of the pinned item
225    pub fn inner(&self) -> &HashAndFormat {
226        &self.inner
227    }
228
229    /// The hash of the pinned item
230    pub fn hash(&self) -> &Hash {
231        &self.inner.hash
232    }
233
234    /// The format of the pinned item
235    pub fn format(&self) -> BlobFormat {
236        self.inner.format
237    }
238
239    /// Keep the item alive until the end of the process
240    pub fn leak(mut self) {
241        // set the liveness tracker to None, so that the refcount is not decreased
242        // during drop. This means that the refcount will never reach 0 and the
243        // item will not be gced until the end of the process.
244        self.liveness = None;
245    }
246}
247
248impl Clone for TempTag {
249    fn clone(&self) -> Self {
250        Self::new(self.inner, self.liveness.clone())
251    }
252}
253
254impl Drop for TempTag {
255    fn drop(&mut self) {
256        if let Some(liveness) = self.liveness.as_ref() {
257            liveness.on_drop(&self.inner);
258        }
259    }
260}
261
262/// Get the number of bytes given a set of chunk ranges and the total size.
263///
264/// If some ranges are out of bounds, they will be clamped to the size.
265pub fn total_bytes(ranges: ChunkRanges, size: u64) -> u64 {
266    ranges
267        .iter()
268        .map(|range| {
269            let (start, end) = match range {
270                RangeSetRange::Range(r) => {
271                    (r.start.to_bytes().min(size), r.end.to_bytes().min(size))
272                }
273                RangeSetRange::RangeFrom(range) => (range.start.to_bytes().min(size), size),
274            };
275            end.saturating_sub(start)
276        })
277        .reduce(u64::saturating_add)
278        .unwrap_or_default()
279}
280
/// A zero-sized marker that is not `Send`.
///
/// The `PhantomData<Rc<()>>` field makes any type embedding this marker
/// non-sendable as well, because `Rc` is not `Send`.
#[derive(Debug)]
pub(crate) struct NonSend {
    _marker: std::marker::PhantomData<std::rc::Rc<()>>,
}

impl NonSend {
    /// Create a new non-sendable marker.
    #[allow(dead_code)]
    pub const fn new() -> Self {
        NonSend {
            _marker: std::marker::PhantomData,
        }
    }
}
296
297/// copy a limited slice from a slice as a `Bytes`.
298pub(crate) fn copy_limited_slice(bytes: &[u8], offset: u64, len: usize) -> Bytes {
299    bytes[limited_range(offset, len, bytes.len())]
300        .to_vec()
301        .into()
302}
303
/// Clamp the window starting at `offset` with length `len` to a buffer of
/// `buf_len` bytes.
///
/// Returns `0..0` when `offset` is at or past the end of the buffer; otherwise
/// returns `offset..end`, where `end` is cut off at `buf_len`.
pub(crate) fn limited_range(offset: u64, len: usize, buf_len: usize) -> std::ops::Range<usize> {
    // A start at or beyond the end of the buffer selects nothing.
    if offset >= buf_len as u64 {
        return 0..0;
    }
    // `offset < buf_len` holds here, so the value fits in `usize`.
    let start = offset as usize;
    let end = buf_len.min(start.saturating_add(len));
    start..end
}
313
314/// zero copy get a limited slice from a `Bytes` as a `Bytes`.
315#[allow(dead_code)]
316pub(crate) fn get_limited_slice(bytes: &Bytes, offset: u64, len: usize) -> Bytes {
317    bytes.slice(limited_range(offset, len, bytes.len()))
318}
319
320/// Compute raw outboard size, without the size header.
321#[allow(dead_code)]
322pub(crate) fn raw_outboard_size(size: u64) -> u64 {
323    BaoTree::new(size, IROH_BLOCK_SIZE).outboard_size()
324}
325
326/// Compute raw outboard, without the size header.
327pub(crate) fn raw_outboard(data: &[u8]) -> (Vec<u8>, Hash) {
328    let res = PreOrderMemOutboard::create(data, IROH_BLOCK_SIZE);
329    (res.data, res.root.into())
330}