iroh_blobs/format/
collection.rs

//! The collection type used by iroh
2use std::{collections::BTreeMap, future::Future};
3
4use anyhow::Context;
5use bao_tree::blake3;
6use bytes::Bytes;
7use iroh_io::AsyncSliceReaderExt;
8use serde::{Deserialize, Serialize};
9
10use crate::{
11    get::{fsm, Stats},
12    hashseq::HashSeq,
13    store::MapEntry,
14    util::TempTag,
15    BlobFormat, Hash,
16};
17
18/// A collection of blobs
19///
20/// Note that the format is subject to change.
21#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, Default)]
22pub struct Collection {
23    /// Links to the blobs in this collection
24    blobs: Vec<(String, Hash)>,
25}
26
27impl std::ops::Index<usize> for Collection {
28    type Output = (String, Hash);
29
30    fn index(&self, index: usize) -> &Self::Output {
31        &self.blobs[index]
32    }
33}
34
35impl<K, V> Extend<(K, V)> for Collection
36where
37    K: Into<String>,
38    V: Into<Hash>,
39{
40    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
41        self.blobs
42            .extend(iter.into_iter().map(|(k, v)| (k.into(), v.into())));
43    }
44}
45
46impl<K, V> FromIterator<(K, V)> for Collection
47where
48    K: Into<String>,
49    V: Into<Hash>,
50{
51    fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
52        let mut res = Self::default();
53        res.extend(iter);
54        res
55    }
56}
57
58impl IntoIterator for Collection {
59    type Item = (String, Hash);
60    type IntoIter = std::vec::IntoIter<Self::Item>;
61
62    fn into_iter(self) -> Self::IntoIter {
63        self.blobs.into_iter()
64    }
65}
66
67/// A simple store trait for loading blobs
68pub trait SimpleStore {
69    /// Load a blob from the store
70    fn load(&self, hash: Hash) -> impl Future<Output = anyhow::Result<Bytes>> + Send + '_;
71}
72
73/// Metadata for a collection
74///
75/// This is the wire format for the metadata blob.
76#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
77struct CollectionMeta {
78    header: [u8; 13], // Must contain "CollectionV0."
79    names: Vec<String>,
80}
81
82impl Collection {
83    /// The header for the collection format.
84    ///
85    /// This is the start of the metadata blob.
86    pub const HEADER: &'static [u8; 13] = b"CollectionV0.";
87
88    /// Convert the collection to an iterator of blobs, with the last being the
89    /// root blob.
90    ///
91    /// To persist the collection, write all the blobs to storage, and use the
92    /// hash of the last blob as the collection hash.
93    pub fn to_blobs(&self) -> impl DoubleEndedIterator<Item = Bytes> {
94        let meta = CollectionMeta {
95            header: *Self::HEADER,
96            names: self.names(),
97        };
98        let meta_bytes = postcard::to_stdvec(&meta).unwrap();
99        let meta_bytes_hash = blake3::hash(&meta_bytes).into();
100        let links = std::iter::once(meta_bytes_hash)
101            .chain(self.links())
102            .collect::<HashSeq>();
103        let links_bytes = links.into_inner();
104        [meta_bytes.into(), links_bytes].into_iter()
105    }
106
107    /// Read the collection from a get fsm.
108    ///
109    /// Returns the fsm at the start of the first child blob (if any),
110    /// the links array, and the collection.
111    pub async fn read_fsm(
112        fsm_at_start_root: fsm::AtStartRoot,
113    ) -> anyhow::Result<(fsm::EndBlobNext, HashSeq, Collection)> {
114        let (next, links) = {
115            let curr = fsm_at_start_root.next();
116            let (curr, data) = curr.concatenate_into_vec().await?;
117            let links = HashSeq::new(data.into()).context("links could not be parsed")?;
118            (curr.next(), links)
119        };
120        let fsm::EndBlobNext::MoreChildren(at_meta) = next else {
121            anyhow::bail!("expected meta");
122        };
123        let (next, collection) = {
124            let mut children = links.clone();
125            let meta_link = children.pop_front().context("meta link not found")?;
126            let curr = at_meta.next(meta_link);
127            let (curr, names) = curr.concatenate_into_vec().await?;
128            let names = postcard::from_bytes::<CollectionMeta>(&names)?;
129            anyhow::ensure!(
130                names.header == *Self::HEADER,
131                "expected header {:?}, got {:?}",
132                Self::HEADER,
133                names.header
134            );
135            let collection = Collection::from_parts(children, names);
136            (curr.next(), collection)
137        };
138        Ok((next, links, collection))
139    }
140
141    /// Read the collection and all it's children from a get fsm.
142    ///
143    /// Returns the collection, a map from blob offsets to bytes, and the stats.
144    pub async fn read_fsm_all(
145        fsm_at_start_root: crate::get::fsm::AtStartRoot,
146    ) -> anyhow::Result<(Collection, BTreeMap<u64, Bytes>, Stats)> {
147        let (next, links, collection) = Self::read_fsm(fsm_at_start_root).await?;
148        let mut res = BTreeMap::new();
149        let mut curr = next;
150        let end = loop {
151            match curr {
152                fsm::EndBlobNext::MoreChildren(more) => {
153                    let child_offset = more.child_offset();
154                    let Some(hash) = links.get(usize::try_from(child_offset)?) else {
155                        break more.finish();
156                    };
157                    let header = more.next(hash);
158                    let (next, blob) = header.concatenate_into_vec().await?;
159                    res.insert(child_offset - 1, blob.into());
160                    curr = next.next();
161                }
162                fsm::EndBlobNext::Closing(closing) => break closing,
163            }
164        };
165        let stats = end.next().await?;
166        Ok((collection, res, stats))
167    }
168
169    /// Create a new collection from a hash sequence and metadata.
170    pub async fn load(root: Hash, store: &impl SimpleStore) -> anyhow::Result<Self> {
171        let hs = store.load(root).await?;
172        let hs = HashSeq::try_from(hs)?;
173        let meta_hash = hs.iter().next().context("empty hash seq")?;
174        let meta = store.load(meta_hash).await?;
175        let meta: CollectionMeta = postcard::from_bytes(&meta)?;
176        anyhow::ensure!(
177            meta.names.len() + 1 == hs.len(),
178            "names and links length mismatch"
179        );
180        Ok(Self::from_parts(hs.into_iter().skip(1), meta))
181    }
182
183    /// Load a collection from a store given a root hash
184    ///
185    /// This assumes that both the links and the metadata of the collection is stored in the store.
186    /// It does not require that all child blobs are stored in the store.
187    pub async fn load_db<D>(db: &D, root: &Hash) -> anyhow::Result<Self>
188    where
189        D: crate::store::Map,
190    {
191        let links_entry = db.get(root).await?.context("links not found")?;
192        anyhow::ensure!(links_entry.is_complete(), "links not complete");
193        let links_bytes = links_entry.data_reader().await?.read_to_end().await?;
194        let mut links = HashSeq::try_from(links_bytes)?;
195        let meta_hash = links.pop_front().context("meta hash not found")?;
196        let meta_entry = db.get(&meta_hash).await?.context("meta not found")?;
197        anyhow::ensure!(links_entry.is_complete(), "links not complete");
198        let meta_bytes = meta_entry.data_reader().await?.read_to_end().await?;
199        let meta: CollectionMeta = postcard::from_bytes(&meta_bytes)?;
200        anyhow::ensure!(
201            meta.names.len() == links.len(),
202            "names and links length mismatch"
203        );
204        Ok(Self::from_parts(links, meta))
205    }
206
207    /// Store a collection in a store. returns the root hash of the collection
208    /// as a TempTag.
209    pub async fn store<D>(self, db: &D) -> anyhow::Result<TempTag>
210    where
211        D: crate::store::Store,
212    {
213        let (links, meta) = self.into_parts();
214        let meta_bytes = postcard::to_stdvec(&meta)?;
215        let meta_tag = db.import_bytes(meta_bytes.into(), BlobFormat::Raw).await?;
216        let links_bytes = std::iter::once(*meta_tag.hash())
217            .chain(links)
218            .collect::<HashSeq>();
219        let links_tag = db
220            .import_bytes(links_bytes.into(), BlobFormat::HashSeq)
221            .await?;
222        Ok(links_tag)
223    }
224
225    /// Split a collection into a sequence of links and metadata
226    fn into_parts(self) -> (Vec<Hash>, CollectionMeta) {
227        let mut names = Vec::with_capacity(self.blobs.len());
228        let mut links = Vec::with_capacity(self.blobs.len());
229        for (name, hash) in self.blobs {
230            names.push(name);
231            links.push(hash);
232        }
233        let meta = CollectionMeta {
234            header: *Self::HEADER,
235            names,
236        };
237        (links, meta)
238    }
239
240    /// Create a new collection from a list of hashes and metadata
241    fn from_parts(links: impl IntoIterator<Item = Hash>, meta: CollectionMeta) -> Self {
242        meta.names.into_iter().zip(links).collect()
243    }
244
245    /// Get the links to the blobs in this collection
246    fn links(&self) -> impl Iterator<Item = Hash> + '_ {
247        self.blobs.iter().map(|(_name, hash)| *hash)
248    }
249
250    /// Get the names of the blobs in this collection
251    fn names(&self) -> Vec<String> {
252        self.blobs.iter().map(|(name, _)| name.clone()).collect()
253    }
254
255    /// Iterate over the blobs in this collection
256    pub fn iter(&self) -> impl Iterator<Item = &(String, Hash)> {
257        self.blobs.iter()
258    }
259
260    /// Get the number of blobs in this collection
261    pub fn len(&self) -> usize {
262        self.blobs.len()
263    }
264
265    /// Check if this collection is empty
266    pub fn is_empty(&self) -> bool {
267        self.blobs.is_empty()
268    }
269
270    /// Add the given blob to the collection.
271    pub fn push(&mut self, name: String, hash: Hash) {
272        self.blobs.push((name, hash));
273    }
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// A single `(name, hash)` entry survives a postcard round trip.
    #[test]
    fn roundtrip_blob() {
        let hash: Hash = blake3::Hash::from_hex(
            "3aa61c409fd7717c9d9c639202af2fae470c0ef669be7ba2caea5779cb534e9d",
        )
        .unwrap()
        .into();
        let original = ("test".to_string(), hash);

        let mut scratch = bytes::BytesMut::zeroed(1024);
        postcard::to_slice(&original, &mut scratch).unwrap();
        let decoded: (String, Hash) = postcard::from_bytes(&scratch).unwrap();
        assert_eq!(original, decoded);
    }

    /// The metadata wire format survives a postcard round trip.
    #[test]
    fn roundtrip_collection_meta() {
        let names = ["test", "a", "b"].map(|name| name.to_string()).to_vec();
        let expected = CollectionMeta {
            header: *Collection::HEADER,
            names,
        };

        let mut scratch = bytes::BytesMut::zeroed(1024);
        postcard::to_slice(&expected, &mut scratch).unwrap();
        let actual: CollectionMeta = postcard::from_bytes(&scratch).unwrap();
        assert_eq!(expected, actual);
    }

    /// A collection written via `to_blobs` can be read back via `load`.
    #[tokio::test]
    async fn collection_store_load() -> testresult::TestResult {
        let collection: Collection = (0..3)
            .map(|i| {
                let name = format!("blob{i}");
                let hash = crate::Hash::from(blake3::hash(&[i as u8]));
                (name, hash)
            })
            .collect();

        // Hash every blob; the root is the last blob produced by `to_blobs`.
        let hashed: Vec<(Hash, Bytes)> = collection
            .to_blobs()
            .map(|data| (crate::Hash::from(blake3::hash(&data)), data))
            .collect();
        let root = hashed.last().map(|(hash, _)| *hash);
        let store: TestStore = hashed.into_iter().collect();

        let loaded = Collection::load(root.unwrap(), &store).await?;
        assert_eq!(collection, loaded);
        Ok(())
    }

    /// An implementation of a [SimpleStore] for testing
    struct TestStore(BTreeMap<Hash, Bytes>);

    impl FromIterator<(Hash, Bytes)> for TestStore {
        fn from_iter<T: IntoIterator<Item = (Hash, Bytes)>>(iter: T) -> Self {
            let blobs: BTreeMap<Hash, Bytes> = iter.into_iter().collect();
            Self(blobs)
        }
    }

    impl SimpleStore for TestStore {
        async fn load(&self, hash: Hash) -> anyhow::Result<Bytes> {
            let Self(blobs) = self;
            blobs.get(&hash).cloned().context("not found")
        }
    }
}