1use std::{collections::BTreeMap, future::Future};
3
4use anyhow::Context;
5use bao_tree::blake3;
6use bytes::Bytes;
7use iroh_io::AsyncSliceReaderExt;
8use serde::{Deserialize, Serialize};
9
10use crate::{
11 get::{fsm, Stats},
12 hashseq::HashSeq,
13 store::MapEntry,
14 util::TempTag,
15 BlobFormat, Hash,
16};
17
18#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, Default)]
22pub struct Collection {
23 blobs: Vec<(String, Hash)>,
25}
26
27impl std::ops::Index<usize> for Collection {
28 type Output = (String, Hash);
29
30 fn index(&self, index: usize) -> &Self::Output {
31 &self.blobs[index]
32 }
33}
34
35impl<K, V> Extend<(K, V)> for Collection
36where
37 K: Into<String>,
38 V: Into<Hash>,
39{
40 fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
41 self.blobs
42 .extend(iter.into_iter().map(|(k, v)| (k.into(), v.into())));
43 }
44}
45
46impl<K, V> FromIterator<(K, V)> for Collection
47where
48 K: Into<String>,
49 V: Into<Hash>,
50{
51 fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
52 let mut res = Self::default();
53 res.extend(iter);
54 res
55 }
56}
57
58impl IntoIterator for Collection {
59 type Item = (String, Hash);
60 type IntoIter = std::vec::IntoIter<Self::Item>;
61
62 fn into_iter(self) -> Self::IntoIter {
63 self.blobs.into_iter()
64 }
65}
66
67pub trait SimpleStore {
69 fn load(&self, hash: Hash) -> impl Future<Output = anyhow::Result<Bytes>> + Send + '_;
71}
72
73#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
77struct CollectionMeta {
78 header: [u8; 13], names: Vec<String>,
80}
81
82impl Collection {
83 pub const HEADER: &'static [u8; 13] = b"CollectionV0.";
87
88 pub fn to_blobs(&self) -> impl DoubleEndedIterator<Item = Bytes> {
94 let meta = CollectionMeta {
95 header: *Self::HEADER,
96 names: self.names(),
97 };
98 let meta_bytes = postcard::to_stdvec(&meta).unwrap();
99 let meta_bytes_hash = blake3::hash(&meta_bytes).into();
100 let links = std::iter::once(meta_bytes_hash)
101 .chain(self.links())
102 .collect::<HashSeq>();
103 let links_bytes = links.into_inner();
104 [meta_bytes.into(), links_bytes].into_iter()
105 }
106
107 pub async fn read_fsm(
112 fsm_at_start_root: fsm::AtStartRoot,
113 ) -> anyhow::Result<(fsm::EndBlobNext, HashSeq, Collection)> {
114 let (next, links) = {
115 let curr = fsm_at_start_root.next();
116 let (curr, data) = curr.concatenate_into_vec().await?;
117 let links = HashSeq::new(data.into()).context("links could not be parsed")?;
118 (curr.next(), links)
119 };
120 let fsm::EndBlobNext::MoreChildren(at_meta) = next else {
121 anyhow::bail!("expected meta");
122 };
123 let (next, collection) = {
124 let mut children = links.clone();
125 let meta_link = children.pop_front().context("meta link not found")?;
126 let curr = at_meta.next(meta_link);
127 let (curr, names) = curr.concatenate_into_vec().await?;
128 let names = postcard::from_bytes::<CollectionMeta>(&names)?;
129 anyhow::ensure!(
130 names.header == *Self::HEADER,
131 "expected header {:?}, got {:?}",
132 Self::HEADER,
133 names.header
134 );
135 let collection = Collection::from_parts(children, names);
136 (curr.next(), collection)
137 };
138 Ok((next, links, collection))
139 }
140
141 pub async fn read_fsm_all(
145 fsm_at_start_root: crate::get::fsm::AtStartRoot,
146 ) -> anyhow::Result<(Collection, BTreeMap<u64, Bytes>, Stats)> {
147 let (next, links, collection) = Self::read_fsm(fsm_at_start_root).await?;
148 let mut res = BTreeMap::new();
149 let mut curr = next;
150 let end = loop {
151 match curr {
152 fsm::EndBlobNext::MoreChildren(more) => {
153 let child_offset = more.child_offset();
154 let Some(hash) = links.get(usize::try_from(child_offset)?) else {
155 break more.finish();
156 };
157 let header = more.next(hash);
158 let (next, blob) = header.concatenate_into_vec().await?;
159 res.insert(child_offset - 1, blob.into());
160 curr = next.next();
161 }
162 fsm::EndBlobNext::Closing(closing) => break closing,
163 }
164 };
165 let stats = end.next().await?;
166 Ok((collection, res, stats))
167 }
168
169 pub async fn load(root: Hash, store: &impl SimpleStore) -> anyhow::Result<Self> {
171 let hs = store.load(root).await?;
172 let hs = HashSeq::try_from(hs)?;
173 let meta_hash = hs.iter().next().context("empty hash seq")?;
174 let meta = store.load(meta_hash).await?;
175 let meta: CollectionMeta = postcard::from_bytes(&meta)?;
176 anyhow::ensure!(
177 meta.names.len() + 1 == hs.len(),
178 "names and links length mismatch"
179 );
180 Ok(Self::from_parts(hs.into_iter().skip(1), meta))
181 }
182
183 pub async fn load_db<D>(db: &D, root: &Hash) -> anyhow::Result<Self>
188 where
189 D: crate::store::Map,
190 {
191 let links_entry = db.get(root).await?.context("links not found")?;
192 anyhow::ensure!(links_entry.is_complete(), "links not complete");
193 let links_bytes = links_entry.data_reader().await?.read_to_end().await?;
194 let mut links = HashSeq::try_from(links_bytes)?;
195 let meta_hash = links.pop_front().context("meta hash not found")?;
196 let meta_entry = db.get(&meta_hash).await?.context("meta not found")?;
197 anyhow::ensure!(links_entry.is_complete(), "links not complete");
198 let meta_bytes = meta_entry.data_reader().await?.read_to_end().await?;
199 let meta: CollectionMeta = postcard::from_bytes(&meta_bytes)?;
200 anyhow::ensure!(
201 meta.names.len() == links.len(),
202 "names and links length mismatch"
203 );
204 Ok(Self::from_parts(links, meta))
205 }
206
207 pub async fn store<D>(self, db: &D) -> anyhow::Result<TempTag>
210 where
211 D: crate::store::Store,
212 {
213 let (links, meta) = self.into_parts();
214 let meta_bytes = postcard::to_stdvec(&meta)?;
215 let meta_tag = db.import_bytes(meta_bytes.into(), BlobFormat::Raw).await?;
216 let links_bytes = std::iter::once(*meta_tag.hash())
217 .chain(links)
218 .collect::<HashSeq>();
219 let links_tag = db
220 .import_bytes(links_bytes.into(), BlobFormat::HashSeq)
221 .await?;
222 Ok(links_tag)
223 }
224
225 fn into_parts(self) -> (Vec<Hash>, CollectionMeta) {
227 let mut names = Vec::with_capacity(self.blobs.len());
228 let mut links = Vec::with_capacity(self.blobs.len());
229 for (name, hash) in self.blobs {
230 names.push(name);
231 links.push(hash);
232 }
233 let meta = CollectionMeta {
234 header: *Self::HEADER,
235 names,
236 };
237 (links, meta)
238 }
239
240 fn from_parts(links: impl IntoIterator<Item = Hash>, meta: CollectionMeta) -> Self {
242 meta.names.into_iter().zip(links).collect()
243 }
244
245 fn links(&self) -> impl Iterator<Item = Hash> + '_ {
247 self.blobs.iter().map(|(_name, hash)| *hash)
248 }
249
250 fn names(&self) -> Vec<String> {
252 self.blobs.iter().map(|(name, _)| name.clone()).collect()
253 }
254
255 pub fn iter(&self) -> impl Iterator<Item = &(String, Hash)> {
257 self.blobs.iter()
258 }
259
260 pub fn len(&self) -> usize {
262 self.blobs.len()
263 }
264
265 pub fn is_empty(&self) -> bool {
267 self.blobs.is_empty()
268 }
269
270 pub fn push(&mut self, name: String, hash: Hash) {
272 self.blobs.push((name, hash));
273 }
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// In-memory [`SimpleStore`] backed by a map from hash to blob bytes.
    struct TestStore(BTreeMap<Hash, Bytes>);

    impl FromIterator<(Hash, Bytes)> for TestStore {
        fn from_iter<T: IntoIterator<Item = (Hash, Bytes)>>(iter: T) -> Self {
            let entries: BTreeMap<Hash, Bytes> = iter.into_iter().collect();
            Self(entries)
        }
    }

    impl SimpleStore for TestStore {
        async fn load(&self, hash: Hash) -> anyhow::Result<Bytes> {
            let found = self.0.get(&hash).cloned();
            found.context("not found")
        }
    }

    /// A single `(name, hash)` entry survives a postcard round trip.
    #[test]
    fn roundtrip_blob() {
        let hash: Hash = blake3::Hash::from_hex(
            "3aa61c409fd7717c9d9c639202af2fae470c0ef669be7ba2caea5779cb534e9d",
        )
        .unwrap()
        .into();
        let entry = ("test".to_string(), hash);

        let mut buf = bytes::BytesMut::zeroed(1024);
        postcard::to_slice(&entry, &mut buf).unwrap();
        let decoded: (String, Hash) = postcard::from_bytes(&buf).unwrap();
        assert_eq!(entry, decoded);
    }

    /// `CollectionMeta` survives a postcard round trip.
    #[test]
    fn roundtrip_collection_meta() {
        let names = vec!["test".to_string(), "a".to_string(), "b".to_string()];
        let expected = CollectionMeta {
            header: *Collection::HEADER,
            names,
        };

        let mut buf = bytes::BytesMut::zeroed(1024);
        postcard::to_slice(&expected, &mut buf).unwrap();
        let actual: CollectionMeta = postcard::from_bytes(&buf).unwrap();
        assert_eq!(expected, actual);
    }

    /// Blobs produced by `to_blobs` can be loaded back into an equal collection.
    #[tokio::test]
    async fn collection_store_load() -> testresult::TestResult {
        let collection: Collection = (0..3)
            .map(|i| {
                let name = format!("blob{}", i);
                let hash = crate::Hash::from(blake3::hash(&[i as u8]));
                (name, hash)
            })
            .collect();

        // The hash of the last blob yielded by `to_blobs` is used as the root.
        let mut root = None;
        let store: TestStore = collection
            .to_blobs()
            .map(|data| {
                let hash = crate::Hash::from(blake3::hash(&data));
                root = Some(hash);
                (hash, data)
            })
            .collect();

        let loaded = Collection::load(root.unwrap(), &store).await?;
        assert_eq!(collection, loaded);
        Ok(())
    }
}