ipfs_unixfs/dir/builder/
buffered.rs

1use super::{DirBuilder, Entry, Leaf, PostOrderIterator, TreeBuildingFailed, TreeOptions};
2use crate::Metadata;
3use alloc::collections::btree_map::Entry::*;
4use cid::Cid;
5
6/// UnixFs directory tree builder which buffers entries until `build()` is called.
7#[derive(Debug)]
8pub struct BufferingTreeBuilder {
9    /// At the root there can be only one element, unless an option was given to create a new
10    /// directory surrounding the root elements.
11    root_builder: DirBuilder,
12    longest_path: usize,
13    // used to generate a unique id for each node; it is used when doing the post order traversal to
14    // recover all children's rendered Cids
15    counter: u64,
16    opts: TreeOptions,
17}
18
19impl Default for BufferingTreeBuilder {
20    fn default() -> Self {
21        Self::new(TreeOptions::default())
22    }
23}
24
25impl BufferingTreeBuilder {
26    /// Construct a new tree builder with the given configuration.
27    pub fn new(opts: TreeOptions) -> Self {
28        BufferingTreeBuilder {
29            root_builder: DirBuilder::root(0),
30            longest_path: 0,
31            counter: 1,
32            opts,
33        }
34    }
35
36    /// Registers the given path to be a link to the cid that follows. The target leaf should be
37    /// either a file, directory or symlink but could of course be anything. It will be treated as
38    /// an opaque link.
39    pub fn put_link(
40        &mut self,
41        full_path: &str,
42        target: Cid,
43        total_size: u64,
44    ) -> Result<(), TreeBuildingFailed> {
45        let leaf = Leaf {
46            link: target,
47            total_size,
48        };
49
50        self.modify_with(full_path, |parent, basename, _| {
51            parent
52                .put_leaf(basename, leaf)
53                .map_err(|_| TreeBuildingFailed::DuplicatePath(full_path.to_string()))
54        })
55    }
56
57    /// Directories get "put" implicitly through the put files, and directories need to be adjusted
58    /// only when wanting them to have metadata.
59    pub fn set_metadata(
60        &mut self,
61        full_path: &str,
62        metadata: Metadata,
63    ) -> Result<(), TreeBuildingFailed> {
64        // create all paths along the way
65        //
66        // set if not set, error otherwise? FIXME: doesn't error atm
67        self.modify_with(full_path, |parent, basename, id| {
68            parent
69                .add_or_get_node(basename, id)
70                .map_err(|_| TreeBuildingFailed::LeafAsDirectory(full_path.to_string()))?
71                .set_metadata(metadata);
72            Ok(())
73        })
74    }
75
76    fn modify_with<F>(&mut self, full_path: &str, f: F) -> Result<(), TreeBuildingFailed>
77    where
78        F: FnOnce(&mut DirBuilder, String, &mut Option<u64>) -> Result<(), TreeBuildingFailed>,
79    {
80        // create all paths along the way
81        //
82        // assuming it's ok to split at '/' since that cannot be escaped in linux at least
83
84        self.longest_path = full_path.len().max(self.longest_path);
85        let mut remaining = full_path.split('/').enumerate().peekable();
86        let mut dir_builder = &mut self.root_builder;
87
88        // check these before to avoid creation of bogus nodes in the tree or having to clean up.
89
90        if full_path.ends_with('/') {
91            return Err(TreeBuildingFailed::PathEndsInSlash(full_path.to_string()));
92        }
93
94        if full_path.contains("//") {
95            return Err(TreeBuildingFailed::RepeatSlashesInPath(
96                full_path.to_string(),
97            ));
98        }
99
100        // needed to avoid borrowing into the DirBuilder::new calling closure
101        let counter = &mut self.counter;
102
103        while let Some((depth, next)) = remaining.next() {
104            let last = remaining.peek().is_none();
105
106            match (depth, next, last) {
107                // this might need to be accepted in case there is just a single file
108                (0, "", true) => {
109                    // accepted: allows unconditional tree building in ipfs-http
110                    // but the resulting tree will have at most single node, which doesn't prompt
111                    // creation of new directories and should be fine.
112                }
113                (0, "", false) => {
114                    // ok to keep this inside the loop; we are yet to create any nodes.
115                    // note the ipfs-http (and for example js-ipfs) normalizes the path by
116                    // removing the slash from the start.
117                    return Err(TreeBuildingFailed::RootedPath(full_path.to_string()));
118                }
119                (_, "", false) => unreachable!("already validated: no repeat slashes"),
120                (_, "", true) => unreachable!("already validated: path does not end in slash"),
121                _ => {}
122            }
123
124            // our first level can be full, depending on the options given
125            let full = depth == 0 && !self.opts.wrap_with_directory && !dir_builder.is_empty();
126
127            if last {
128                let mut next_id = Some(*counter);
129
130                let ret = if full {
131                    Err(TreeBuildingFailed::TooManyRootLevelEntries)
132                } else {
133                    f(dir_builder, next.to_string(), &mut next_id)
134                };
135
136                if next_id.is_none() {
137                    *counter += 1;
138                }
139
140                if ret.is_err() {
141                    // FIXME: there might be a case where we have now stale nodes in our tree but
142                    // cannot figure out an example for that.
143                }
144
145                return ret;
146            }
147
148            let parent_id = dir_builder.id;
149
150            dir_builder = match (full, dir_builder.nodes.entry(next.to_string())) {
151                (_, Occupied(oe)) => oe
152                    .into_mut()
153                    .as_dir_builder()
154                    .map_err(|_| TreeBuildingFailed::LeafAsDirectory(full_path.to_string()))?,
155                (false, Vacant(ve)) => {
156                    let next_id = *counter;
157                    *counter += 1;
158                    ve.insert(Entry::Directory(DirBuilder::new(parent_id, next_id)))
159                        .as_dir_builder()
160                        .expect("safe: we just inserted a DirBuilder")
161                }
162                (true, Vacant(_)) => return Err(TreeBuildingFailed::TooManyRootLevelEntries),
163            };
164        }
165
166        // as the str::split will always return a single element this should not ever be hit
167        unreachable!(
168            "walked the full_path but failed to add anything: {:?}",
169            full_path
170        );
171    }
172
173    /// Called to build the tree. The built tree will have the added files and their implied
174    /// directory structure, along with the directory entries which were created using
175    /// `set_metadata`. To build the whole hierarchy, one must iterate the returned iterator to
176    /// completion while storing the created blocks.
177    ///
178    /// Returned `PostOrderIterator` will use the given `full_path` and `block_buffer` to store
179    /// its data during the walk. `PostOrderIterator` implements `Iterator` while also allowing
180    /// borrowed access via `next_borrowed`.
181    pub fn build(self) -> PostOrderIterator {
182        PostOrderIterator::new(self.root_builder, self.opts, self.longest_path)
183    }
184}
185
186#[cfg(test)]
187mod tests {
188    use super::{
189        super::OwnedTreeNode, BufferingTreeBuilder, Metadata, TreeBuildingFailed, TreeOptions,
190    };
191    use cid::Cid;
192    use core::convert::TryFrom;
193
194    #[test]
195    fn some_directories() {
196        let mut builder = BufferingTreeBuilder::default();
197
198        // foobar\n
199        let five_block_foobar =
200            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
201
202        builder
203            .put_link("a/b/c/d/e/f/g.txt", five_block_foobar.clone(), 221)
204            .unwrap();
205        builder
206            .put_link("a/b/c/d/e/h.txt", five_block_foobar.clone(), 221)
207            .unwrap();
208        builder
209            .put_link("a/b/c/d/e/i.txt", five_block_foobar, 221)
210            .unwrap();
211
212        let actual = builder
213            .build()
214            .map(|res| res.map(|n| (n.path, n.cid, n.block)))
215            .collect::<Result<Vec<_>, _>>()
216            .unwrap();
217
218        let expected = vec![
219            (
220                "a/b/c/d/e/f",
221                "Qmbgf44ztW9wLcGNRNYGinGQB6SQDQtbHVbkM5MrWms698",
222            ),
223            (
224                "a/b/c/d/e",
225                "Qma1hCr3CuPRAq2Gw4DCNMqsi42Bjs4Bt1MGSS57kNh144",
226            ),
227            ("a/b/c/d", "QmUqaYatcJqiSFdykHXGh4Nog1eMSfDJBeYzcG67KV5Ri4"),
228            ("a/b/c", "QmYwaNBaGpDCNN9XpHmjxVPHmEXZMw9KDY3uikE2UU5fVB"),
229            ("a/b", "QmeAzCPig4o4gBLh2LvP96Sr8MUBrsu2Scw9MTq1EvTDhY"),
230            ("a", "QmSTUFaPwJW8xD4KNRLLQRqVTYtYC29xuhYTJoYPWdzvKp"),
231        ];
232
233        verify_results(expected, actual);
234    }
235
236    #[test]
237    fn empty_path() {
238        let mut builder = BufferingTreeBuilder::default();
239        builder.put_link("", some_cid(0), 1).unwrap();
240
241        let actual = builder
242            .build()
243            .map(|res| res.map(|OwnedTreeNode { path, .. }| path))
244            .collect::<Result<Vec<_>, _>>()
245            .unwrap();
246
247        assert!(
248            actual.is_empty(),
249            "wrapping in directory was not asked, single element"
250        );
251    }
252
253    #[test]
254    #[should_panic]
255    fn rooted_path() {
256        let mut builder = BufferingTreeBuilder::default();
257        builder.put_link("/a", some_cid(0), 1).unwrap();
258    }
259
260    #[test]
261    #[should_panic]
262    fn successive_slashes() {
263        let mut builder = BufferingTreeBuilder::default();
264        builder.put_link("a//b", some_cid(0), 1).unwrap();
265    }
266
267    #[test]
268    fn multiple_roots() {
269        // foobar\n
270        let five_block_foobar =
271            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
272
273        let mut opts = TreeOptions::default();
274        opts.wrap_with_directory();
275        let mut builder = BufferingTreeBuilder::new(opts);
276        builder
277            .put_link("a", five_block_foobar.clone(), 221)
278            .unwrap();
279        builder.put_link("b", five_block_foobar, 221).unwrap();
280
281        let actual = builder
282            .build()
283            .map(|res| res.map(|OwnedTreeNode { path, cid, .. }| (path, cid.to_string())))
284            .collect::<Result<Vec<_>, _>>()
285            .unwrap();
286
287        assert_eq!(
288            actual,
289            &[(
290                "".to_string(),
291                "QmdbWuhpVCX9weVMMqvVTMeGwKMqCNJDbx7ZK1zG36sea7".to_string()
292            )]
293        );
294    }
295
296    #[test]
297    fn single_wrapped_root() {
298        // foobar\n
299        let five_block_foobar =
300            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
301
302        let mut opts = TreeOptions::default();
303        opts.wrap_with_directory();
304        let mut builder = BufferingTreeBuilder::new(opts);
305        builder.put_link("a", five_block_foobar, 221).unwrap();
306
307        let actual = builder
308            .build()
309            .map(|res| res.map(|OwnedTreeNode { path, cid, .. }| (path, cid.to_string())))
310            .collect::<Result<Vec<_>, _>>()
311            .unwrap();
312
313        assert_eq!(
314            actual,
315            &[(
316                "".to_string(),
317                "QmQBseoi3b2FBrYhjM2E4mCF4Q7C8MgCUbzAbGNfyVwgNk".to_string()
318            )]
319        );
320    }
321
322    #[test]
323    #[should_panic]
324    fn denied_multiple_root_dirs() {
325        let mut builder = BufferingTreeBuilder::default();
326        builder.put_link("a/c.txt", some_cid(0), 1).unwrap();
327        builder.put_link("b/d.txt", some_cid(1), 1).unwrap();
328    }
329
330    #[test]
331    #[should_panic]
332    fn denied_multiple_root_files() {
333        let mut builder = BufferingTreeBuilder::default();
334        builder.put_link("a.txt", some_cid(0), 1).unwrap();
335        builder.put_link("b.txt", some_cid(1), 1).unwrap();
336    }
337
338    #[test]
339    #[should_panic]
340    fn using_leaf_as_node() {
341        let mut builder = BufferingTreeBuilder::default();
342        builder.put_link("a.txt", some_cid(0), 1).unwrap();
343        builder.put_link("a.txt/b.txt", some_cid(1), 1).unwrap();
344    }
345
346    #[test]
347    fn set_metadata_before_files() {
348        let mut builder = BufferingTreeBuilder::default();
349        builder
350            .set_metadata("a/b/c/d", Metadata::default())
351            .unwrap();
352        builder.put_link("a/b/c/d/e.txt", some_cid(1), 1).unwrap();
353        builder.put_link("a/b/c/d/f.txt", some_cid(2), 1).unwrap();
354
355        let actual = builder
356            .build()
357            .map(|res| res.map(|OwnedTreeNode { path, .. }| path))
358            .collect::<Result<Vec<_>, _>>()
359            .unwrap();
360
361        assert_eq!(actual, &["a/b/c/d", "a/b/c", "a/b", "a",])
362    }
363
364    #[test]
365    fn set_metadata_on_file() {
366        let mut builder = BufferingTreeBuilder::default();
367        builder.put_link("a/a.txt", some_cid(0), 1).unwrap();
368        let err = builder
369            .set_metadata("a/a.txt", Metadata::default())
370            .unwrap_err();
371
372        assert!(
373            matches!(err, TreeBuildingFailed::LeafAsDirectory(_)),
374            "{:?}",
375            err
376        );
377    }
378
379    #[test]
380    fn dir_with_cidv1_link() {
381        // this is `echo '{ "name": "hello" }` | ./ipfs dag put`
382        let target =
383            Cid::try_from("bafyreihakpd7te5nbmlhdk5ntvcvhf2hmfgrvcwna2sddq5zz5342mcbli").unwrap();
384
385        let mut builder = BufferingTreeBuilder::default();
386        builder.put_link("a/b", target, 12).unwrap();
387
388        let actual = builder
389            .build()
390            .map(|res| res.map(|n| (n.path, n.cid, n.block)))
391            .collect::<Result<Vec<_>, _>>()
392            .unwrap();
393
394        let expected = vec![("a", "QmPMDMPG8dbHDC9GuvqWr9pfruLnp4GZCAWrskwCmenVQa")];
395
396        verify_results(expected, actual);
397    }
398
399    fn verify_results(
400        mut expected: Vec<(
401            impl AsRef<str> + core::fmt::Debug,
402            impl AsRef<str> + core::fmt::Debug,
403        )>,
404        mut actual: Vec<(String, Cid, Box<[u8]>)>,
405    ) {
406        use core::fmt;
407
408        struct Hex<'a>(&'a [u8]);
409
410        impl<'a> fmt::Debug for Hex<'a> {
411            fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
412                for b in self.0 {
413                    write!(fmt, "{:02x}", b)?;
414                }
415                Ok(())
416            }
417        }
418
419        // hopefully this way the errors will be easier to hunt down
420
421        actual.reverse();
422        expected.reverse();
423
424        while let Some(actual) = actual.pop() {
425            let expected = expected.pop().expect("size mismatch");
426            assert_eq!(actual.0, expected.0.as_ref());
427            assert_eq!(
428                actual.1.to_string(),
429                expected.1.as_ref(),
430                "{:?}: {:?}",
431                actual.0,
432                Hex(&actual.2)
433            );
434        }
435
436        assert_eq!(expected.len(), 0, "size mismatch: {:?}", actual);
437    }
438
439    /// Returns a quick and dirty sha2-256 of the given number as a Cidv0
440    fn some_cid(number: usize) -> Cid {
441        use multihash::Sha2_256;
442        let mh = Sha2_256::digest(&number.to_le_bytes());
443        Cid::new_v0(mh).unwrap()
444    }
445}