unixfs_v1/dir/builder/
buffered.rs

1use super::{DirBuilder, Entry, Leaf, PostOrderIterator, TreeBuildingFailed, TreeOptions};
2use crate::Metadata;
3use alloc::collections::btree_map::Entry::*;
4use libipld::Cid;
5
6/// UnixFs directory tree builder which buffers entries until `build()` is called.
7#[derive(Debug)]
8pub struct BufferingTreeBuilder {
9    /// At the root there can be only one element, unless an option was given to create a new
10    /// directory surrounding the root elements.
11    root_builder: DirBuilder,
12    longest_path: usize,
13    // used to generate a unique id for each node; it is used when doing the post order traversal to
14    // recover all children's rendered Cids
15    counter: u64,
16    opts: TreeOptions,
17}
18
19impl Default for BufferingTreeBuilder {
20    fn default() -> Self {
21        Self::new(TreeOptions::default())
22    }
23}
24
25impl BufferingTreeBuilder {
26    /// Construct a new tree builder with the given configuration.
27    pub fn new(opts: TreeOptions) -> Self {
28        BufferingTreeBuilder {
29            root_builder: DirBuilder::root(0),
30            longest_path: 0,
31            counter: 1,
32            opts,
33        }
34    }
35
36    /// Registers the given path to be a link to the cid that follows. The target leaf should be
37    /// either a file, directory or symlink but could of course be anything. It will be treated as
38    /// an opaque link.
39    pub fn put_link(
40        &mut self,
41        full_path: &str,
42        target: Cid,
43        total_size: u64,
44    ) -> Result<(), TreeBuildingFailed> {
45        let leaf = Leaf {
46            link: target,
47            total_size,
48        };
49
50        self.modify_with(full_path, |parent, basename, _| {
51            parent
52                .put_leaf(basename, leaf)
53                .map_err(|_| TreeBuildingFailed::DuplicatePath(full_path.to_string()))
54        })
55    }
56
57    /// Directories get "put" implicitly through the put files, and directories need to be adjusted
58    /// only when wanting them to have metadata.
59    pub fn set_metadata(
60        &mut self,
61        full_path: &str,
62        metadata: Metadata,
63    ) -> Result<(), TreeBuildingFailed> {
64        // create all paths along the way
65        //
66        // set if not set, error otherwise? FIXME: doesn't error atm
67        self.modify_with(full_path, |parent, basename, id| {
68            parent
69                .add_or_get_node(basename, id)
70                .map_err(|_| TreeBuildingFailed::LeafAsDirectory(full_path.to_string()))?
71                .set_metadata(metadata);
72            Ok(())
73        })
74    }
75
76    fn modify_with<F>(&mut self, full_path: &str, f: F) -> Result<(), TreeBuildingFailed>
77    where
78        F: FnOnce(&mut DirBuilder, String, &mut Option<u64>) -> Result<(), TreeBuildingFailed>,
79    {
80        // create all paths along the way
81        //
82        // assuming it's ok to split at '/' since that cannot be escaped in linux at least
83
84        self.longest_path = full_path.len().max(self.longest_path);
85        let mut remaining = full_path.split('/').enumerate().peekable();
86        let mut dir_builder = &mut self.root_builder;
87
88        // check these before to avoid creation of bogus nodes in the tree or having to clean up.
89
90        if full_path.ends_with('/') {
91            return Err(TreeBuildingFailed::PathEndsInSlash(full_path.to_string()));
92        }
93
94        if full_path.contains("//") {
95            return Err(TreeBuildingFailed::RepeatSlashesInPath(
96                full_path.to_string(),
97            ));
98        }
99
100        // needed to avoid borrowing into the DirBuilder::new calling closure
101        let counter = &mut self.counter;
102
103        while let Some((depth, next)) = remaining.next() {
104            let last = remaining.peek().is_none();
105
106            match (depth, next, last) {
107                // this might need to be accepted in case there is just a single file
108                (0, "", true) => {
109                    // accepted: allows unconditional tree building in ipfs-http
110                    // but the resulting tree will have at most single node, which doesn't prompt
111                    // creation of new directories and should be fine.
112                }
113                (0, "", false) => {
114                    // ok to keep this inside the loop; we are yet to create any nodes.
115                    // note the ipfs-http (and for example js-ipfs) normalizes the path by
116                    // removing the slash from the start.
117                    return Err(TreeBuildingFailed::RootedPath(full_path.to_string()));
118                }
119                (_, "", false) => unreachable!("already validated: no repeat slashes"),
120                (_, "", true) => unreachable!("already validated: path does not end in slash"),
121                _ => {}
122            }
123
124            // our first level can be full, depending on the options given
125            let full = depth == 0 && !self.opts.wrap_with_directory && !dir_builder.is_empty();
126
127            if last {
128                let mut next_id = Some(*counter);
129
130                let ret = if full {
131                    Err(TreeBuildingFailed::TooManyRootLevelEntries)
132                } else {
133                    f(dir_builder, next.to_string(), &mut next_id)
134                };
135
136                if next_id.is_none() {
137                    *counter += 1;
138                }
139
140                if ret.is_err() {
141                    // FIXME: there might be a case where we have now stale nodes in our tree but
142                    // cannot figure out an example for that.
143                }
144
145                return ret;
146            }
147
148            let parent_id = dir_builder.id;
149
150            dir_builder = match (full, dir_builder.nodes.entry(next.to_string())) {
151                (_, Occupied(oe)) => oe
152                    .into_mut()
153                    .as_dir_builder()
154                    .map_err(|_| TreeBuildingFailed::LeafAsDirectory(full_path.to_string()))?,
155                (false, Vacant(ve)) => {
156                    let next_id = *counter;
157                    *counter += 1;
158                    ve.insert(Entry::Directory(DirBuilder::new(parent_id, next_id)))
159                        .as_dir_builder()
160                        .expect("safe: we just inserted a DirBuilder")
161                }
162                (true, Vacant(_)) => return Err(TreeBuildingFailed::TooManyRootLevelEntries),
163            };
164        }
165
166        // as the str::split will always return a single element this should not ever be hit
167        unreachable!(
168            "walked the full_path but failed to add anything: {:?}",
169            full_path
170        );
171    }
172
173    /// Called to build the tree. The built tree will have the added files and their implied
174    /// directory structure, along with the directory entries which were created using
175    /// `set_metadata`. To build the whole hierarchy, one must iterate the returned iterator to
176    /// completion while storing the created blocks.
177    ///
178    /// Returned `PostOrderIterator` will use the given `full_path` and `block_buffer` to store
179    /// its data during the walk. `PostOrderIterator` implements `Iterator` while also allowing
180    /// borrowed access via `next_borrowed`.
181    pub fn build(self) -> PostOrderIterator {
182        PostOrderIterator::new(self.root_builder, self.opts, self.longest_path)
183    }
184}
185
186#[cfg(test)]
187mod tests {
188    use crate::pb::DAG_PB;
189
190    use super::{
191        super::OwnedTreeNode, BufferingTreeBuilder, Metadata, TreeBuildingFailed, TreeOptions,
192    };
193    use core::convert::TryFrom;
194    use libipld::multihash::{Code, MultihashDigest};
195    use libipld::Cid;
196
197    #[test]
198    fn some_directories() {
199        let mut builder = BufferingTreeBuilder::default();
200
201        // foobar\n
202        let five_block_foobar =
203            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
204
205        builder
206            .put_link("a/b/c/d/e/f/g.txt", five_block_foobar, 221)
207            .unwrap();
208        builder
209            .put_link("a/b/c/d/e/h.txt", five_block_foobar, 221)
210            .unwrap();
211        builder
212            .put_link("a/b/c/d/e/i.txt", five_block_foobar, 221)
213            .unwrap();
214
215        let actual = builder
216            .build()
217            .map(|res| res.map(|n| (n.path, n.cid, n.block)))
218            .collect::<Result<Vec<_>, _>>()
219            .unwrap();
220
221        let expected = vec![
222            (
223                "a/b/c/d/e/f",
224                "bafybeiggi7xxndgcvvn776odlzbyo42os3m2ns7flmd2zoqjdtxhxjj5pm",
225            ),
226            (
227                "a/b/c/d/e",
228                "bafybeienpcl75unpkin7sublc6b4liop2efhscxxlxgipncfvnxydsu6ya",
229            ),
230            (
231                "a/b/c/d",
232                "bafybeigwwu5zfiu7trjhnulcejlpo7ytinr4iyqasetnmw3sko5l36wjqm",
233            ),
234            (
235                "a/b/c",
236                "bafybeig7jp4rbhaejauys6drkl66c7gcsp26gn3hdryezex5j4jng4ppda",
237            ),
238            (
239                "a/b",
240                "bafybeiethbkivdfv6yvvl4ho4tv2ogbyn4uzblvtkjuqksu2acbk32gwf4",
241            ),
242            (
243                "a",
244                "bafybeiaadgmref7r6eqwxv66nmwfoy3qxv57gdwmk4xnierff5bobsvsny",
245            ),
246        ];
247
248        verify_results(expected, actual);
249    }
250
251    #[test]
252    fn empty_path() {
253        let mut builder = BufferingTreeBuilder::default();
254        builder.put_link("", some_cid(0), 1).unwrap();
255
256        let actual = builder
257            .build()
258            .map(|res| res.map(|OwnedTreeNode { path, .. }| path))
259            .collect::<Result<Vec<_>, _>>()
260            .unwrap();
261
262        assert!(
263            actual.is_empty(),
264            "wrapping in directory was not asked, single element"
265        );
266    }
267
268    #[test]
269    #[should_panic]
270    fn rooted_path() {
271        let mut builder = BufferingTreeBuilder::default();
272        builder.put_link("/a", some_cid(0), 1).unwrap();
273    }
274
275    #[test]
276    #[should_panic]
277    fn successive_slashes() {
278        let mut builder = BufferingTreeBuilder::default();
279        builder.put_link("a//b", some_cid(0), 1).unwrap();
280    }
281
282    #[test]
283    fn multiple_roots() {
284        // foobar\n
285        let five_block_foobar =
286            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
287
288        let mut opts = TreeOptions::default();
289        opts.wrap_with_directory();
290        let mut builder = BufferingTreeBuilder::new(opts);
291        builder.put_link("a", five_block_foobar, 221).unwrap();
292        builder.put_link("b", five_block_foobar, 221).unwrap();
293
294        let actual = builder
295            .build()
296            .map(|res| res.map(|OwnedTreeNode { path, cid, .. }| (path, cid.to_string())))
297            .collect::<Result<Vec<_>, _>>()
298            .unwrap();
299
300        assert_eq!(
301            actual,
302            &[(
303                "".to_string(),
304                "bafybeihcvyze4cpjen52osvvcqyhlgtbe456g6lyudqa37tcbctkl4x6kq".to_string()
305            )]
306        );
307    }
308
309    #[test]
310    fn single_wrapped_root() {
311        // foobar\n
312        let five_block_foobar =
313            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
314
315        let mut opts = TreeOptions::default();
316        opts.wrap_with_directory();
317        let mut builder = BufferingTreeBuilder::new(opts);
318        builder.put_link("a", five_block_foobar, 221).unwrap();
319
320        let actual = builder
321            .build()
322            .map(|res| res.map(|OwnedTreeNode { path, cid, .. }| (path, cid.to_string())))
323            .collect::<Result<Vec<_>, _>>()
324            .unwrap();
325
326        assert_eq!(
327            actual,
328            &[(
329                "".to_string(),
330                "bafybeia3o7eaqkxuxpdotmu5su3rmdvmuimpo7vpb23ulpzg7od7j3nlte".to_string()
331            )]
332        );
333    }
334
335    #[test]
336    #[should_panic]
337    fn denied_multiple_root_dirs() {
338        let mut builder = BufferingTreeBuilder::default();
339        builder.put_link("a/c.txt", some_cid(0), 1).unwrap();
340        builder.put_link("b/d.txt", some_cid(1), 1).unwrap();
341    }
342
343    #[test]
344    #[should_panic]
345    fn denied_multiple_root_files() {
346        let mut builder = BufferingTreeBuilder::default();
347        builder.put_link("a.txt", some_cid(0), 1).unwrap();
348        builder.put_link("b.txt", some_cid(1), 1).unwrap();
349    }
350
351    #[test]
352    #[should_panic]
353    fn using_leaf_as_node() {
354        let mut builder = BufferingTreeBuilder::default();
355        builder.put_link("a.txt", some_cid(0), 1).unwrap();
356        builder.put_link("a.txt/b.txt", some_cid(1), 1).unwrap();
357    }
358
359    #[test]
360    fn set_metadata_before_files() {
361        let mut builder = BufferingTreeBuilder::default();
362        builder
363            .set_metadata("a/b/c/d", Metadata::default())
364            .unwrap();
365        builder.put_link("a/b/c/d/e.txt", some_cid(1), 1).unwrap();
366        builder.put_link("a/b/c/d/f.txt", some_cid(2), 1).unwrap();
367
368        let actual = builder
369            .build()
370            .map(|res| res.map(|OwnedTreeNode { path, .. }| path))
371            .collect::<Result<Vec<_>, _>>()
372            .unwrap();
373
374        assert_eq!(actual, &["a/b/c/d", "a/b/c", "a/b", "a",])
375    }
376
377    #[test]
378    fn set_metadata_on_file() {
379        let mut builder = BufferingTreeBuilder::default();
380        builder.put_link("a/a.txt", some_cid(0), 1).unwrap();
381        let err = builder
382            .set_metadata("a/a.txt", Metadata::default())
383            .unwrap_err();
384
385        assert!(
386            matches!(err, TreeBuildingFailed::LeafAsDirectory(_)),
387            "{:?}",
388            err
389        );
390    }
391
392    #[test]
393    fn dir_with_cidv1_link() {
394        // this is `echo '{ "name": "hello" }` | ./ipfs dag put`
395        let target =
396            Cid::try_from("bafyreihakpd7te5nbmlhdk5ntvcvhf2hmfgrvcwna2sddq5zz5342mcbli").unwrap();
397
398        let mut builder = BufferingTreeBuilder::default();
399        builder.put_link("a/b", target, 12).unwrap();
400
401        let actual = builder
402            .build()
403            .map(|res| res.map(|n| (n.path, n.cid, n.block)))
404            .collect::<Result<Vec<_>, _>>()
405            .unwrap();
406
407        let expected = vec![(
408            "a",
409            "bafybeiapacmj5oggpmkv7c4d2ujfokpdmnczg2gtr5ddumhz7j4sdy7734",
410        )];
411
412        verify_results(expected, actual);
413    }
414
415    fn verify_results(
416        mut expected: Vec<(
417            impl AsRef<str> + core::fmt::Debug,
418            impl AsRef<str> + core::fmt::Debug,
419        )>,
420        mut actual: Vec<(String, Cid, Box<[u8]>)>,
421    ) {
422        use core::fmt;
423
424        struct Hex<'a>(&'a [u8]);
425
426        impl<'a> fmt::Debug for Hex<'a> {
427            fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
428                for b in self.0 {
429                    write!(fmt, "{:02x}", b)?;
430                }
431                Ok(())
432            }
433        }
434
435        // hopefully this way the errors will be easier to hunt down
436
437        actual.reverse();
438        expected.reverse();
439
440        while let Some(actual) = actual.pop() {
441            let expected = expected.pop().expect("size mismatch");
442            assert_eq!(actual.0, expected.0.as_ref());
443            assert_eq!(
444                actual.1.to_string(),
445                expected.1.as_ref(),
446                "{:?}: {:?}",
447                actual.0,
448                Hex(&actual.2)
449            );
450        }
451
452        assert_eq!(expected.len(), 0, "size mismatch: {:?}", actual);
453    }
454
455    /// Returns a quick and dirty sha2-256 of the given number as a Cidv0
456    fn some_cid(number: usize) -> Cid {
457        let mh = Code::Sha2_256.digest(&number.to_le_bytes());
458        Cid::new_v1(DAG_PB, mh)
459    }
460}