rust_unixfs/dir/builder/
buffered.rs

1use super::{DirBuilder, Entry, Leaf, PostOrderIterator, TreeBuildingFailed, TreeOptions};
2use crate::Metadata;
3use alloc::collections::btree_map::Entry::*;
4use ipld_core::cid::Cid;
5
6/// UnixFs directory tree builder which buffers entries until `build()` is called.
7#[derive(Debug)]
8pub struct BufferingTreeBuilder {
9    /// At the root there can be only one element, unless an option was given to create a new
10    /// directory surrounding the root elements.
11    root_builder: DirBuilder,
12    longest_path: usize,
13    // used to generate a unique id for each node; it is used when doing the post order traversal to
14    // recover all children's rendered Cids
15    counter: u64,
16    opts: TreeOptions,
17}
18
19impl Default for BufferingTreeBuilder {
20    fn default() -> Self {
21        Self::new(TreeOptions::default())
22    }
23}
24
25impl BufferingTreeBuilder {
26    /// Construct a new tree builder with the given configuration.
27    pub fn new(opts: TreeOptions) -> Self {
28        BufferingTreeBuilder {
29            root_builder: DirBuilder::root(0),
30            longest_path: 0,
31            counter: 1,
32            opts,
33        }
34    }
35
36    /// Registers the given path to be a link to the cid that follows. The target leaf should be
37    /// either a file, directory or symlink but could of course be anything. It will be treated as
38    /// an opaque link.
39    pub fn put_link(
40        &mut self,
41        full_path: &str,
42        target: Cid,
43        total_size: u64,
44    ) -> Result<(), TreeBuildingFailed> {
45        let leaf = Leaf {
46            link: target,
47            total_size,
48        };
49
50        self.modify_with(full_path, |parent, basename, _| {
51            parent
52                .put_leaf(basename, leaf)
53                .map_err(|_| TreeBuildingFailed::DuplicatePath(full_path.to_string()))
54        })
55    }
56
57    /// Directories get "put" implicitly through the put files, and directories need to be adjusted
58    /// only when wanting them to have metadata.
59    pub fn set_metadata(
60        &mut self,
61        full_path: &str,
62        metadata: Metadata,
63    ) -> Result<(), TreeBuildingFailed> {
64        // create all paths along the way
65        //
66        // set if not set, error otherwise? FIXME: doesn't error atm
67        self.modify_with(full_path, |parent, basename, id| {
68            parent
69                .add_or_get_node(basename, id)
70                .map_err(|_| TreeBuildingFailed::LeafAsDirectory(full_path.to_string()))?
71                .set_metadata(metadata);
72            Ok(())
73        })
74    }
75
76    fn modify_with<F>(&mut self, full_path: &str, f: F) -> Result<(), TreeBuildingFailed>
77    where
78        F: FnOnce(&mut DirBuilder, String, &mut Option<u64>) -> Result<(), TreeBuildingFailed>,
79    {
80        // create all paths along the way
81        //
82        // assuming it's ok to split at '/' since that cannot be escaped in linux at least
83
84        self.longest_path = full_path.len().max(self.longest_path);
85        let mut remaining = full_path.split('/').enumerate().peekable();
86        let mut dir_builder = &mut self.root_builder;
87
88        // check these before to avoid creation of bogus nodes in the tree or having to clean up.
89
90        if full_path.ends_with('/') {
91            return Err(TreeBuildingFailed::PathEndsInSlash(full_path.to_string()));
92        }
93
94        if full_path.contains("//") {
95            return Err(TreeBuildingFailed::RepeatSlashesInPath(
96                full_path.to_string(),
97            ));
98        }
99
100        // needed to avoid borrowing into the DirBuilder::new calling closure
101        let counter = &mut self.counter;
102
103        while let Some((depth, next)) = remaining.next() {
104            let last = remaining.peek().is_none();
105
106            match (depth, next, last) {
107                // this might need to be accepted in case there is just a single file
108                (0, "", true) => {
109                    // accepted: allows unconditional tree building in ipfs-http
110                    // but the resulting tree will have at most single node, which doesn't prompt
111                    // creation of new directories and should be fine.
112                }
113                (0, "", false) => {
114                    // ok to keep this inside the loop; we are yet to create any nodes.
115                    // note the ipfs-http (and for example js-ipfs) normalizes the path by
116                    // removing the slash from the start.
117                    return Err(TreeBuildingFailed::RootedPath(full_path.to_string()));
118                }
119                (_, "", false) => unreachable!("already validated: no repeat slashes"),
120                (_, "", true) => unreachable!("already validated: path does not end in slash"),
121                _ => {}
122            }
123
124            // our first level can be full, depending on the options given
125            let full = depth == 0 && !self.opts.wrap_with_directory && !dir_builder.is_empty();
126
127            if last {
128                let mut next_id = Some(*counter);
129
130                let ret = if full {
131                    Err(TreeBuildingFailed::TooManyRootLevelEntries)
132                } else {
133                    f(dir_builder, next.to_string(), &mut next_id)
134                };
135
136                if next_id.is_none() {
137                    *counter += 1;
138                }
139
140                if ret.is_err() {
141                    // FIXME: there might be a case where we have now stale nodes in our tree but
142                    // cannot figure out an example for that.
143                }
144
145                return ret;
146            }
147
148            let parent_id = dir_builder.id;
149
150            dir_builder = match (full, dir_builder.nodes.entry(next.to_string())) {
151                (_, Occupied(oe)) => oe
152                    .into_mut()
153                    .as_dir_builder()
154                    .map_err(|_| TreeBuildingFailed::LeafAsDirectory(full_path.to_string()))?,
155                (false, Vacant(ve)) => {
156                    let next_id = *counter;
157                    *counter += 1;
158                    ve.insert(Entry::Directory(DirBuilder::new(parent_id, next_id)))
159                        .as_dir_builder()
160                        .expect("safe: we just inserted a DirBuilder")
161                }
162                (true, Vacant(_)) => return Err(TreeBuildingFailed::TooManyRootLevelEntries),
163            };
164        }
165
166        // as the str::split will always return a single element this should not ever be hit
167        unreachable!(
168            "walked the full_path but failed to add anything: {:?}",
169            full_path
170        );
171    }
172
173    /// Called to build the tree. The built tree will have the added files and their implied
174    /// directory structure, along with the directory entries which were created using
175    /// `set_metadata`. To build the whole hierarchy, one must iterate the returned iterator to
176    /// completion while storing the created blocks.
177    ///
178    /// Returned `PostOrderIterator` will use the given `full_path` and `block_buffer` to store
179    /// its data during the walk. `PostOrderIterator` implements `Iterator` while also allowing
180    /// borrowed access via `next_borrowed`.
181    pub fn build(self) -> PostOrderIterator {
182        PostOrderIterator::new(self.root_builder, self.opts, self.longest_path)
183    }
184}
185
186#[cfg(test)]
187mod tests {
188    use super::{
189        super::OwnedTreeNode, BufferingTreeBuilder, Metadata, TreeBuildingFailed, TreeOptions,
190    };
191    use core::convert::TryFrom;
192    use ipld_core::cid::Cid;
193    use multihash_codetable::Code;
194    use multihash_derive::MultihashDigest;
195
196    #[test]
197    fn some_directories() {
198        let mut builder = BufferingTreeBuilder::default();
199
200        // foobar\n
201        let five_block_foobar =
202            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
203
204        builder
205            .put_link("a/b/c/d/e/f/g.txt", five_block_foobar, 221)
206            .unwrap();
207        builder
208            .put_link("a/b/c/d/e/h.txt", five_block_foobar, 221)
209            .unwrap();
210        builder
211            .put_link("a/b/c/d/e/i.txt", five_block_foobar, 221)
212            .unwrap();
213
214        let actual = builder
215            .build()
216            .map(|res| res.map(|n| (n.path, n.cid, n.block)))
217            .collect::<Result<Vec<_>, _>>()
218            .unwrap();
219
220        let expected = vec![
221            (
222                "a/b/c/d/e/f",
223                "Qmbgf44ztW9wLcGNRNYGinGQB6SQDQtbHVbkM5MrWms698",
224            ),
225            (
226                "a/b/c/d/e",
227                "Qma1hCr3CuPRAq2Gw4DCNMqsi42Bjs4Bt1MGSS57kNh144",
228            ),
229            ("a/b/c/d", "QmUqaYatcJqiSFdykHXGh4Nog1eMSfDJBeYzcG67KV5Ri4"),
230            ("a/b/c", "QmYwaNBaGpDCNN9XpHmjxVPHmEXZMw9KDY3uikE2UU5fVB"),
231            ("a/b", "QmeAzCPig4o4gBLh2LvP96Sr8MUBrsu2Scw9MTq1EvTDhY"),
232            ("a", "QmSTUFaPwJW8xD4KNRLLQRqVTYtYC29xuhYTJoYPWdzvKp"),
233        ];
234
235        verify_results(expected, actual);
236    }
237
238    #[test]
239    fn empty_path() {
240        let mut builder = BufferingTreeBuilder::default();
241        builder.put_link("", some_cid(0), 1).unwrap();
242
243        let actual = builder
244            .build()
245            .map(|res| res.map(|OwnedTreeNode { path, .. }| path))
246            .collect::<Result<Vec<_>, _>>()
247            .unwrap();
248
249        assert!(
250            actual.is_empty(),
251            "wrapping in directory was not asked, single element"
252        );
253    }
254
255    #[test]
256    #[should_panic]
257    fn rooted_path() {
258        let mut builder = BufferingTreeBuilder::default();
259        builder.put_link("/a", some_cid(0), 1).unwrap();
260    }
261
262    #[test]
263    #[should_panic]
264    fn successive_slashes() {
265        let mut builder = BufferingTreeBuilder::default();
266        builder.put_link("a//b", some_cid(0), 1).unwrap();
267    }
268
269    #[test]
270    fn multiple_roots() {
271        // foobar\n
272        let five_block_foobar =
273            Cid::try_from("QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6").unwrap();
274
275        let mut opts = TreeOptions::default();
276        opts.wrap_with_directory();
277        let mut builder = BufferingTreeBuilder::new(opts);
278        builder.put_link("a", five_block_foobar, 221).unwrap();
279        builder.put_link("b", five_block_foobar, 221).unwrap();
280
281        let actual = builder
282            .build()
283            .map(|res| res.map(|OwnedTreeNode { path, cid, .. }| (path, cid.to_string())))
284            .collect::<Result<Vec<_>, _>>()
285            .unwrap();
286
287        assert_eq!(
288            actual,
289            &[(
290                "".to_string(),
291                "QmdbWuhpVCX9weVMMqvVTMeGwKMqCNJDbx7ZK1zG36sea7".to_string()
292            )]
293        );
294    }
295
296    #[test]
297    fn single_wrapped_root() {
298        // foobar\n
299        let five_block_foobar = "QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6"
300            .parse()
301            .unwrap();
302
303        let mut opts = TreeOptions::default();
304        opts.wrap_with_directory();
305        let mut builder = BufferingTreeBuilder::new(opts);
306        builder.put_link("a", five_block_foobar, 221).unwrap();
307
308        let actual = builder
309            .build()
310            .map(|res| res.map(|OwnedTreeNode { path, cid, .. }| (path, cid.to_string())))
311            .collect::<Result<Vec<_>, _>>()
312            .unwrap();
313
314        assert_eq!(
315            actual,
316            &[(
317                "".to_string(),
318                "QmQBseoi3b2FBrYhjM2E4mCF4Q7C8MgCUbzAbGNfyVwgNk".to_string()
319            )]
320        );
321    }
322
323    #[test]
324    #[should_panic]
325    fn denied_multiple_root_dirs() {
326        let mut builder = BufferingTreeBuilder::default();
327        builder.put_link("a/c.txt", some_cid(0), 1).unwrap();
328        builder.put_link("b/d.txt", some_cid(1), 1).unwrap();
329    }
330
331    #[test]
332    #[should_panic]
333    fn denied_multiple_root_files() {
334        let mut builder = BufferingTreeBuilder::default();
335        builder.put_link("a.txt", some_cid(0), 1).unwrap();
336        builder.put_link("b.txt", some_cid(1), 1).unwrap();
337    }
338
339    #[test]
340    #[should_panic]
341    fn using_leaf_as_node() {
342        let mut builder = BufferingTreeBuilder::default();
343        builder.put_link("a.txt", some_cid(0), 1).unwrap();
344        builder.put_link("a.txt/b.txt", some_cid(1), 1).unwrap();
345    }
346
347    #[test]
348    fn set_metadata_before_files() {
349        let mut builder = BufferingTreeBuilder::default();
350        builder
351            .set_metadata("a/b/c/d", Metadata::default())
352            .unwrap();
353        builder.put_link("a/b/c/d/e.txt", some_cid(1), 1).unwrap();
354        builder.put_link("a/b/c/d/f.txt", some_cid(2), 1).unwrap();
355
356        let actual = builder
357            .build()
358            .map(|res| res.map(|OwnedTreeNode { path, .. }| path))
359            .collect::<Result<Vec<_>, _>>()
360            .unwrap();
361
362        assert_eq!(actual, &["a/b/c/d", "a/b/c", "a/b", "a",])
363    }
364
365    #[test]
366    fn set_metadata_on_file() {
367        let mut builder = BufferingTreeBuilder::default();
368        builder.put_link("a/a.txt", some_cid(0), 1).unwrap();
369        let err = builder
370            .set_metadata("a/a.txt", Metadata::default())
371            .unwrap_err();
372
373        assert!(
374            matches!(err, TreeBuildingFailed::LeafAsDirectory(_)),
375            "{err:?}"
376        );
377    }
378
379    #[test]
380    fn dir_with_cidv1_link() {
381        // this is `echo '{ "name": "hello" }` | ./ipfs dag put`
382        let target =
383            Cid::try_from("bafyreihakpd7te5nbmlhdk5ntvcvhf2hmfgrvcwna2sddq5zz5342mcbli").unwrap();
384
385        let mut builder = BufferingTreeBuilder::default();
386        builder.put_link("a/b", target, 12).unwrap();
387
388        let actual = builder
389            .build()
390            .map(|res| res.map(|n| (n.path, n.cid, n.block)))
391            .collect::<Result<Vec<_>, _>>()
392            .unwrap();
393
394        let expected = vec![("a", "QmPMDMPG8dbHDC9GuvqWr9pfruLnp4GZCAWrskwCmenVQa")];
395
396        verify_results(expected, actual);
397    }
398
399    fn verify_results(
400        mut expected: Vec<(
401            impl AsRef<str> + core::fmt::Debug,
402            impl AsRef<str> + core::fmt::Debug,
403        )>,
404        mut actual: Vec<(String, Cid, Box<[u8]>)>,
405    ) {
406        use core::fmt;
407
408        struct Hex<'a>(&'a [u8]);
409
410        impl<'a> fmt::Debug for Hex<'a> {
411            fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
412                for b in self.0 {
413                    write!(fmt, "{b:02x}")?;
414                }
415                Ok(())
416            }
417        }
418
419        // hopefully this way the errors will be easier to hunt down
420
421        actual.reverse();
422        expected.reverse();
423
424        while let Some(actual) = actual.pop() {
425            let expected = expected.pop().expect("size mismatch");
426            assert_eq!(actual.0, expected.0.as_ref());
427            assert_eq!(
428                actual.1.to_string(),
429                expected.1.as_ref(),
430                "{:?}: {:?}",
431                actual.0,
432                Hex(&actual.2)
433            );
434        }
435
436        assert_eq!(expected.len(), 0, "size mismatch: {actual:?}");
437    }
438
439    /// Returns a quick and dirty sha2-256 of the given number as a Cidv0
440    fn some_cid(number: usize) -> Cid {
441        let mh = Code::Sha2_256.digest(&number.to_le_bytes());
442        Cid::new_v0(mh).unwrap()
443    }
444}