rust_unixfs/
walk.rs

1use crate::dir::{ShardError, UnexpectedDirectoryProperties};
2use crate::file::visit::{Cache, FileVisit, IdleFileVisit};
3use crate::file::{FileError, FileReadFailed};
4use crate::pb::{FlatUnixFs, PBLink, ParsingFailed, UnixFsType};
5use crate::{InvalidCidInLink, Metadata, UnexpectedNodeType};
6use alloc::borrow::Cow;
7use core::convert::TryFrom;
8use core::fmt;
9use either::Either;
10use ipld_core::cid::Cid;
11use std::path::{Path, PathBuf};
12
13/// `Walker` helps with walking a UnixFS tree, including all of the content and files. It is
14/// created with `Walker::new` and walked over each block with `Walker::continue_block`. Use
15/// `Walker::pending_links` to obtain the next [`Cid`] to be loaded and the prefetchable links.
16#[derive(Debug)]
17pub struct Walker {
18    /// This is `None` until the first block has been visited. Any failing unwraps would be logic
19    /// errors.
20    current: Option<InnerEntry>,
21    /// On the next call to `continue_walk` this will be the block, unless we have an ongoing file
22    /// walk, in which case we short-circuit to continue it. Any failing unwraps of
23    /// `self.next` would be logic errors.
24    next: Option<(Cid, String, usize)>,
25    pending: Vec<(Cid, String, usize)>,
26    // tried to recycle the names but that was consistently as fast and used more memory than just
27    // cloning the strings
28    should_continue: bool,
29}
30
31/// Converts a link of specifically a Directory (and not a link of a HAMTShard).
32fn convert_link(
33    nested_depth: usize,
34    nth: usize,
35    link: PBLink<'_>,
36) -> Result<(Cid, String, usize), InvalidCidInLink> {
37    let hash = link.Hash.as_deref().unwrap_or_default();
38    let cid = match Cid::try_from(hash) {
39        Ok(cid) => cid,
40        Err(e) => return Err(InvalidCidInLink::from((nth, link, e))),
41    };
42    let name = match link.Name {
43        Some(Cow::Borrowed(s)) if !s.is_empty() => s.to_owned(),
44        None | Some(Cow::Borrowed(_)) => todo!("link cannot be empty"),
45        Some(Cow::Owned(_s)) => unreachable!("FlatUnixFs is never transformed to owned"),
46    };
47    assert!(!name.contains('/'));
48    Ok((cid, name, nested_depth))
49}
50
51/// Converts a link of specifically a HAMTShard (and not a link of a Directory).
52fn convert_sharded_link(
53    nested_depth: usize,
54    sibling_depth: usize,
55    nth: usize,
56    link: PBLink<'_>,
57) -> Result<(Cid, String, usize), InvalidCidInLink> {
58    let hash = link.Hash.as_deref().unwrap_or_default();
59    let cid = match Cid::try_from(hash) {
60        Ok(cid) => cid,
61        Err(e) => return Err(InvalidCidInLink::from((nth, link, e))),
62    };
63    let (depth, name) = match link.Name {
64        Some(Cow::Borrowed(s)) if s.len() > 2 => (nested_depth, s[2..].to_owned()),
65        Some(Cow::Borrowed(s)) if s.len() == 2 => (sibling_depth, String::from("")),
66        None | Some(Cow::Borrowed(_)) => todo!("link cannot be empty"),
67        Some(Cow::Owned(_s)) => unreachable!("FlatUnixFs is never transformed to owned"),
68    };
69    assert!(!name.contains('/'));
70    Ok((cid, name, depth))
71}
72
73impl Walker {
74    /// Returns a new instance of a walker, ready to start from the given `Cid`.
75    pub fn new(cid: Cid, root_name: String) -> Walker {
76        // 1 == Path::ancestors().count() for an empty path
77        let depth = if root_name.is_empty() { 1 } else { 2 };
78        let next = Some((cid, root_name, depth));
79
80        Walker {
81            current: None,
82            next,
83            pending: Vec::new(),
84            should_continue: true,
85        }
86    }
87
88    /// Returns the next [`Cid`] to load and pass its associated content to [`next`].
89    ///
90    /// # Panics
91    ///
92    /// When [`should_continue()`] returns `false`.
93    // TODO: perhaps this should return an option?
94    pub fn pending_links(&self) -> (&Cid, impl Iterator<Item = &Cid> + '_) {
95        use InnerKind::*;
96        // rev: because we'll pop any of the pending
97        let cids = self.pending.iter().map(|(cid, ..)| cid).rev();
98
99        match self.current.as_ref().map(|c| &c.kind) {
100            Some(File(Some(ref visit), _)) => {
101                let (first, rest) = visit.pending_links();
102                let next = self.next.iter().map(|(cid, _, _)| cid);
103                (first, Either::Left(rest.chain(next.chain(cids))))
104            }
105            _ => {
106                let next = self
107                    .next
108                    .as_ref()
109                    .expect("we've validated that we have the next in new and continue_walk");
110                (&next.0, Either::Right(cids))
111            }
112        }
113    }
114
115    /// Continues the walk.
116    ///
117    /// Returns a descriptor for the next element found as `ContinuedWalk` which includes the means
118    /// to further continue the walk. `bytes` is the raw data of the next block, `cache` is an
119    /// optional cache for data structures which can always be substituted with `&mut None`.
120    pub fn next<'a: 'c, 'b: 'c, 'c>(
121        &'a mut self,
122        bytes: &'b [u8],
123        cache: &mut Option<Cache>,
124    ) -> Result<ContinuedWalk<'c>, Error> {
125        let Self {
126            current,
127            next,
128            pending,
129            should_continue,
130        } = self;
131
132        *should_continue = false;
133
134        if let Some(InnerEntry {
135            cid,
136            kind: InnerKind::File(visit @ Some(_), sz),
137            metadata,
138            path,
139            ..
140        }) = current
141        {
142            // we have an ongoing filevisit, the block must be related to it.
143            let (bytes, step) = visit.take().unwrap().continue_walk(bytes, cache)?;
144            let file_continues = step.is_some();
145            let segment = FileSegment::later(bytes, !file_continues);
146            *visit = step;
147            if file_continues || next.is_some() {
148                *should_continue = true;
149            }
150            return Ok(ContinuedWalk::File(segment, cid, path, metadata, *sz));
151        }
152
153        let flat = FlatUnixFs::try_from(bytes)?;
154        let metadata = Metadata::from(&flat.data);
155
156        match flat.data.Type {
157            UnixFsType::Directory => {
158                let flat = crate::dir::check_directory_supported(flat)?;
159                let (cid, name, depth) = next.take().expect("validated at new and earlier");
160
161                // depth + 1 because all entries below a directory are children of next, as in,
162                // deeper
163                let links = flat
164                    .links
165                    .into_iter()
166                    .enumerate()
167                    .map(|(nth, link)| convert_link(depth + 1, nth, link))
168                    .rev();
169
170                // replacing this with try_fold takes as many lines as the R: Try<Ok = B> cannot be
171                // deduced without specifying the Error
172                for link in links {
173                    pending.push(link?);
174                }
175
176                if let next_local @ Some(_) = pending.pop() {
177                    *next = next_local;
178                    *should_continue = true;
179                }
180
181                Ok(match current {
182                    None => {
183                        *current = Some(InnerEntry::new_root_dir(cid, metadata, &name, depth));
184                        let ie = current.as_ref().unwrap();
185                        ContinuedWalk::RootDirectory(&ie.cid, &ie.path, &ie.metadata)
186                    }
187                    Some(ie) => {
188                        ie.as_directory(cid, &name, depth, metadata);
189                        ContinuedWalk::Directory(&ie.cid, &ie.path, &ie.metadata)
190                    }
191                })
192            }
193            UnixFsType::HAMTShard => {
194                let flat = crate::dir::check_hamtshard_supported(flat)?;
195                let (cid, name, depth) = next.take().expect("validated at start and this method");
196
197                // similar to directory, the depth is +1 for nested entries, but the sibling buckets
198                // are at depth
199                let links = flat
200                    .links
201                    .into_iter()
202                    .enumerate()
203                    .map(|(nth, link)| convert_sharded_link(depth + 1, depth, nth, link))
204                    .rev();
205
206                // TODO: it might be worthwhile to lose the `rev` and sort the pushed links using
207                // the depth ascending. This should make sure we are first visiting the shortest
208                // path items.
209                for link in links {
210                    pending.push(link?);
211                }
212
213                if let next_local @ Some(_) = pending.pop() {
214                    *next = next_local;
215                    *should_continue = true;
216                }
217
218                Ok(match current {
219                    None => {
220                        *current = Some(InnerEntry::new_root_bucket(cid, metadata, &name, depth));
221                        let ie = current.as_ref().unwrap();
222                        ContinuedWalk::RootDirectory(&ie.cid, &ie.path, &ie.metadata)
223                    }
224                    Some(ie) => {
225                        // the name should be empty for all of the siblings
226                        if name.is_empty() {
227                            ie.as_bucket(cid, &name, depth);
228                            ContinuedWalk::Bucket(&ie.cid, &ie.path)
229                        }
230                        // but it should be non-empty for the directories
231                        else {
232                            ie.as_bucket_root(cid, &name, depth, metadata);
233                            ContinuedWalk::RootDirectory(&ie.cid, &ie.path, &ie.metadata)
234                        }
235                    }
236                })
237            }
238            UnixFsType::Raw | UnixFsType::File => {
239                let (bytes, file_size, metadata, step) =
240                    IdleFileVisit::default().start_from_parsed(flat, cache)?;
241                let (cid, name, depth) = next.take().expect("validated at new and earlier");
242                let file_continues = step.is_some();
243
244                match current {
245                    None => {
246                        let ie =
247                            InnerEntry::new_root_file(cid, metadata, &name, step, file_size, depth);
248                        *current = Some(ie);
249                    }
250                    Some(ie) => {
251                        ie.as_file(cid, &name, depth, metadata, step, file_size);
252                    }
253                };
254
255                let next_local = pending.pop();
256                if file_continues || next_local.is_some() {
257                    *next = next_local;
258                    *should_continue = true;
259                }
260
261                let segment = FileSegment::first(bytes, !file_continues);
262
263                let ie = current.as_ref().unwrap();
264                Ok(ContinuedWalk::File(
265                    segment,
266                    &ie.cid,
267                    &ie.path,
268                    &ie.metadata,
269                    file_size,
270                ))
271            }
272            UnixFsType::Metadata => Err(Error::UnsupportedType(flat.data.Type.into())),
273            UnixFsType::Symlink => {
274                let contents = match flat.data.Data {
275                    Some(Cow::Borrowed(bytes)) if !bytes.is_empty() => bytes,
276                    None | Some(Cow::Borrowed(_)) => &[][..],
277                    _ => unreachable!("never used into_owned"),
278                };
279
280                let (cid, name, depth) = next.take().expect("continued without next");
281                match current {
282                    None => {
283                        let ie = InnerEntry::new_root_symlink(cid, metadata, &name, depth);
284                        *current = Some(ie);
285                    }
286                    Some(ie) => {
287                        ie.as_symlink(cid, &name, depth, metadata);
288                    }
289                };
290
291                if let next_local @ Some(_) = pending.pop() {
292                    *next = next_local;
293                    *should_continue = true;
294                }
295
296                let ie = current.as_ref().unwrap();
297                Ok(ContinuedWalk::Symlink(
298                    contents,
299                    &ie.cid,
300                    &ie.path,
301                    &ie.metadata,
302                ))
303            }
304        }
305    }
306
307    /// Returns `true` if there are more links to walk over.
308    pub fn should_continue(&self) -> bool {
309        self.should_continue
310    }
311
312    // TODO: we could easily split a 'static value for a directory or bucket, which would pop all
313    // entries at a single level out to do some parallel walking, though the skipping could already
314    // be used to do that... Maybe we could return the filevisit on Skipped to save user from
315    // re-creating one? How to do the same for directories?
316}
317
318/// Represents what the `Walker` is currently looking at.
319struct InnerEntry {
320    cid: Cid,
321    kind: InnerKind,
322    path: PathBuf,
323    metadata: Metadata,
324    depth: usize,
325}
326
327impl From<InnerEntry> for Metadata {
328    fn from(e: InnerEntry) -> Self {
329        e.metadata
330    }
331}
332
333#[derive(Debug)]
334enum InnerKind {
335    /// This is necessarily at the root of the walk
336    RootDirectory,
337    /// This is necessarily at the root of the walk
338    BucketAtRoot,
339    /// This is the metadata containing bucket, for which we have a name
340    RootBucket,
341    /// This is a sibling to a previous named metadata containing bucket
342    Bucket,
343    /// Directory on any level except root
344    Directory,
345    /// File optionally on the root level
346    File(Option<FileVisit>, u64),
347    /// Symlink optionally on the root level
348    Symlink,
349}
350
351impl InnerEntry {
352    fn new_root_dir(cid: Cid, metadata: Metadata, name: &str, depth: usize) -> Self {
353        let mut path = PathBuf::new();
354        path.push(name);
355        Self {
356            cid,
357            kind: InnerKind::RootDirectory,
358            path,
359            metadata,
360            depth,
361        }
362    }
363
364    fn new_root_bucket(cid: Cid, metadata: Metadata, name: &str, depth: usize) -> Self {
365        let mut path = PathBuf::new();
366        path.push(name);
367        Self {
368            cid,
369            kind: InnerKind::BucketAtRoot,
370            path,
371            metadata,
372            depth,
373        }
374    }
375
376    fn new_root_file(
377        cid: Cid,
378        metadata: Metadata,
379        name: &str,
380        step: Option<FileVisit>,
381        file_size: u64,
382        depth: usize,
383    ) -> Self {
384        let mut path = PathBuf::new();
385        path.push(name);
386        Self {
387            cid,
388            kind: InnerKind::File(step, file_size),
389            path,
390            metadata,
391            depth,
392        }
393    }
394
395    fn new_root_symlink(cid: Cid, metadata: Metadata, name: &str, depth: usize) -> Self {
396        let mut path = PathBuf::new();
397        path.push(name);
398        Self {
399            cid,
400            kind: InnerKind::Symlink,
401            path,
402            metadata,
403            depth,
404        }
405    }
406
407    fn set_path(&mut self, name: &str, depth: usize) {
408        debug_assert_eq!(self.depth, self.path.ancestors().count());
409
410        while self.depth >= depth && self.depth > 0 {
411            assert!(self.path.pop());
412            self.depth = self
413                .depth
414                .checked_sub(1)
415                .expect("underflowed path components");
416        }
417
418        self.path.push(name);
419        self.depth = depth;
420
421        debug_assert_eq!(self.depth, self.path.ancestors().count());
422    }
423
424    fn as_directory(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
425        use InnerKind::*;
426        match self.kind {
427            RootDirectory
428            | BucketAtRoot
429            | Bucket
430            | RootBucket
431            | Directory
432            | File(None, _)
433            | Symlink => {
434                self.cid = cid;
435                self.kind = Directory;
436                self.set_path(name, depth);
437                self.metadata = metadata;
438            }
439            ref x => unreachable!("directory ({}, {}, {}) following {:?}", cid, name, depth, x),
440        }
441    }
442
443    fn as_bucket_root(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
444        use InnerKind::*;
445        match self.kind {
446            RootDirectory
447            | BucketAtRoot
448            | Bucket
449            | RootBucket
450            | Directory
451            | File(None, _)
452            | Symlink => {
453                self.cid = cid;
454                self.kind = RootBucket;
455                self.set_path(name, depth);
456                self.metadata = metadata;
457            }
458            ref x => unreachable!(
459                "root bucket ({}, {}, {}) following {:?}",
460                cid, name, depth, x
461            ),
462        }
463    }
464
465    fn as_bucket(&mut self, cid: Cid, name: &str, depth: usize) {
466        use InnerKind::*;
467        match self.kind {
468            BucketAtRoot => {
469                assert_eq!(self.depth, depth, "{:?}", self.path);
470            }
471            RootBucket | Bucket | File(None, _) | Symlink => {
472                self.cid = cid;
473                self.kind = Bucket;
474
475                assert!(name.is_empty());
476                // continuation bucket going bucket -> bucket
477                while self.depth > depth {
478                    assert!(self.path.pop());
479                    self.depth = self
480                        .depth
481                        .checked_sub(1)
482                        .expect("underflowed depth calculation during bucket->bucket");
483                }
484
485                assert_eq!(self.depth, depth, "{:?}", self.path);
486            }
487            ref x => unreachable!("bucket ({}, {}, {}) following {:?}", cid, name, depth, x),
488        }
489    }
490
491    fn as_file(
492        &mut self,
493        cid: Cid,
494        name: &str,
495        depth: usize,
496        metadata: Metadata,
497        step: Option<FileVisit>,
498        file_size: u64,
499    ) {
500        use InnerKind::*;
501        match self.kind {
502            RootDirectory
503            | BucketAtRoot
504            | RootBucket
505            | Bucket
506            | Directory
507            | File(None, _)
508            | Symlink => {
509                self.cid = cid;
510                self.kind = File(step, file_size);
511                self.set_path(name, depth);
512                self.metadata = metadata;
513            }
514            ref x => unreachable!(
515                "file ({}, {}, {}, {}) following {:?}",
516                cid, name, depth, file_size, x
517            ),
518        }
519    }
520
521    fn as_symlink(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
522        use InnerKind::*;
523        match self.kind {
524            Bucket
525            | BucketAtRoot
526            | Directory
527            | File(None, _)
528            | RootBucket
529            | RootDirectory
530            | Symlink => {
531                self.cid = cid;
532                self.kind = Symlink;
533                self.set_path(name, depth);
534                self.metadata = metadata;
535            }
536            ref x => unreachable!("symlink ({}, {}, {}) following {:?}", cid, name, depth, x),
537        }
538    }
539}
540
541impl fmt::Debug for InnerEntry {
542    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
543        fmt.debug_struct("InnerEntry")
544            .field("depth", &self.depth)
545            .field("kind", &self.kind)
546            .field("cid", &format_args!("{}", self.cid))
547            .field("path", &self.path)
548            .field("metadata", &self.metadata)
549            .finish()
550    }
551}
552
553/// Representation of the walk progress.
554#[derive(Debug)]
555pub enum ContinuedWalk<'a> {
556    /// Currently looking at a continuation of a HAMT sharded directory. Usually safe to ignore.
557    Bucket(&'a Cid, &'a Path),
558    /// Currently looking at a directory.
559    Directory(&'a Cid, &'a Path, &'a Metadata),
560    /// Currently looking at a file. The first tuple value contains the file bytes accessible
561    /// from the block, which can also be an empty slice.
562    File(FileSegment<'a>, &'a Cid, &'a Path, &'a Metadata, u64),
563    /// Currently looking at a root directory.
564    RootDirectory(&'a Cid, &'a Path, &'a Metadata),
565    /// Currently looking at a symlink. The first tuple value contains the symlink target path. It
566    /// might be convertible to UTF-8, but this is not specified in the spec.
567    Symlink(&'a [u8], &'a Cid, &'a Path, &'a Metadata),
568}
569
570impl ContinuedWalk<'_> {
571    #[cfg(test)]
572    fn path(&self) -> &Path {
573        match self {
574            Self::Bucket(_, p)
575            | Self::Directory(_, p, ..)
576            | Self::File(_, _, p, ..)
577            | Self::RootDirectory(_, p, ..)
578            | Self::Symlink(_, _, p, ..) => p,
579        }
580    }
581}
582
583/// A slice of bytes of a possibly multi-block file. The slice can be accessed via `as_bytes()` or
584/// `AsRef<[u8]>::as_ref()`.
585#[derive(Debug)]
586pub struct FileSegment<'a> {
587    bytes: &'a [u8],
588    first_block: bool,
589    last_block: bool,
590}
591
592impl<'a> FileSegment<'a> {
593    fn first(bytes: &'a [u8], last_block: bool) -> Self {
594        FileSegment {
595            bytes,
596            first_block: true,
597            last_block,
598        }
599    }
600
601    fn later(bytes: &'a [u8], last_block: bool) -> Self {
602        FileSegment {
603            bytes,
604            first_block: false,
605            last_block,
606        }
607    }
608
609    /// Returns `true` if this is the first block in the file, `false` otherwise.
610    ///
611    /// Note: the first block can also be the last one.
612    pub fn is_first(&self) -> bool {
613        self.first_block
614    }
615
616    /// Returns `true` if this is the last block in the file, `false` otherwise.
617    ///
618    /// Note: the last block can also be the first one.
619    pub fn is_last(&self) -> bool {
620        self.last_block
621    }
622
623    /// Returns a slice into the file's bytes, which can be empty, as is the case for any
624    /// intermediate blocks which only contain links to further blocks.
625    pub fn as_bytes(&self) -> &'a [u8] {
626        self.bytes
627    }
628}
629
630impl AsRef<[u8]> for FileSegment<'_> {
631    fn as_ref(&self) -> &[u8] {
632        self.bytes
633    }
634}
635
636/// Errors which can occur while walking a tree.
637#[derive(Debug)]
638pub enum Error {
639    /// An unsupported type of UnixFS node was encountered. There should be a way to skip these. Of the
640    /// defined types only `Metadata` is unsupported, all undefined types as of 2020-06 are also
641    /// unsupported.
642    UnsupportedType(UnexpectedNodeType),
643
644    /// This error is returned when a file e.g. links to a non-Raw or non-File subtree.
645    UnexpectedType(UnexpectedNodeType),
646
647    /// dag-pb node parsing failed, perhaps the block is not a dag-pb node?
648    DagPbParsingFailed(quick_protobuf::Error),
649
650    /// Failed to parse the unixfs node inside the dag-pb node.
651    UnixFsParsingFailed(quick_protobuf::Error),
652
653    /// dag-pb node contained no data.
654    EmptyDagPbNode,
655
656    /// dag-pb link could not be converted to a Cid
657    InvalidCid(InvalidCidInLink),
658
659    /// A File has an invalid structure
660    File(FileError),
661
662    /// A Directory has an unsupported structure
663    UnsupportedDirectory(UnexpectedDirectoryProperties),
664
665    /// HAMTSharded directory has unsupported properties
666    UnsupportedHAMTShard(ShardError),
667}
668
669impl From<ParsingFailed<'_>> for Error {
670    fn from(e: ParsingFailed<'_>) -> Self {
671        use ParsingFailed::*;
672        match e {
673            InvalidDagPb(e) => Error::DagPbParsingFailed(e),
674            InvalidUnixFs(e, _) => Error::UnixFsParsingFailed(e),
675            NoData(_) => Error::EmptyDagPbNode,
676        }
677    }
678}
679
680impl From<InvalidCidInLink> for Error {
681    fn from(e: InvalidCidInLink) -> Self {
682        Error::InvalidCid(e)
683    }
684}
685
686impl From<FileReadFailed> for Error {
687    fn from(e: FileReadFailed) -> Self {
688        use FileReadFailed::*;
689        match e {
690            File(e) => Error::File(e),
691            UnexpectedType(ut) => Error::UnexpectedType(ut),
692            Read(_) => unreachable!("FileVisit does not parse any blocks"),
693            InvalidCid(l) => Error::InvalidCid(l),
694        }
695    }
696}
697
698impl From<UnexpectedDirectoryProperties> for Error {
699    fn from(e: UnexpectedDirectoryProperties) -> Self {
700        Error::UnsupportedDirectory(e)
701    }
702}
703
704impl From<ShardError> for Error {
705    fn from(e: ShardError) -> Self {
706        Error::UnsupportedHAMTShard(e)
707    }
708}
709
710impl fmt::Display for Error {
711    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
712        use Error::*;
713
714        match self {
715            UnsupportedType(ut) => write!(fmt, "unsupported UnixFs type: {ut:?}"),
716            UnexpectedType(ut) => write!(fmt, "link to unexpected UnixFs type from File: {ut:?}"),
717            DagPbParsingFailed(e) => write!(fmt, "failed to parse the outer dag-pb: {e}"),
718            UnixFsParsingFailed(e) => write!(fmt, "failed to parse the inner UnixFs: {e}"),
719            EmptyDagPbNode => write!(fmt, "failed to parse the inner UnixFs: no data"),
720            InvalidCid(e) => write!(fmt, "link contained an invalid Cid: {e}"),
721            File(e) => write!(fmt, "invalid file: {e}"),
722            UnsupportedDirectory(udp) => write!(fmt, "unsupported directory: {udp}"),
723            UnsupportedHAMTShard(se) => write!(fmt, "unsupported hamtshard: {se}"),
724        }
725    }
726}
727
728impl std::error::Error for Error {}
729
730#[cfg(test)]
731mod tests {
732    use super::*;
733    use crate::test_support::FakeBlockstore;
734    use std::collections::HashMap;
735    use std::path::PathBuf;
736
737    #[test]
738    fn walk_two_file_directory_empty() {
739        two_file_directory_scenario("");
740    }
741
742    #[test]
743    fn walk_two_file_directory_named() {
744        two_file_directory_scenario("foo");
745    }
746
747    fn two_file_directory_scenario(root_name: &str) {
748        println!("new two_file_directory_scenario");
749        let mut counts =
750            walk_everything(root_name, "QmPTotyhVnnfCu9R4qwR4cdhpi5ENaiP8ZJfdqsm8Dw2jB");
751
752        let mut pb = PathBuf::new();
753        pb.push(root_name);
754        counts.checked_removal(&pb, 1);
755
756        pb.push("QmVkvLsSEm2uJx1h5Fqukje8mMPYg393o5C2kMCkF2bBTA");
757        counts.checked_removal(&pb, 1);
758
759        pb.push("foobar.balanced");
760        counts.checked_removal(&pb, 5);
761
762        assert!(pb.pop());
763        pb.push("foobar.trickle");
764        counts.checked_removal(&pb, 5);
765
766        assert!(counts.is_empty(), "{counts:#?}");
767    }
768
769    #[test]
770    fn sharded_dir_different_root_empty() {
771        sharded_dir_scenario("");
772    }
773
774    #[test]
775    fn sharded_dir_different_root_named() {
776        sharded_dir_scenario("foo");
777    }
778
779    fn sharded_dir_scenario(root_name: &str) {
780        use core::fmt::Write;
781
782        // the hamt sharded directory is such that the root only has buckets so all of the actual files
783        // are at second level buckets, each bucket should have 2 files. the actual files, in fact, constitute a single empty
784        // file, linked from many names.
785
786        let mut counts =
787            walk_everything(root_name, "QmZbFPTnDBMWbQ6iBxQAhuhLz8Nu9XptYS96e7cuf5wvbk");
788        let mut buf = PathBuf::from(root_name);
789
790        counts.checked_removal(&buf, 9);
791
792        let indices = [38, 48, 50, 58, 9, 33, 4, 34, 17, 37, 40, 16, 41, 3, 25, 49];
793        let mut fmtbuf = String::new();
794
795        for (index, i) in indices.iter().enumerate() {
796            fmtbuf.clear();
797            write!(fmtbuf, "long-named-file-{i:03}").unwrap();
798
799            if index > 0 {
800                buf.pop();
801            }
802            buf.push(&fmtbuf);
803
804            counts.checked_removal(&buf, 1);
805        }
806
807        assert!(counts.is_empty(), "{counts:#?}");
808    }
809
810    #[test]
811    fn top_level_single_block_file_empty() {
812        single_block_top_level_file_scenario("");
813    }
814
815    #[test]
816    fn top_level_single_block_file_named() {
817        single_block_top_level_file_scenario("empty.txt");
818    }
819
820    fn single_block_top_level_file_scenario(root_name: &str) {
821        let mut counts =
822            walk_everything(root_name, "QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH");
823        let buf = PathBuf::from(root_name);
824        counts.checked_removal(&buf, 1);
825    }
826
827    #[test]
828    fn top_level_symlink_empty() {
829        top_level_symlink_scenario("");
830    }
831
832    #[test]
833    fn top_level_symlink_named() {
834        top_level_symlink_scenario("this_links_to_foobar");
835    }
836
837    fn top_level_symlink_scenario(root_name: &str) {
838        let mut counts =
839            walk_everything(root_name, "QmNgQEdXVdLw79nH2bnxLMxnyWMaXrijfqMTiDVat3iyuz");
840        let buf = PathBuf::from(root_name);
841        counts.checked_removal(&buf, 1);
842    }
843
844    #[test]
845    fn top_level_multiblock_file_empty() {
846        top_level_multiblock_file_scenario("");
847    }
848
849    #[test]
850    fn top_level_multiblock_file_named() {
851        top_level_multiblock_file_scenario("foobar_and_newline.txt");
852    }
853
854    fn top_level_multiblock_file_scenario(root_name: &str) {
855        let mut counts =
856            walk_everything(root_name, "QmWfQ48ChJUj4vWKFsUDe4646xCBmXgdmNfhjz9T7crywd");
857        let buf = PathBuf::from(root_name);
858        counts.checked_removal(&buf, 5);
859    }
860
861    #[test]
862    fn test_walked_file_segments() {
863        let blocks = FakeBlockstore::with_fixtures();
864
865        let trickle_foobar =
866            ipld_core::cid::Cid::try_from("QmWfQ48ChJUj4vWKFsUDe4646xCBmXgdmNfhjz9T7crywd")
867                .unwrap();
868        let mut walker = Walker::new(trickle_foobar, String::new());
869
870        let mut counter = 0;
871
872        while walker.should_continue() {
873            let (next, _) = walker.pending_links();
874
875            let block = blocks.get_by_cid(next);
876
877            counter += 1;
878
879            match walker.next(block, &mut None).unwrap() {
880                ContinuedWalk::File(segment, ..) => {
881                    match counter {
882                        1 => {
883                            // the root block has only links
884                            assert!(segment.as_ref().is_empty());
885                            assert!(segment.is_first());
886                            assert!(!segment.is_last());
887                        }
888                        2..=4 => {
889                            assert_eq!(segment.as_ref().len(), 2);
890                            assert!(!segment.is_first());
891                            assert!(!segment.is_last());
892                        }
893                        5 => {
894                            assert_eq!(segment.as_ref().len(), 1);
895                            assert!(!segment.is_first());
896                            assert!(segment.is_last());
897                        }
898                        _ => unreachable!(),
899                    }
900                }
901                x => unreachable!("{:?}", x),
902            };
903        }
904    }
905
906    trait CountsExt {
907        fn checked_removal(&mut self, key: &Path, expected: usize);
908    }
909
910    impl CountsExt for HashMap<PathBuf, usize> {
911        fn checked_removal(&mut self, key: &Path, expected: usize) {
912            use std::collections::hash_map::Entry::*;
913
914            match self.entry(key.to_owned()) {
915                Occupied(oe) => {
916                    assert_eq!(oe.remove(), expected);
917                }
918                Vacant(_) => {
919                    panic!("no such key {key:?} (expected {expected}) in {self:#?}");
920                }
921            }
922        }
923    }
924
925    fn walk_everything(root_name: &str, cid: &str) -> HashMap<PathBuf, usize> {
926        let mut ret = HashMap::new();
927
928        let blocks = FakeBlockstore::with_fixtures();
929
930        let mut cache = None;
931        let mut walker = Walker::new(
932            ipld_core::cid::Cid::try_from(cid).unwrap(),
933            root_name.to_string(),
934        );
935
936        while walker.should_continue() {
937            let (next, _) = walker.pending_links();
938            let block = blocks.get_by_cid(next);
939            let cw = walker.next(block, &mut cache).unwrap();
940            *ret.entry(PathBuf::from(cw.path())).or_insert(0) += 1;
941        }
942
943        ret
944    }
945}