ipfs_unixfs/
walk.rs

1use crate::dir::{ShardError, UnexpectedDirectoryProperties};
2use crate::file::visit::{Cache, FileVisit, IdleFileVisit};
3use crate::file::{FileError, FileReadFailed};
4use crate::pb::{FlatUnixFs, PBLink, ParsingFailed, UnixFsType};
5use crate::{InvalidCidInLink, Metadata, UnexpectedNodeType};
6use alloc::borrow::Cow;
7use cid::Cid;
8use core::convert::TryFrom;
9use core::fmt;
10use either::Either;
11use std::path::{Path, PathBuf};
12
13/// `Walker` helps with walking a UnixFS tree, including all of the content and files. It is
14/// created with `Walker::new` and walked over each block with `Walker::continue_block`. Use
15/// `Walker::pending_links` to obtain the next [`Cid`] to be loaded and the prefetchable links.
16#[derive(Debug)]
17pub struct Walker {
18    /// This is `None` until the first block has been visited. Any failing unwraps would be logic
19    /// errors.
20    current: Option<InnerEntry>,
21    /// On the next call to `continue_walk` this will be the block, unless we have an ongoing file
22    /// walk, in which case we short-circuit to continue it. Any failing unwraps of
23    /// `self.next` would be logic errors.
24    next: Option<(Cid, String, usize)>,
25    pending: Vec<(Cid, String, usize)>,
26    // tried to recycle the names but that was consistently as fast and used more memory than just
27    // cloning the strings
28    should_continue: bool,
29}
30
31/// Converts a link of specifically a Directory (and not a link of a HAMTShard).
32fn convert_link(
33    nested_depth: usize,
34    nth: usize,
35    link: PBLink<'_>,
36) -> Result<(Cid, String, usize), InvalidCidInLink> {
37    let hash = link.Hash.as_deref().unwrap_or_default();
38    let cid = match Cid::try_from(hash) {
39        Ok(cid) => cid,
40        Err(e) => return Err(InvalidCidInLink::from((nth, link, e))),
41    };
42    let name = match link.Name {
43        Some(Cow::Borrowed(s)) if !s.is_empty() => s.to_owned(),
44        None | Some(Cow::Borrowed(_)) => todo!("link cannot be empty"),
45        Some(Cow::Owned(_s)) => unreachable!("FlatUnixFs is never transformed to owned"),
46    };
47    assert!(!name.contains('/'));
48    Ok((cid, name, nested_depth))
49}
50
51/// Converts a link of specifically a HAMTShard (and not a link of a Directory).
52fn convert_sharded_link(
53    nested_depth: usize,
54    sibling_depth: usize,
55    nth: usize,
56    link: PBLink<'_>,
57) -> Result<(Cid, String, usize), InvalidCidInLink> {
58    let hash = link.Hash.as_deref().unwrap_or_default();
59    let cid = match Cid::try_from(hash) {
60        Ok(cid) => cid,
61        Err(e) => return Err(InvalidCidInLink::from((nth, link, e))),
62    };
63    let (depth, name) = match link.Name {
64        Some(Cow::Borrowed(s)) if s.len() > 2 => (nested_depth, s[2..].to_owned()),
65        Some(Cow::Borrowed(s)) if s.len() == 2 => (sibling_depth, String::from("")),
66        None | Some(Cow::Borrowed(_)) => todo!("link cannot be empty"),
67        Some(Cow::Owned(_s)) => unreachable!("FlatUnixFs is never transformed to owned"),
68    };
69    assert!(!name.contains('/'));
70    Ok((cid, name, depth))
71}
72
73impl Walker {
74    /// Returns a new instance of a walker, ready to start from the given `Cid`.
75    pub fn new(cid: Cid, root_name: String) -> Walker {
76        // 1 == Path::ancestors().count() for an empty path
77        let depth = if root_name.is_empty() { 1 } else { 2 };
78        let next = Some((cid, root_name, depth));
79
80        Walker {
81            current: None,
82            next,
83            pending: Vec::new(),
84            should_continue: true,
85        }
86    }
87
88    /// Returns the next `Cid` to load and pass its associated content to `continue_walk`.
89    pub fn pending_links<'a>(&'a self) -> (&'a Cid, impl Iterator<Item = &'a Cid> + 'a) {
90        use InnerKind::*;
91        // rev: because we'll pop any of the pending
92        let cids = self.pending.iter().map(|(cid, ..)| cid).rev();
93
94        match self.current.as_ref().map(|c| &c.kind) {
95            Some(File(Some(ref visit), _)) => {
96                let (first, rest) = visit.pending_links();
97                let next = self.next.iter().map(|(cid, _, _)| cid);
98                (first, Either::Left(rest.chain(next.chain(cids))))
99            }
100            _ => {
101                let next = self
102                    .next
103                    .as_ref()
104                    .expect("we've validated that we have the next in new and continue_walk");
105                (&next.0, Either::Right(cids))
106            }
107        }
108    }
109
110    /// Continues the walk.
111    ///
112    /// Returns a descriptor for the next element found as `ContinuedWalk` which includes the means
113    /// to further continue the walk. `bytes` is the raw data of the next block, `cache` is an
114    /// optional cache for data structures which can always be substituted with `&mut None`.
115    pub fn next<'a: 'c, 'b: 'c, 'c>(
116        &'a mut self,
117        bytes: &'b [u8],
118        cache: &mut Option<Cache>,
119    ) -> Result<ContinuedWalk<'c>, Error> {
120        let Self {
121            current,
122            next,
123            pending,
124            should_continue,
125        } = self;
126
127        *should_continue = false;
128
129        if let Some(InnerEntry {
130            cid,
131            kind: InnerKind::File(visit @ Some(_), sz),
132            metadata,
133            path,
134            ..
135        }) = current
136        {
137            // we have an ongoing filevisit, the block must be related to it.
138            let (bytes, step) = visit.take().unwrap().continue_walk(bytes, cache)?;
139            let file_continues = step.is_some();
140            let segment = FileSegment::later(bytes, !file_continues);
141            *visit = step;
142            if file_continues || next.is_some() {
143                *should_continue = true;
144            }
145            return Ok(ContinuedWalk::File(segment, cid, path, metadata, *sz));
146        }
147
148        let flat = FlatUnixFs::try_from(bytes)?;
149        let metadata = Metadata::from(&flat.data);
150
151        match flat.data.Type {
152            UnixFsType::Directory => {
153                let flat = crate::dir::check_directory_supported(flat)?;
154                let (cid, name, depth) = next.take().expect("validated at new and earlier");
155
156                // depth + 1 because all entries below a directory are children of next, as in,
157                // deeper
158                let links = flat
159                    .links
160                    .into_iter()
161                    .enumerate()
162                    .map(|(nth, link)| convert_link(depth + 1, nth, link))
163                    .rev();
164
165                // replacing this with try_fold takes as many lines as the R: Try<Ok = B> cannot be
166                // deduced without specifying the Error
167                for link in links {
168                    pending.push(link?);
169                }
170
171                if let next_local @ Some(_) = pending.pop() {
172                    *next = next_local;
173                    *should_continue = true;
174                }
175
176                Ok(match current {
177                    None => {
178                        *current = Some(InnerEntry::new_root_dir(cid, metadata, &name, depth));
179                        let ie = current.as_ref().unwrap();
180                        ContinuedWalk::RootDirectory(&ie.cid, &ie.path, &ie.metadata)
181                    }
182                    Some(ie) => {
183                        ie.as_directory(cid, &name, depth, metadata);
184                        ContinuedWalk::Directory(&ie.cid, &ie.path, &ie.metadata)
185                    }
186                })
187            }
188            UnixFsType::HAMTShard => {
189                let flat = crate::dir::check_hamtshard_supported(flat)?;
190                let (cid, name, depth) = next.take().expect("validated at start and this method");
191
192                // similar to directory, the depth is +1 for nested entries, but the sibling buckets
193                // are at depth
194                let links = flat
195                    .links
196                    .into_iter()
197                    .enumerate()
198                    .map(|(nth, link)| convert_sharded_link(depth + 1, depth, nth, link))
199                    .rev();
200
201                // TODO: it might be worthwhile to lose the `rev` and sort the pushed links using
202                // the depth ascending. This should make sure we are first visiting the shortest
203                // path items.
204                for link in links {
205                    pending.push(link?);
206                }
207
208                if let next_local @ Some(_) = pending.pop() {
209                    *next = next_local;
210                    *should_continue = true;
211                }
212
213                Ok(match current {
214                    None => {
215                        *current = Some(InnerEntry::new_root_bucket(cid, metadata, &name, depth));
216                        let ie = current.as_ref().unwrap();
217                        ContinuedWalk::RootDirectory(&ie.cid, &ie.path, &ie.metadata)
218                    }
219                    Some(ie) => {
220                        // the name should be empty for all of the siblings
221                        if name.is_empty() {
222                            ie.as_bucket(cid, &name, depth);
223                            ContinuedWalk::Bucket(&ie.cid, &ie.path)
224                        }
225                        // but it should be non-empty for the directories
226                        else {
227                            ie.as_bucket_root(cid, &name, depth, metadata);
228                            ContinuedWalk::RootDirectory(&ie.cid, &ie.path, &ie.metadata)
229                        }
230                    }
231                })
232            }
233            UnixFsType::Raw | UnixFsType::File => {
234                let (bytes, file_size, metadata, step) =
235                    IdleFileVisit::default().start_from_parsed(flat, cache)?;
236                let (cid, name, depth) = next.take().expect("validated at new and earlier");
237                let file_continues = step.is_some();
238
239                match current {
240                    None => {
241                        let ie =
242                            InnerEntry::new_root_file(cid, metadata, &name, step, file_size, depth);
243                        *current = Some(ie);
244                    }
245                    Some(ie) => {
246                        ie.as_file(cid, &name, depth, metadata, step, file_size);
247                    }
248                };
249
250                let next_local = pending.pop();
251                if file_continues || next_local.is_some() {
252                    *next = next_local;
253                    *should_continue = true;
254                }
255
256                let segment = FileSegment::first(bytes, !file_continues);
257
258                let ie = current.as_ref().unwrap();
259                Ok(ContinuedWalk::File(
260                    segment,
261                    &ie.cid,
262                    &ie.path,
263                    &ie.metadata,
264                    file_size,
265                ))
266            }
267            UnixFsType::Metadata => Err(Error::UnsupportedType(flat.data.Type.into())),
268            UnixFsType::Symlink => {
269                let contents = match flat.data.Data {
270                    Some(Cow::Borrowed(bytes)) if !bytes.is_empty() => bytes,
271                    None | Some(Cow::Borrowed(_)) => &[][..],
272                    _ => unreachable!("never used into_owned"),
273                };
274
275                let (cid, name, depth) = next.take().expect("continued without next");
276                match current {
277                    None => {
278                        let ie = InnerEntry::new_root_symlink(cid, metadata, &name, depth);
279                        *current = Some(ie);
280                    }
281                    Some(ie) => {
282                        ie.as_symlink(cid, &name, depth, metadata);
283                    }
284                };
285
286                if let next_local @ Some(_) = pending.pop() {
287                    *next = next_local;
288                    *should_continue = true;
289                }
290
291                let ie = current.as_ref().unwrap();
292                Ok(ContinuedWalk::Symlink(
293                    contents,
294                    &ie.cid,
295                    &ie.path,
296                    &ie.metadata,
297                ))
298            }
299        }
300    }
301
302    /// `true` if the walk of `inspect` should continue
303    pub fn should_continue(&self) -> bool {
304        self.should_continue
305    }
306
307    // TODO: we could easily split a 'static value for a directory or bucket, which would pop all
308    // entries at a single level out to do some parallel walking, though the skipping could already
309    // be used to do that... Maybe we could return the filevisit on Skipped to save user from
310    // re-creating one? How to do the same for directories?
311}
312
313/// Represents what the `Walker` is currently looking at.
314struct InnerEntry {
315    cid: Cid,
316    kind: InnerKind,
317    path: PathBuf,
318    metadata: Metadata,
319    depth: usize,
320}
321
322impl From<InnerEntry> for Metadata {
323    fn from(e: InnerEntry) -> Self {
324        e.metadata
325    }
326}
327
328#[derive(Debug)]
329enum InnerKind {
330    /// This is necessarily at the root of the walk
331    RootDirectory,
332    /// This is necessarily at the root of the walk
333    BucketAtRoot,
334    /// This is the metadata containing bucket, for which we have a name
335    RootBucket,
336    /// This is a sibling to a previous named metadata containing bucket
337    Bucket,
338    /// Directory on any level except root
339    Directory,
340    /// File optionally on the root level
341    File(Option<FileVisit>, u64),
342    /// Symlink optionally on the root level
343    Symlink,
344}
345
346impl InnerEntry {
347    fn new_root_dir(cid: Cid, metadata: Metadata, name: &str, depth: usize) -> Self {
348        let mut path = PathBuf::new();
349        path.push(name);
350        Self {
351            cid,
352            kind: InnerKind::RootDirectory,
353            path,
354            metadata,
355            depth,
356        }
357    }
358
359    fn new_root_bucket(cid: Cid, metadata: Metadata, name: &str, depth: usize) -> Self {
360        let mut path = PathBuf::new();
361        path.push(name);
362        Self {
363            cid,
364            kind: InnerKind::BucketAtRoot,
365            path,
366            metadata,
367            depth,
368        }
369    }
370
371    fn new_root_file(
372        cid: Cid,
373        metadata: Metadata,
374        name: &str,
375        step: Option<FileVisit>,
376        file_size: u64,
377        depth: usize,
378    ) -> Self {
379        let mut path = PathBuf::new();
380        path.push(name);
381        Self {
382            cid,
383            kind: InnerKind::File(step, file_size),
384            path,
385            metadata,
386            depth,
387        }
388    }
389
390    fn new_root_symlink(cid: Cid, metadata: Metadata, name: &str, depth: usize) -> Self {
391        let mut path = PathBuf::new();
392        path.push(name);
393        Self {
394            cid,
395            kind: InnerKind::Symlink,
396            path,
397            metadata,
398            depth,
399        }
400    }
401
402    fn set_path(&mut self, name: &str, depth: usize) {
403        debug_assert_eq!(self.depth, self.path.ancestors().count());
404
405        while self.depth >= depth && self.depth > 0 {
406            assert!(self.path.pop());
407            self.depth = self
408                .depth
409                .checked_sub(1)
410                .expect("undeflowed path components");
411        }
412
413        self.path.push(name);
414        self.depth = depth;
415
416        debug_assert_eq!(self.depth, self.path.ancestors().count());
417    }
418
419    fn as_directory(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
420        use InnerKind::*;
421        match self.kind {
422            RootDirectory
423            | BucketAtRoot
424            | Bucket
425            | RootBucket
426            | Directory
427            | File(None, _)
428            | Symlink => {
429                self.cid = cid;
430                self.kind = Directory;
431                self.set_path(name, depth);
432                self.metadata = metadata;
433            }
434            ref x => unreachable!("directory ({}, {}, {}) following {:?}", cid, name, depth, x),
435        }
436    }
437
438    fn as_bucket_root(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
439        use InnerKind::*;
440        match self.kind {
441            RootDirectory
442            | BucketAtRoot
443            | Bucket
444            | RootBucket
445            | Directory
446            | File(None, _)
447            | Symlink => {
448                self.cid = cid;
449                self.kind = RootBucket;
450                self.set_path(name, depth);
451                self.metadata = metadata;
452            }
453            ref x => unreachable!(
454                "root bucket ({}, {}, {}) following {:?}",
455                cid, name, depth, x
456            ),
457        }
458    }
459
460    fn as_bucket(&mut self, cid: Cid, name: &str, depth: usize) {
461        use InnerKind::*;
462        match self.kind {
463            BucketAtRoot => {
464                assert_eq!(self.depth, depth, "{:?}", self.path);
465            }
466            RootBucket | Bucket | File(None, _) | Symlink => {
467                self.cid = cid;
468                self.kind = Bucket;
469
470                assert!(name.is_empty());
471                // continuation bucket going bucket -> bucket
472                while self.depth > depth {
473                    assert!(self.path.pop());
474                    self.depth = self
475                        .depth
476                        .checked_sub(1)
477                        .expect("underlowed depth calculation during bucket->bucket");
478                }
479
480                assert_eq!(self.depth, depth, "{:?}", self.path);
481            }
482            ref x => unreachable!("bucket ({}, {}, {}) following {:?}", cid, name, depth, x),
483        }
484    }
485
486    fn as_file(
487        &mut self,
488        cid: Cid,
489        name: &str,
490        depth: usize,
491        metadata: Metadata,
492        step: Option<FileVisit>,
493        file_size: u64,
494    ) {
495        use InnerKind::*;
496        match self.kind {
497            RootDirectory
498            | BucketAtRoot
499            | RootBucket
500            | Bucket
501            | Directory
502            | File(None, _)
503            | Symlink => {
504                self.cid = cid;
505                self.kind = File(step, file_size);
506                self.set_path(name, depth);
507                self.metadata = metadata;
508            }
509            ref x => unreachable!(
510                "file ({}, {}, {}, {}) following {:?}",
511                cid, name, depth, file_size, x
512            ),
513        }
514    }
515
516    fn as_symlink(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
517        use InnerKind::*;
518        match self.kind {
519            Bucket
520            | BucketAtRoot
521            | Directory
522            | File(None, _)
523            | RootBucket
524            | RootDirectory
525            | Symlink => {
526                self.cid = cid;
527                self.kind = Symlink;
528                self.set_path(name, depth);
529                self.metadata = metadata;
530            }
531            ref x => unreachable!("symlink ({}, {}, {}) following {:?}", cid, name, depth, x),
532        }
533    }
534}
535
536impl fmt::Debug for InnerEntry {
537    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
538        fmt.debug_struct("InnerEntry")
539            .field("depth", &self.depth)
540            .field("kind", &self.kind)
541            .field("cid", &format_args!("{}", self.cid))
542            .field("path", &self.path)
543            .field("metadata", &self.metadata)
544            .finish()
545    }
546}
547
548/// Representation of the walk progress.
549#[derive(Debug)]
550pub enum ContinuedWalk<'a> {
551    /// Currently looking at a continuation of a HAMT sharded directory. Usually safe to ignore.
552    Bucket(&'a Cid, &'a Path),
553    /// Currently looking at a directory.
554    Directory(&'a Cid, &'a Path, &'a Metadata),
555    /// Currently looking at a file. The first tuple value contains the file bytes accessible
556    /// from the block, which can also be an empty slice.
557    File(FileSegment<'a>, &'a Cid, &'a Path, &'a Metadata, u64),
558    /// Currently looking at a root directory.
559    RootDirectory(&'a Cid, &'a Path, &'a Metadata),
560    /// Currently looking at a symlink. The first tuple value contains the symlink target path. It
561    /// might be convertible to UTF-8, but this is not specified in the spec.
562    Symlink(&'a [u8], &'a Cid, &'a Path, &'a Metadata),
563}
564
565impl ContinuedWalk<'_> {
566    #[cfg(test)]
567    fn path(&self) -> &Path {
568        match self {
569            Self::Bucket(_, p)
570            | Self::Directory(_, p, ..)
571            | Self::File(_, _, p, ..)
572            | Self::RootDirectory(_, p, ..)
573            | Self::Symlink(_, _, p, ..) => p,
574        }
575    }
576}
577
578/// A slice of bytes of a possibly multi-block file. The slice can be accessed via `as_bytes()` or
579/// `AsRef<[u8]>::as_ref()`.
580#[derive(Debug)]
581pub struct FileSegment<'a> {
582    bytes: &'a [u8],
583    first_block: bool,
584    last_block: bool,
585}
586
587impl<'a> FileSegment<'a> {
588    fn first(bytes: &'a [u8], last_block: bool) -> Self {
589        FileSegment {
590            bytes,
591            first_block: true,
592            last_block,
593        }
594    }
595
596    fn later(bytes: &'a [u8], last_block: bool) -> Self {
597        FileSegment {
598            bytes,
599            first_block: false,
600            last_block,
601        }
602    }
603
604    /// Returns `true` if this is the first block in the file, `false` otherwise.
605    ///
606    /// Note: the first block can also be the last one.
607    pub fn is_first(&self) -> bool {
608        self.first_block
609    }
610
611    /// Returns `true` if this is the last block in the file, `false` otherwise.
612    ///
613    /// Note: the last block can also be the first one.
614    pub fn is_last(&self) -> bool {
615        self.last_block
616    }
617
618    /// Returns a slice into the file's bytes, which can be empty, as is the case for any
619    /// intermediate blocks which only contain links to further blocks.
620    pub fn as_bytes(&self) -> &'a [u8] {
621        self.bytes
622    }
623}
624
625impl AsRef<[u8]> for FileSegment<'_> {
626    fn as_ref(&self) -> &[u8] {
627        &self.bytes
628    }
629}
630
631/// Errors which can occur while walking a tree.
632#[derive(Debug)]
633pub enum Error {
634    /// An unsupported type of UnixFS node was encountered. There should be a way to skip these. Of the
635    /// defined types only `Metadata` is unsupported, all undefined types as of 2020-06 are also
636    /// unsupported.
637    UnsupportedType(UnexpectedNodeType),
638
639    /// This error is returned when a file e.g. links to a non-Raw or non-File subtree.
640    UnexpectedType(UnexpectedNodeType),
641
642    /// dag-pb node parsing failed, perhaps the block is not a dag-pb node?
643    DagPbParsingFailed(quick_protobuf::Error),
644
645    /// Failed to parse the unixfs node inside the dag-pb node.
646    UnixFsParsingFailed(quick_protobuf::Error),
647
648    /// dag-pb node contained no data.
649    EmptyDagPbNode,
650
651    /// dag-pb link could not be converted to a Cid
652    InvalidCid(InvalidCidInLink),
653
654    /// A File has an invalid structure
655    File(FileError),
656
657    /// A Directory has an unsupported structure
658    UnsupportedDirectory(UnexpectedDirectoryProperties),
659
660    /// HAMTSharded directory has unsupported properties
661    UnsupportedHAMTShard(ShardError),
662}
663
664impl From<ParsingFailed<'_>> for Error {
665    fn from(e: ParsingFailed<'_>) -> Self {
666        use ParsingFailed::*;
667        match e {
668            InvalidDagPb(e) => Error::DagPbParsingFailed(e),
669            InvalidUnixFs(e, _) => Error::UnixFsParsingFailed(e),
670            NoData(_) => Error::EmptyDagPbNode,
671        }
672    }
673}
674
675impl From<InvalidCidInLink> for Error {
676    fn from(e: InvalidCidInLink) -> Self {
677        Error::InvalidCid(e)
678    }
679}
680
681impl From<FileReadFailed> for Error {
682    fn from(e: FileReadFailed) -> Self {
683        use FileReadFailed::*;
684        match e {
685            File(e) => Error::File(e),
686            UnexpectedType(ut) => Error::UnexpectedType(ut),
687            Read(_) => unreachable!("FileVisit does not parse any blocks"),
688            InvalidCid(l) => Error::InvalidCid(l),
689        }
690    }
691}
692
693impl From<UnexpectedDirectoryProperties> for Error {
694    fn from(e: UnexpectedDirectoryProperties) -> Self {
695        Error::UnsupportedDirectory(e)
696    }
697}
698
699impl From<ShardError> for Error {
700    fn from(e: ShardError) -> Self {
701        Error::UnsupportedHAMTShard(e)
702    }
703}
704
705impl fmt::Display for Error {
706    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
707        use Error::*;
708
709        match self {
710            UnsupportedType(ut) => write!(fmt, "unsupported UnixFs type: {:?}", ut),
711            UnexpectedType(ut) => write!(fmt, "link to unexpected UnixFs type from File: {:?}", ut),
712            DagPbParsingFailed(e) => write!(fmt, "failed to parse the outer dag-pb: {}", e),
713            UnixFsParsingFailed(e) => write!(fmt, "failed to parse the inner UnixFs: {}", e),
714            EmptyDagPbNode => write!(fmt, "failed to parse the inner UnixFs: no data"),
715            InvalidCid(e) => write!(fmt, "link contained an invalid Cid: {}", e),
716            File(e) => write!(fmt, "invalid file: {}", e),
717            UnsupportedDirectory(udp) => write!(fmt, "unsupported directory: {}", udp),
718            UnsupportedHAMTShard(se) => write!(fmt, "unsupported hamtshard: {}", se),
719        }
720    }
721}
722
723impl std::error::Error for Error {}
724
725#[cfg(test)]
726mod tests {
727    use super::*;
728    use crate::test_support::FakeBlockstore;
729    use std::collections::HashMap;
730    use std::path::PathBuf;
731
732    #[test]
733    fn walk_two_file_directory_empty() {
734        two_file_directory_scenario("");
735    }
736
737    #[test]
738    fn walk_two_file_directory_named() {
739        two_file_directory_scenario("foo");
740    }
741
742    fn two_file_directory_scenario(root_name: &str) {
743        println!("new two_file_directory_scenario");
744        let mut counts =
745            walk_everything(root_name, "QmPTotyhVnnfCu9R4qwR4cdhpi5ENaiP8ZJfdqsm8Dw2jB");
746
747        let mut pb = PathBuf::new();
748        pb.push(root_name);
749        counts.checked_removal(&pb, 1);
750
751        pb.push("QmVkvLsSEm2uJx1h5Fqukje8mMPYg393o5C2kMCkF2bBTA");
752        counts.checked_removal(&pb, 1);
753
754        pb.push("foobar.balanced");
755        counts.checked_removal(&pb, 5);
756
757        assert!(pb.pop());
758        pb.push("foobar.trickle");
759        counts.checked_removal(&pb, 5);
760
761        assert!(counts.is_empty(), "{:#?}", counts);
762    }
763
764    #[test]
765    fn sharded_dir_different_root_empty() {
766        sharded_dir_scenario("");
767    }
768
769    #[test]
770    fn sharded_dir_different_root_named() {
771        sharded_dir_scenario("foo");
772    }
773
774    fn sharded_dir_scenario(root_name: &str) {
775        use core::fmt::Write;
776
777        // the hamt sharded directory is such that the root only has buckets so all of the actual files
778        // are at second level buckets, each bucket should have 2 files. the actual files, in fact, constitute a single empty
779        // file, linked from many names.
780
781        let mut counts =
782            walk_everything(root_name, "QmZbFPTnDBMWbQ6iBxQAhuhLz8Nu9XptYS96e7cuf5wvbk");
783        let mut buf = PathBuf::from(root_name);
784
785        counts.checked_removal(&buf, 9);
786
787        let indices = [38, 48, 50, 58, 9, 33, 4, 34, 17, 37, 40, 16, 41, 3, 25, 49];
788        let mut fmtbuf = String::new();
789
790        for (index, i) in indices.iter().enumerate() {
791            fmtbuf.clear();
792            write!(fmtbuf, "long-named-file-{:03}", i).unwrap();
793
794            if index > 0 {
795                buf.pop();
796            }
797            buf.push(&fmtbuf);
798
799            counts.checked_removal(&buf, 1);
800        }
801
802        assert!(counts.is_empty(), "{:#?}", counts);
803    }
804
805    #[test]
806    fn top_level_single_block_file_empty() {
807        single_block_top_level_file_scenario("");
808    }
809
810    #[test]
811    fn top_level_single_block_file_named() {
812        single_block_top_level_file_scenario("empty.txt");
813    }
814
815    fn single_block_top_level_file_scenario(root_name: &str) {
816        let mut counts =
817            walk_everything(root_name, "QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH");
818        let buf = PathBuf::from(root_name);
819        counts.checked_removal(&buf, 1);
820    }
821
822    #[test]
823    fn top_level_symlink_empty() {
824        top_level_symlink_scenario("");
825    }
826
827    #[test]
828    fn top_level_symlink_named() {
829        top_level_symlink_scenario("this_links_to_foobar");
830    }
831
832    fn top_level_symlink_scenario(root_name: &str) {
833        let mut counts =
834            walk_everything(root_name, "QmNgQEdXVdLw79nH2bnxLMxnyWMaXrijfqMTiDVat3iyuz");
835        let buf = PathBuf::from(root_name);
836        counts.checked_removal(&buf, 1);
837    }
838
839    #[test]
840    fn top_level_multiblock_file_empty() {
841        top_level_multiblock_file_scenario("");
842    }
843
844    #[test]
845    fn top_level_multiblock_file_named() {
846        top_level_multiblock_file_scenario("foobar_and_newline.txt");
847    }
848
849    fn top_level_multiblock_file_scenario(root_name: &str) {
850        let mut counts =
851            walk_everything(root_name, "QmWfQ48ChJUj4vWKFsUDe4646xCBmXgdmNfhjz9T7crywd");
852        let buf = PathBuf::from(root_name);
853        counts.checked_removal(&buf, 5);
854    }
855
856    #[test]
857    fn test_walked_file_segments() {
858        let blocks = FakeBlockstore::with_fixtures();
859
860        let trickle_foobar =
861            cid::Cid::try_from("QmWfQ48ChJUj4vWKFsUDe4646xCBmXgdmNfhjz9T7crywd").unwrap();
862        let mut walker = Walker::new(trickle_foobar, String::new());
863
864        let mut counter = 0;
865
866        while walker.should_continue() {
867            let (next, _) = walker.pending_links();
868
869            let block = blocks.get_by_cid(&next);
870
871            counter += 1;
872
873            match walker.next(&block, &mut None).unwrap() {
874                ContinuedWalk::File(segment, ..) => {
875                    match counter {
876                        1 => {
877                            // the root block has only links
878                            assert!(segment.as_ref().is_empty());
879                            assert!(segment.is_first());
880                            assert!(!segment.is_last());
881                        }
882                        2..=4 => {
883                            assert_eq!(segment.as_ref().len(), 2);
884                            assert!(!segment.is_first());
885                            assert!(!segment.is_last());
886                        }
887                        5 => {
888                            assert_eq!(segment.as_ref().len(), 1);
889                            assert!(!segment.is_first());
890                            assert!(segment.is_last());
891                        }
892                        _ => unreachable!(),
893                    }
894                }
895                x => unreachable!("{:?}", x),
896            };
897        }
898    }
899
900    trait CountsExt {
901        fn checked_removal(&mut self, key: &PathBuf, expected: usize);
902    }
903
904    impl CountsExt for HashMap<PathBuf, usize> {
905        fn checked_removal(&mut self, key: &PathBuf, expected: usize) {
906            use std::collections::hash_map::Entry::*;
907
908            match self.entry(key.clone()) {
909                Occupied(oe) => {
910                    assert_eq!(oe.remove(), expected);
911                }
912                Vacant(_) => {
913                    panic!(
914                        "no such key {:?} (expected {}) in {:#?}",
915                        key, expected, self
916                    );
917                }
918            }
919        }
920    }
921
922    fn walk_everything(root_name: &str, cid: &str) -> HashMap<PathBuf, usize> {
923        let mut ret = HashMap::new();
924
925        let blocks = FakeBlockstore::with_fixtures();
926
927        let mut cache = None;
928        let mut walker = Walker::new(cid::Cid::try_from(cid).unwrap(), root_name.to_string());
929
930        while walker.should_continue() {
931            let (next, _) = walker.pending_links();
932            let block = blocks.get_by_cid(next);
933            let cw = walker.next(block, &mut cache).unwrap();
934            *ret.entry(PathBuf::from(cw.path())).or_insert(0) += 1;
935        }
936
937        ret
938    }
939}