unixfs_v1/
dir.rs

1use crate::pb::{FlatUnixFs, PBLink, PBNode, ParsingFailed, UnixFsType};
2use crate::{InvalidCidInLink, UnexpectedNodeType};
3use core::convert::TryFrom;
4use core::fmt;
5use libipld::Cid;
6
7mod sharded_lookup;
8pub use sharded_lookup::{Cache, LookupError, ShardError, ShardedLookup};
9
10mod directory;
11pub(crate) use directory::{check_directory_supported, UnexpectedDirectoryProperties};
12
13/// Directory tree builder.
14pub mod builder;
15
16pub(crate) fn check_hamtshard_supported(
17    mut flat: FlatUnixFs<'_>,
18) -> Result<FlatUnixFs<'_>, ShardError> {
19    ShardedLookup::check_supported(&mut flat)?;
20    Ok(flat)
21}
22
23/// Resolves a single path segment on `dag-pb` or UnixFS directories (normal, sharded).
24///
25/// The third parameter can always be substituted with a None but when repeatedly resolving over
26/// multiple path segments, it can be used to cache the work queue used to avoid re-allocating it
27/// between the steps.
28///
29/// Returns on success either a walker which can be used to traverse additional links searching for
30/// the link, or the resolved link once it has been found or NotFound when it cannot be found.
31///
32/// # Note
33///
34/// The returned walker by default borrows the needle but it can be transformed into owned walker
35/// with `ShardedLookup::with_owned_needle` which will allow moving it between tasks and boundaries.
36pub fn resolve<'needle>(
37    block: &[u8],
38    needle: &'needle str,
39    cache: &mut Option<Cache>,
40) -> Result<MaybeResolved<'needle>, ResolveError> {
41    let links = match FlatUnixFs::try_parse(block) {
42        Ok(hamt) if hamt.data.Type == UnixFsType::HAMTShard => {
43            return Ok(ShardedLookup::lookup_or_start(hamt, needle, cache)?)
44        }
45        Ok(flat) if flat.data.Type == UnixFsType::Directory => {
46            check_directory_supported(flat)?.links
47        }
48        Err(ParsingFailed::InvalidUnixFs(_, PBNode { Links: links, .. }))
49        | Err(ParsingFailed::NoData(PBNode { Links: links, .. })) => links,
50        Ok(other) => {
51            // go-ipfs does not resolve links under File, probably it's not supposed to work on
52            // anything else then; returning NotFound would be correct, but perhaps it's even more
53            // correct to return that we don't support this
54            return Err(ResolveError::UnexpectedType(other.data.Type.into()));
55        }
56        Err(ParsingFailed::InvalidDagPb(e)) => return Err(ResolveError::Read(e)),
57    };
58
59    let mut matching = links.into_iter().enumerate().filter_map(|(i, link)| {
60        match link.Name.as_deref().unwrap_or_default() {
61            x if x == needle => Some((i, link)),
62            _ => None,
63        }
64    });
65
66    let first = matching.next();
67
68    if let Some((i, first)) = first {
69        let first = try_convert_cid(i, first)?;
70        match matching.next() {
71            Some((j, second)) => Err(MultipleMatchingLinks::from(((i, first), (j, second))).into()),
72            None => Ok(MaybeResolved::Found(first)),
73        }
74    } else {
75        Ok(MaybeResolved::NotFound)
76    }
77}
78
79fn try_convert_cid(nth: usize, link: PBLink<'_>) -> Result<Cid, InvalidCidInLink> {
80    let hash = link.Hash.as_deref().unwrap_or_default();
81    Cid::try_from(hash).map_err(|e| InvalidCidInLink::from((nth, link, e)))
82}
83
84/// Resolving result type for the successful cases.
85#[derive(Debug)]
86pub enum MaybeResolved<'needle> {
87    /// Link was found for the given segment.
88    Found(Cid),
89    /// The block presented to `resolve` was a HAMT sharded directory and other blocks need to be
90    /// read in order to find the link. `ShardedLookup` will handle the lookup and navigation
91    /// over the shards.
92    NeedToLoadMore(ShardedLookup<'needle>),
93    /// The segment could not be found.
94    NotFound,
95}
96
97/// Resolving can fail similarly as with `ShardedLookup::continue_walk` but in addition to sharded
98/// cases, there can be unexpected directories.
99#[derive(Debug)]
100pub enum ResolveError {
101    /// The target block was a UnixFs node that doesn't support resolving, e.g. a file.
102    UnexpectedType(UnexpectedNodeType),
103    /// A directory had unsupported properties. These are not encountered during walking sharded
104    /// directories.
105    UnexpectedDirProperties(UnexpectedDirectoryProperties),
106    /// Failed to read the block as a dag-pb node. Failure to read an inner UnixFS node is ignored
107    /// and links of the outer dag-pb are processed.
108    Read(quick_protobuf::Error),
109    /// Lookup errors.
110    Lookup(LookupError),
111}
112
113impl From<UnexpectedDirectoryProperties> for ResolveError {
114    fn from(e: UnexpectedDirectoryProperties) -> Self {
115        ResolveError::UnexpectedDirProperties(e)
116    }
117}
118
119impl fmt::Display for ResolveError {
120    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
121        use ResolveError::*;
122        match self {
123            UnexpectedType(ut) => write!(fmt, "unexpected type for UnixFs: {:?}", ut),
124            UnexpectedDirProperties(udp) => write!(fmt, "unexpected directory properties: {}", udp),
125            Read(e) => write!(fmt, "parsing failed: {}", e),
126            Lookup(e) => write!(fmt, "{}", e),
127        }
128    }
129}
130
131impl std::error::Error for ResolveError {
132    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
133        use ResolveError::*;
134        match self {
135            Read(e) => Some(e),
136            Lookup(LookupError::Read(Some(e))) => Some(e),
137            _ => None,
138        }
139    }
140}
141
142impl From<InvalidCidInLink> for ResolveError {
143    fn from(e: InvalidCidInLink) -> ResolveError {
144        ResolveError::Lookup(e.into())
145    }
146}
147
148impl From<MultipleMatchingLinks> for ResolveError {
149    fn from(e: MultipleMatchingLinks) -> ResolveError {
150        ResolveError::Lookup(e.into())
151    }
152}
153
154impl From<ShardError> for ResolveError {
155    fn from(e: ShardError) -> ResolveError {
156        ResolveError::Lookup(e.into())
157    }
158}
159
160impl From<LookupError> for ResolveError {
161    fn from(e: LookupError) -> ResolveError {
162        ResolveError::Lookup(e)
163    }
164}
165
166/// Multiple matching links were found: **at least two**.
167#[derive(Debug)]
168pub enum MultipleMatchingLinks {
169    /// Two valid links were found
170    Two {
171        /// The first link and its index in the links
172        first: (usize, Cid),
173        /// The second link and its index in the links
174        second: (usize, Cid),
175    },
176    /// Two links were matched but one of them could not be converted.
177    OneValid {
178        /// The first link and its index in the links
179        first: (usize, Cid),
180        /// The failure to parse the other link
181        second: InvalidCidInLink,
182    },
183}
184
185impl<'a> From<((usize, Cid), (usize, PBLink<'a>))> for MultipleMatchingLinks {
186    fn from(
187        ((i, first), (j, second)): ((usize, Cid), (usize, PBLink<'a>)),
188    ) -> MultipleMatchingLinks {
189        match try_convert_cid(j, second) {
190            Ok(second) => MultipleMatchingLinks::Two {
191                first: (i, first),
192                second: (j, second),
193            },
194            Err(e) => MultipleMatchingLinks::OneValid {
195                first: (i, first),
196                second: e,
197            },
198        }
199    }
200}
201
202impl MultipleMatchingLinks {
203    /// Takes the first link, ignoring the other(s).
204    pub fn into_inner(self) -> Cid {
205        use MultipleMatchingLinks::*;
206        match self {
207            Two { first, .. } | OneValid { first, .. } => first.1,
208        }
209    }
210}
211
#[cfg(test)]
mod tests {

    use super::{resolve, MaybeResolved};
    use crate::test_support::FakeBlockstore;
    use core::convert::TryFrom;
    use hex_literal::hex;
    use libipld::Cid;

    /// Unwraps the cid out of an outcome which is expected to be `MaybeResolved::Found`.
    fn expect_found(outcome: MaybeResolved<'_>) -> Cid {
        match outcome {
            MaybeResolved::Found(cid) => cid,
            x => unreachable!("{:?}", x),
        }
    }

    #[test]
    fn resolve_paths_from_plain_dagpb() {
        let payload = hex!("12330a2212206aad27d7e2fc815cd15bf679535062565dc927a831547281fc0af9e5d7e67c74120b6166726963616e2e747874180812340a221220fd36ac5279964db0cba8f7fa45f8c4c44ef5e2ff55da85936a378c96c9c63204120c616d6572696361732e747874180812360a2212207564c20415869d77a8a40ca68a9158e397dd48bdff1325cdb23c5bcd181acd17120e6175737472616c69616e2e7478741808");

        assert!(
            crate::dagpb::node_data(&payload).unwrap().is_none(),
            "this payload has no data field"
        );

        // (segment to resolve, expected cid or None when the segment must not be found)
        let cases: [(&str, Option<&str>); 5] = [
            ("african.txt", Some("QmVX54jfjB8eRxLVxyQSod6b1FyDh7mR4mQie9j97i2Qk3")),
            ("americas.txt", Some("QmfP6D9bRV4FEYDL4EHZtZG58kDwDfnzmyjuyK5d1pvzbM")),
            ("australian.txt", Some("QmWEuXAjUGyndgr4MKqMBgzMW36XgPgvitt2jsXgtuc7JE")),
            ("not found", None),
            // this is not a hamt shard
            ("01african.txt", None),
        ];

        // the same cache is shared by all of the lookups
        let mut cache = None;

        for &(segment, expected) in cases.iter() {
            let target = expected.map(|cid| Cid::try_from(cid).unwrap());

            match resolve(&payload[..], segment, &mut cache) {
                Ok(MaybeResolved::Found(cid)) => assert_eq!(Some(cid), target),
                Ok(MaybeResolved::NotFound) => {
                    assert!(target.is_none(), "should not have found {:?}", segment)
                }
                x => panic!("{:?}", x),
            }
        }
    }

    #[test]
    fn errors_with_file() {
        let payload = hex!("0a130802120d666f6f6261720a666f6f626172180d");
        // MaybeResolved::NotFound would be a possible answer as well, but an error perhaps
        // highlights better that we don't know how to resolve through this
        let outcome = resolve(&payload[..], "anything", &mut None);
        outcome.unwrap_err();
    }

    #[test]
    fn sharded_directory_linking_to_non_sharded() {
        // This test case was created out of doubt that a traversal could fail because
        // ShardedLookup expects the linked cids to be hamt shards. That cannot happen, as only a
        // single step is resolved at a time.
        let blocks = FakeBlockstore::with_fixtures();

        let root = blocks.get_by_str("QmQXUANxYGpkwMTWQUdZBPx9jqfFP7acNgL4FHRWkndKCe");
        let dir_cid = expect_found(resolve(root, "non_sharded_dir", &mut None).unwrap());

        let dir = blocks.get_by_cid(&dir_cid);
        let file_cid = expect_found(resolve(dir, "foobar", &mut None).unwrap());

        assert_eq!(
            file_cid.to_string(),
            "QmRgutAxd8t7oGkSm4wmeuByG6M51wcTso6cubDdQtuEfL"
        );
    }
}