rust_unixfs/
dir.rs

1use crate::pb::{FlatUnixFs, PBLink, PBNode, ParsingFailed, UnixFsType};
2use crate::{InvalidCidInLink, UnexpectedNodeType};
3use core::convert::TryFrom;
4use core::fmt;
5use ipld_core::cid::Cid;
6
// Lookup over HAMT sharded directories; the types callers need are re-exported publicly.
mod sharded_lookup;
pub use sharded_lookup::{Cache, LookupError, ShardError, ShardedLookup};

// Checks for plain (non-sharded) directories; re-exported for use within this crate.
mod directory;
pub(crate) use directory::{check_directory_supported, UnexpectedDirectoryProperties};

/// Directory tree builder.
pub mod builder;
15
16pub(crate) fn check_hamtshard_supported(
17    mut flat: FlatUnixFs<'_>,
18) -> Result<FlatUnixFs<'_>, ShardError> {
19    ShardedLookup::check_supported(&mut flat)?;
20    Ok(flat)
21}
22
23/// Resolves a single path segment on `dag-pb` or UnixFS directories (normal, sharded).
24///
25/// The third parameter can always be substituted with a None but when repeatedly resolving over
26/// multiple path segments, it can be used to cache the work queue used to avoid re-allocating it
27/// between the steps.
28///
29/// Returns on success either a walker which can be used to traverse additional links searching for
30/// the link, or the resolved link once it has been found or NotFound when it cannot be found.
31///
32/// # Note
33///
34/// The returned walker by default borrows the needle but it can be transformed into owned walker
35/// with `ShardedLookup::with_owned_needle` which will allow moving it between tasks and boundaries.
36#[allow(clippy::result_large_err)]
37pub fn resolve<'needle>(
38    block: &[u8],
39    needle: &'needle str,
40    cache: &mut Option<Cache>,
41) -> Result<MaybeResolved<'needle>, ResolveError> {
42    let links = match FlatUnixFs::try_parse(block) {
43        Ok(hamt) if hamt.data.Type == UnixFsType::HAMTShard => {
44            return Ok(ShardedLookup::lookup_or_start(hamt, needle, cache)?)
45        }
46        Ok(flat) if flat.data.Type == UnixFsType::Directory => {
47            check_directory_supported(flat)?.links
48        }
49        Err(ParsingFailed::InvalidUnixFs(_, PBNode { Links: links, .. }))
50        | Err(ParsingFailed::NoData(PBNode { Links: links, .. })) => links,
51        Ok(other) => {
52            // go-ipfs does not resolve links under File, probably it's not supposed to work on
53            // anything else then; returning NotFound would be correct, but perhaps it's even more
54            // correct to return that we don't support this
55            return Err(ResolveError::UnexpectedType(other.data.Type.into()));
56        }
57        Err(ParsingFailed::InvalidDagPb(e)) => return Err(ResolveError::Read(e)),
58    };
59
60    let mut matching = links.into_iter().enumerate().filter_map(|(i, link)| {
61        match link.Name.as_deref().unwrap_or_default() {
62            x if x == needle => Some((i, link)),
63            _ => None,
64        }
65    });
66
67    let first = matching.next();
68
69    if let Some((i, first)) = first {
70        let first = try_convert_cid(i, first)?;
71        match matching.next() {
72            Some((j, second)) => Err(MultipleMatchingLinks::from(((i, first), (j, second))).into()),
73            None => Ok(MaybeResolved::Found(first)),
74        }
75    } else {
76        Ok(MaybeResolved::NotFound)
77    }
78}
79
80fn try_convert_cid(nth: usize, link: PBLink<'_>) -> Result<Cid, InvalidCidInLink> {
81    let hash = link.Hash.as_deref().unwrap_or_default();
82    Cid::try_from(hash).map_err(|e| InvalidCidInLink::from((nth, link, e)))
83}
84
/// Resolving result type for the successful cases of `resolve`.
///
/// The `'needle` lifetime is the lifetime of the path segment borrowed by the sharded lookup.
#[derive(Debug)]
pub enum MaybeResolved<'needle> {
    /// Link was found for the given segment; carries the `Cid` the link points to.
    Found(Cid),
    /// The block presented to `resolve` was a HAMT sharded directory and other blocks need to be
    /// read in order to find the link. `ShardedLookup` will handle the lookup and navigation
    /// over the shards.
    NeedToLoadMore(ShardedLookup<'needle>),
    /// The segment could not be found.
    NotFound,
}
97
/// Resolving can fail similarly as with `ShardedLookup::continue_walk` but in addition to sharded
/// cases, there can be unexpected directories.
///
/// This is the error type returned by `resolve`.
#[derive(Debug)]
pub enum ResolveError {
    /// The target block was a UnixFs node that doesn't support resolving, e.g. a file.
    UnexpectedType(UnexpectedNodeType),
    /// A directory had unsupported properties. These are not encountered during walking sharded
    /// directories.
    UnexpectedDirProperties(UnexpectedDirectoryProperties),
    /// Failed to read the block as a dag-pb node. Failure to read an inner UnixFS node is ignored
    /// and links of the outer dag-pb are processed.
    Read(quick_protobuf::Error),
    /// Lookup errors: invalid Cid in a link, multiple matching links and sharded lookup failures
    /// all end up in this variant.
    Lookup(LookupError),
}
113
114impl From<UnexpectedDirectoryProperties> for ResolveError {
115    fn from(e: UnexpectedDirectoryProperties) -> Self {
116        ResolveError::UnexpectedDirProperties(e)
117    }
118}
119
120impl fmt::Display for ResolveError {
121    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
122        use ResolveError::*;
123        match self {
124            UnexpectedType(ut) => write!(fmt, "unexpected type for UnixFs: {ut:?}"),
125            UnexpectedDirProperties(udp) => write!(fmt, "unexpected directory properties: {udp}"),
126            Read(e) => write!(fmt, "parsing failed: {e}"),
127            Lookup(e) => write!(fmt, "{e}"),
128        }
129    }
130}
131
132impl std::error::Error for ResolveError {
133    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
134        use ResolveError::*;
135        match self {
136            Read(e) => Some(e),
137            Lookup(LookupError::Read(Some(e))) => Some(e),
138            _ => None,
139        }
140    }
141}
142
143impl From<InvalidCidInLink> for ResolveError {
144    fn from(e: InvalidCidInLink) -> ResolveError {
145        ResolveError::Lookup(e.into())
146    }
147}
148
149impl From<MultipleMatchingLinks> for ResolveError {
150    fn from(e: MultipleMatchingLinks) -> ResolveError {
151        ResolveError::Lookup(e.into())
152    }
153}
154
155impl From<ShardError> for ResolveError {
156    fn from(e: ShardError) -> ResolveError {
157        ResolveError::Lookup(e.into())
158    }
159}
160
161impl From<LookupError> for ResolveError {
162    fn from(e: LookupError) -> ResolveError {
163        ResolveError::Lookup(e)
164    }
165}
166
/// Multiple matching links were found: **at least two**.
///
/// Only the first two matches are recorded. The first one is always stored as an already
/// converted `Cid`; the second one may have failed the conversion.
#[derive(Debug)]
pub enum MultipleMatchingLinks {
    /// Two valid links were found
    Two {
        /// The first link and its index in the links
        first: (usize, Cid),
        /// The second link and its index in the links
        second: (usize, Cid),
    },
    /// Two links were matched but one of them could not be converted.
    OneValid {
        /// The first link and its index in the links
        first: (usize, Cid),
        /// The failure to parse the other link
        second: InvalidCidInLink,
    },
}
185
186impl<'a> From<((usize, Cid), (usize, PBLink<'a>))> for MultipleMatchingLinks {
187    fn from(
188        ((i, first), (j, second)): ((usize, Cid), (usize, PBLink<'a>)),
189    ) -> MultipleMatchingLinks {
190        match try_convert_cid(j, second) {
191            Ok(second) => MultipleMatchingLinks::Two {
192                first: (i, first),
193                second: (j, second),
194            },
195            Err(e) => MultipleMatchingLinks::OneValid {
196                first: (i, first),
197                second: e,
198            },
199        }
200    }
201}
202
203impl MultipleMatchingLinks {
204    /// Takes the first link, ignoring the other(s).
205    pub fn into_inner(self) -> Cid {
206        use MultipleMatchingLinks::*;
207        match self {
208            Two { first, .. } | OneValid { first, .. } => first.1,
209        }
210    }
211}
212
#[cfg(test)]
mod tests {

    use super::{resolve, MaybeResolved};
    use crate::test_support::FakeBlockstore;
    use core::convert::TryFrom;
    use hex_literal::hex;
    use ipld_core::cid::Cid;

    /// Resolves `segment` in `block` without a cache and returns the found link; any other
    /// outcome fails the test.
    fn resolve_found(block: &[u8], segment: &str) -> Cid {
        match resolve(block, segment, &mut None).unwrap() {
            MaybeResolved::Found(cid) => cid,
            other => unreachable!("{:?}", other),
        }
    }

    #[test]
    fn resolve_paths_from_plain_dagpb() {
        let payload = hex!("12330a2212206aad27d7e2fc815cd15bf679535062565dc927a831547281fc0af9e5d7e67c74120b6166726963616e2e747874180812340a221220fd36ac5279964db0cba8f7fa45f8c4c44ef5e2ff55da85936a378c96c9c63204120c616d6572696361732e747874180812360a2212207564c20415869d77a8a40ca68a9158e397dd48bdff1325cdb23c5bcd181acd17120e6175737472616c69616e2e7478741808");

        assert!(
            crate::dagpb::node_data(&payload).unwrap().is_none(),
            "this payload has no data field"
        );

        // (path segment, expected link target if any)
        let segments = [
            (
                "african.txt",
                Some("QmVX54jfjB8eRxLVxyQSod6b1FyDh7mR4mQie9j97i2Qk3"),
            ),
            (
                "americas.txt",
                Some("QmfP6D9bRV4FEYDL4EHZtZG58kDwDfnzmyjuyK5d1pvzbM"),
            ),
            (
                "australian.txt",
                Some("QmWEuXAjUGyndgr4MKqMBgzMW36XgPgvitt2jsXgtuc7JE"),
            ),
            ("not found", None),
            // this is not a hamt shard
            ("01african.txt", None),
        ];

        // the same cache is shared by all of the lookups
        let mut cache = None;

        for &(segment, link) in segments.iter() {
            let target = link.map(|link| Cid::try_from(link).unwrap());

            match resolve(&payload[..], segment, &mut cache) {
                Ok(MaybeResolved::Found(cid)) => assert_eq!(Some(cid), target),
                Ok(MaybeResolved::NotFound) => {
                    assert!(target.is_none(), "should not have found {segment:?}")
                }
                x => panic!("{x:?}"),
            }
        }
    }

    #[test]
    fn errors_with_file() {
        let payload = hex!("0a130802120d666f6f6261720a666f6f626172180d");
        // MaybeResolved::NotFound would be a possible answer as well, but this perhaps highlights
        // that we dont know how to resolve through this
        let _unsupported = resolve(&payload[..], "anything", &mut None).unwrap_err();
    }

    #[test]
    fn sharded_directory_linking_to_non_sharded() {
        // created this test case out of doubt that we could fail a traversal as ShardedLookup
        // expects the linked cids to be hamt shards. However that cannot happen as we only resolve
        // a single step.
        let blocks = FakeBlockstore::with_fixtures();

        // first step: from the sharded root to the plain directory
        let root = blocks.get_by_str("QmQXUANxYGpkwMTWQUdZBPx9jqfFP7acNgL4FHRWkndKCe");
        let dir_cid = resolve_found(root, "non_sharded_dir");

        // second step: inside the plain directory
        let dir = blocks.get_by_cid(&dir_cid);
        let target = resolve_found(dir, "foobar");

        assert_eq!(
            &target.to_string(),
            "QmRgutAxd8t7oGkSm4wmeuByG6M51wcTso6cubDdQtuEfL"
        );
    }
}