ipfs_unixfs/dir/
sharded_lookup.rs

1use super::{try_convert_cid, MaybeResolved, MultipleMatchingLinks, ResolveError};
2use crate::pb::{FlatUnixFs, PBLink, ParsingFailed, UnixFsType};
3use crate::{InvalidCidInLink, UnexpectedNodeType};
4use alloc::borrow::Cow;
5use alloc::collections::VecDeque;
6use cid::Cid;
7use core::convert::TryFrom;
8use core::fmt;
9
10/// A cache of data structures used while traversing. Reduces allocations when walking over multiple
11/// path segments.
12pub struct Cache {
13    buffer: VecDeque<Cid>,
14}
15
16impl From<VecDeque<Cid>> for Cache {
17    fn from(mut buffer: VecDeque<Cid>) -> Self {
18        buffer.clear();
19        Cache { buffer }
20    }
21}
22impl fmt::Debug for Cache {
23    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
24        write!(fmt, "Cache {{ buffer: {} }}", self.buffer.capacity())
25    }
26}
27
28/// `ShardedLookup` can walk over multiple HAMT sharded directory nodes which allows multiple block
29/// spanning directories.
30pub struct ShardedLookup<'needle> {
31    links: VecDeque<Cid>,
32    // this will be tricky if we ever need to have case-insensitive resolving *but* we can then
33    // make a custom Cow type; important not to expose Cow in any API.
34    needle: Cow<'needle, str>,
35}
36
37impl fmt::Debug for ShardedLookup<'_> {
38    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
39        write!(
40            fmt,
41            "ShardedLookup {{ links: {}, needle: {:?} }}",
42            self.links.len(),
43            self.needle.as_ref(),
44        )
45    }
46}
47
48impl<'needle> ShardedLookup<'needle> {
49    /// Returns the next pending link and an iterator over the rest.
50    pub fn pending_links(&self) -> (&Cid, impl Iterator<Item = &Cid>) {
51        let mut iter = self.links.iter();
52        let first = iter.next().expect("Already validated there are links");
53        (first, iter)
54    }
55
56    /// Continues the walk in the DAG of HAMT buckets searching for the original `needle`.
57    pub fn continue_walk(
58        mut self,
59        next: &[u8],
60        cache: &mut Option<Cache>,
61    ) -> Result<MaybeResolved<'needle>, LookupError> {
62        // just to make sure not to mess this up
63        debug_assert_eq!(Some(self.pending_links().0), self.links.front());
64
65        self.links
66            .pop_front()
67            .expect("Already validated there are links");
68
69        let mut hamt = match FlatUnixFs::try_from(next) {
70            Ok(hamt) if hamt.data.Type == UnixFsType::HAMTShard => hamt,
71            Ok(other) => return Err(LookupError::UnexpectedBucketType(other.data.Type.into())),
72            Err(ParsingFailed::InvalidDagPb(e)) | Err(ParsingFailed::InvalidUnixFs(e, _)) => {
73                *cache = Some(self.links.into());
74                return Err(LookupError::Read(Some(e)));
75            }
76            Err(ParsingFailed::NoData(_)) => {
77                *cache = Some(self.links.into());
78                return Err(LookupError::Read(None));
79            }
80        };
81
82        Self::check_supported(&mut hamt)?;
83
84        let found = Self::partition(
85            hamt.links.into_iter(),
86            self.needle.as_ref(),
87            &mut self.links,
88        )?;
89
90        if let Some(cid) = found {
91            *cache = Some(self.links.into());
92            Ok(MaybeResolved::Found(cid))
93        } else if self.links.is_empty() {
94            *cache = Some(self.links.into());
95            Ok(MaybeResolved::NotFound)
96        } else {
97            Ok(MaybeResolved::NeedToLoadMore(self))
98        }
99    }
100
101    /// Transforms this `ShardedLookup` into a `ShardedLookup<'static>` by taking ownership of the
102    /// needle we are trying to find.
103    pub fn with_owned_needle(self) -> ShardedLookup<'static> {
104        let ShardedLookup { links, needle } = self;
105        let needle = Cow::Owned(needle.into_owned());
106        ShardedLookup { links, needle }
107    }
108
109    /// Finds or starts a lookup of multiple buckets.
110    ///
111    /// Returns the found link, the definitive negative or the means to continue the traversal.
112    pub(crate) fn lookup_or_start(
113        mut hamt: FlatUnixFs<'_>,
114        needle: &'needle str,
115        cache: &mut Option<Cache>,
116    ) -> Result<MaybeResolved<'needle>, LookupError> {
117        Self::check_supported(&mut hamt)?;
118
119        let mut links = cache.take().map(|c| c.buffer).unwrap_or_default();
120
121        let found = Self::partition(hamt.links.into_iter(), needle, &mut links)?;
122
123        if let Some(cid) = found {
124            *cache = Some(links.into());
125            Ok(MaybeResolved::Found(cid))
126        } else if links.is_empty() {
127            *cache = Some(links.into());
128            Ok(MaybeResolved::NotFound)
129        } else {
130            Ok(MaybeResolved::NeedToLoadMore(ShardedLookup {
131                links,
132                needle: Cow::Borrowed(needle),
133            }))
134        }
135    }
136
137    /// Takes the validated object as mutable reference to move data out of it in case of error.
138    ///
139    /// Returns an error if we don't support the properties on the HAMTShard-typed node
140    pub(crate) fn check_supported(hamt: &mut FlatUnixFs<'_>) -> Result<(), ShardError> {
141        assert_eq!(hamt.data.Type, UnixFsType::HAMTShard);
142
143        if hamt.data.fanout != Some(256) || hamt.data.hashType != Some(34) {
144            Err(ShardError::UnsupportedProperties {
145                hash_type: hamt.data.hashType,
146                fanout: hamt.data.fanout,
147            })
148        } else if hamt.data.filesize.is_some() || !hamt.data.blocksizes.is_empty() {
149            Err(ShardError::UnexpectedProperties {
150                filesize: hamt.data.filesize,
151                blocksizes: core::mem::take(&mut hamt.data.blocksizes),
152            })
153        } else {
154            Ok(())
155        }
156    }
157
158    /// Partition the original links based on their kind; if the link:
159    ///
160    ///  - matches the needle uniquely, it will be returned as `Some(cid)`
161    ///  - is a bucket, it is pushed back to the work
162    fn partition<'a>(
163        iter: impl Iterator<Item = PBLink<'a>>,
164        needle: &str,
165        work: &mut VecDeque<Cid>,
166    ) -> Result<Option<Cid>, PartitioningError> {
167        let mut found = None;
168
169        for (i, link) in iter.enumerate() {
170            let name = link.Name.as_deref().unwrap_or_default();
171
172            if name.len() > 2 && &name[2..] == needle {
173                if let Some(first) = found.take() {
174                    return Err(MultipleMatchingLinks::from((first, (i, link))).into());
175                } else {
176                    found = Some((i, try_convert_cid(i, link)?));
177                }
178            } else if name.len() == 2 {
179                // the magic number of two comes from the fanout (256) probably
180                let cid = try_convert_cid(i, link)?;
181                work.push_back(cid);
182            } else {
183                // no match, not interesting for us
184            }
185        }
186
187        Ok(found.map(|(_, cid)| cid))
188    }
189}
190
191pub(crate) enum PartitioningError {
192    Multiple(MultipleMatchingLinks),
193    InvalidCid(InvalidCidInLink),
194}
195
196impl From<InvalidCidInLink> for PartitioningError {
197    fn from(e: InvalidCidInLink) -> PartitioningError {
198        PartitioningError::InvalidCid(e)
199    }
200}
201
202impl From<MultipleMatchingLinks> for PartitioningError {
203    fn from(e: MultipleMatchingLinks) -> PartitioningError {
204        PartitioningError::Multiple(e)
205    }
206}
207
208impl From<PartitioningError> for ResolveError {
209    fn from(e: PartitioningError) -> ResolveError {
210        ResolveError::Lookup(LookupError::from(e))
211    }
212}
213
214impl From<PartitioningError> for LookupError {
215    fn from(e: PartitioningError) -> LookupError {
216        use PartitioningError::*;
217        match e {
218            Multiple(m) => LookupError::Multiple(m),
219            InvalidCid(e) => LookupError::InvalidCid(e),
220        }
221    }
222}
223
224/// Shard does not fit into expectations.
225#[derive(Debug)]
226pub enum ShardError {
227    /// Encountered an HAMT sharded directory which had an unsupported configuration.
228    UnsupportedProperties {
229        /// Unsupported multihash hash.
230        hash_type: Option<u64>,
231        /// Unsupported fanout value.
232        fanout: Option<u64>,
233    },
234    /// Encountered an HAMT sharded directory which had a unexpected properties.
235    UnexpectedProperties {
236        /// Filesize is used with UnixFS files.
237        filesize: Option<u64>,
238        /// Blocksizes are in general used with UnixFS files.
239        blocksizes: Vec<u64>,
240    },
241}
242
243impl fmt::Display for ShardError {
244    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
245        use ShardError::*;
246        match self {
247            UnsupportedProperties { hash_type, fanout } => write!(
248                fmt,
249                "unsupported HAMTShard properties: hash_type={:?}, fanout={:?}",
250                hash_type, fanout
251            ),
252            UnexpectedProperties {
253                filesize,
254                blocksizes,
255            } => write!(
256                fmt,
257                "unexpected HAMTShard properties: filesize=({:?}), {} blocksizes",
258                filesize,
259                blocksizes.len()
260            ),
261        }
262    }
263}
264
265impl std::error::Error for ShardError {}
266
267/// Errors which can occur when looking up a HAMTSharded directory.
268#[derive(Debug)]
269pub enum LookupError {
270    /// Multiple matching links were found
271    Multiple(MultipleMatchingLinks),
272    /// Invalid Cid was matched
273    InvalidCid(InvalidCidInLink),
274    /// Unexpected HAMT shard bucket type
275    UnexpectedBucketType(UnexpectedNodeType),
276    /// Unsupported or unexpected property of the UnixFS node
277    Shard(ShardError),
278    /// Parsing failed or the inner dag-pb data was contained no bytes.
279    Read(Option<quick_protobuf::Error>),
280}
281
282impl LookupError {
283    /// Converts this HAMT lookup error to the more general ResolveError
284    pub fn into_resolve_error(self) -> ResolveError {
285        self.into()
286    }
287}
288
289impl From<MultipleMatchingLinks> for LookupError {
290    fn from(e: MultipleMatchingLinks) -> Self {
291        LookupError::Multiple(e)
292    }
293}
294
295impl From<InvalidCidInLink> for LookupError {
296    fn from(e: InvalidCidInLink) -> Self {
297        LookupError::InvalidCid(e)
298    }
299}
300
301impl From<ShardError> for LookupError {
302    fn from(e: ShardError) -> Self {
303        LookupError::Shard(e)
304    }
305}
306
307impl fmt::Display for LookupError {
308    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
309        use LookupError::*;
310
311        match self {
312            InvalidCid(e) => write!(fmt, "Invalid link: {:?}", e),
313            Multiple(e) => write!(fmt, "Multiple matching links found: {:?}", e),
314            UnexpectedBucketType(ut) => write!(fmt, "unexpected type for HAMT bucket: {:?}", ut),
315            Shard(e) => write!(fmt, "{}", e),
316            Read(Some(e)) => write!(
317                fmt,
318                "failed to parse the block as unixfs or dag-pb node: {}",
319                e
320            ),
321            Read(None) => write!(fmt, "HAMTDirectory not found in empty dag-pb node"),
322        }
323    }
324}
325
326impl std::error::Error for LookupError {
327    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
328        use LookupError::*;
329        match self {
330            Read(Some(e)) => Some(e),
331            _ => None,
332        }
333    }
334}
335
336#[cfg(test)]
337mod tests {
338    use super::{LookupError, MaybeResolved, ShardError, ShardedLookup};
339    use crate::pb::FlatUnixFs;
340    use core::convert::TryFrom;
341    use hex_literal::hex;
342
343    // a directory from some linux kernel tree import: linux-5.5-rc5/tools/testing/selftests/rcutorture/
344    const DIR: &[u8] = &hex!("122e0a2212204baf5104fe53d495223f8e2ba95375a31fda6b18e926cb54edd61f30b5f1de6512053641646f6318b535122c0a221220fd9f545068048e647d5d0b275ed171596e0c1c04b8fed09dc13bee7607e75bc7120242391883c00312330a2212208a4a68f6b88594ce373419586c12d24bde2d519ab636b1d2dcc986eb6265b7a3120a43444d616b6566696c65189601122f0a2212201ededc99d23a7ef43a8f17e6dd8b89934993245ef39e18936a37e412e536ed681205463562696e18c5ad030a280805121f200000000020000200000000000000000004000000000000000000000000002822308002");
345    const FILE: &[u8] = &hex!("0a130802120d666f6f6261720a666f6f626172180d");
346
347    #[test]
348    fn direct_hit() {
349        let parsed = FlatUnixFs::try_from(DIR).unwrap();
350
351        // calling shardedlookup directly makes little sense, but through `resolve` it would make
352        // sense
353
354        // testing this is a bit ... not nice, since we can only find out if there is a negative
355        // hit through exhausting the buckets
356        let found = ShardedLookup::lookup_or_start(parsed, "bin", &mut None);
357
358        match found {
359            Ok(MaybeResolved::Found(cid))
360                if cid.to_string() == "QmQRA3JX9JNSccQpXjuKzMVCpfTaP4XpHbrrefqaQFWf5Z" => {}
361            x => unreachable!("{:?}", x),
362        }
363    }
364
365    #[test]
366    fn found_in_the_other_bucket() {
367        let parsed = FlatUnixFs::try_from(DIR).unwrap();
368
369        // there is a single bin "B9" which would contain "formal" but our implementation cannot
370        // see it, it just guesses to start looking up in other buckets
371        let see_next = ShardedLookup::lookup_or_start(parsed, "formal", &mut None);
372
373        let next = match see_next {
374            Ok(MaybeResolved::NeedToLoadMore(next)) => next,
375            x => unreachable!("{:?}", x),
376        };
377
378        {
379            let (first, mut rest) = next.pending_links();
380
381            // there is only one bin: in other cases we would just walk in BFS order
382            assert_eq!(
383                first.to_string(),
384                "QmfQgmYMYmGQP4X6V3JhTELkQmGVP9kpJgv9duejQ8vWez"
385            );
386            assert!(rest.next().is_none());
387        }
388
389        // then we error on anything other than HAMTShard
390        let err = next.continue_walk(FILE, &mut None).unwrap_err();
391        match err {
392            LookupError::UnexpectedBucketType(ut) if ut.is_file() => {}
393            x => unreachable!("{:?}", x),
394        }
395    }
396
397    #[test]
398    fn unsupported_hash_type_or_fanout() {
399        use crate::pb::{FlatUnixFs, UnixFs, UnixFsType};
400        use alloc::borrow::Cow;
401
402        let example = FlatUnixFs {
403            data: UnixFs {
404                Type: UnixFsType::HAMTShard,
405                Data: Some(Cow::Borrowed(
406                    b"this cannot be interpreted yet but would be an error",
407                )),
408                filesize: None,
409                blocksizes: Vec::new(),
410                hashType: Some(33), // supported 34 or murmur128 le cut as u64?
411                fanout: Some(255),  // supported 256
412                mode: None,         // these are not read by the lookup
413                mtime: None,
414            },
415            links: Vec::new(),
416        };
417
418        let err = ShardedLookup::lookup_or_start(example, "doesnt matter", &mut None).unwrap_err();
419        assert!(
420            matches!(
421                err,
422                LookupError::Shard(ShardError::UnsupportedProperties { .. })
423            ),
424            "{:?}",
425            err
426        );
427    }
428
429    #[test]
430    fn unexpected_properties() {
431        use crate::pb::{FlatUnixFs, UnixFs, UnixFsType};
432        use alloc::borrow::Cow;
433
434        let example = FlatUnixFs {
435            data: UnixFs {
436                Type: UnixFsType::HAMTShard,
437                Data: Some(Cow::Borrowed(
438                    b"this cannot be interpreted yet but would be an error",
439                )),
440                filesize: Some(1),   // err
441                blocksizes: vec![1], // err
442                hashType: Some(34),
443                fanout: Some(256),
444                mode: None,
445                mtime: None,
446            },
447            links: Vec::new(),
448        };
449
450        let err = ShardedLookup::lookup_or_start(example, "doesnt matter", &mut None).unwrap_err();
451        assert!(
452            matches!(
453                err,
454                LookupError::Shard(ShardError::UnexpectedProperties { .. })
455            ),
456            "{:?}",
457            err
458        );
459    }
460}