unixfs_v1/
file.rs

//! UnixFS file support.
//!
//! The module provides low-level File tree visitor support and file importing support. Note: The
//! [`ipfs_unixfs::walk::Walker`] should typically be used for accessing file content.
5use crate::pb::ParsingFailed;
6use crate::{InvalidCidInLink, Metadata, UnexpectedNodeType};
7use alloc::borrow::Cow;
8use core::fmt;
9
10/// Low level UnixFS file descriptor reader support.
11mod reader;
12
13/// Mid level API for visiting the file tree.
14pub mod visit;
15
16/// File adder capable of constructing UnixFs v1 trees
17pub mod adder;
18
/// Describes the errors which can happen during a visit or lower level block-by-block walking of
/// the DAG.
#[derive(Debug)]
pub enum FileReadFailed {
    /// Unsupported UnixFs file; these might exist, but currently there are no workarounds for
    /// handling them.
    File(FileError),
    /// FileReader can only process raw or file type of unixfs content.
    // This is the raw value instead of the enum by design not to expose the quick-protobuf types
    UnexpectedType(UnexpectedNodeType),
    /// Parsing failed. Carries the `quick_protobuf` error when one was produced; `None` is
    /// reported as a missing UnixFS message.
    Read(Option<quick_protobuf::Error>),
    /// Link could not be turned into Cid.
    InvalidCid(InvalidCidInLink),
}
34
35impl fmt::Display for FileReadFailed {
36    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
37        use FileReadFailed::*;
38
39        match self {
40            File(e) => write!(fmt, "{}", e),
41            UnexpectedType(ut) => write!(fmt, "unexpected type for UnixFs: {:?}", ut),
42            Read(Some(e)) => write!(fmt, "reading failed: {}", e),
43            Read(None) => write!(fmt, "reading failed: missing UnixFS message"),
44            InvalidCid(e) => write!(fmt, "{}", e),
45        }
46    }
47}
48
49impl std::error::Error for FileReadFailed {
50    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
51        use FileReadFailed::*;
52        match self {
53            InvalidCid(e) => Some(e),
54            Read(Some(e)) => Some(e),
55            _ => None,
56        }
57    }
58}
59
60impl<'a> From<ParsingFailed<'a>> for FileReadFailed {
61    fn from(e: ParsingFailed<'a>) -> Self {
62        use ParsingFailed::*;
63        match e {
64            InvalidDagPb(e) => FileReadFailed::Read(Some(e)),
65            InvalidUnixFs(e, _) => FileReadFailed::Read(Some(e)),
66            NoData(_) => FileReadFailed::Read(None),
67        }
68    }
69}
70
/// Errors which can happen while processing UnixFS type File or Raw blocks.
#[derive(Debug, PartialEq, Eq)]
pub enum FileError {
    /// The number of links differs from the number of blocksizes and thus the file ranges for
    /// linked trees or blocks cannot be determined.
    LinksAndBlocksizesMismatch,
    /// The filesize is non-zero while there are no links or content.
    NoLinksNoContent,
    /// Unsupported: non-root block defines metadata.
    NonRootDefinesMetadata(Metadata),
    /// A non-leaf node in the tree has no filesize value which is used to determine the file range
    /// for this tree.
    IntermediateNodeWithoutFileSize,
    /// The tree or merkle dag should only collapse or stay the same length.
    TreeExpandsOnLinks,
    /// The tree links contain overlapping file segments. This is at least unsupported right now,
    /// but the larger segment could be collapsed down to the reused part.
    TreeOverlapsBetweenLinks,
    /// Reader has been fed a link to earlier range.
    EarlierLink,
    /// The tree links contain a hole from a file segment to the next tree. This is at least
    /// unsupported right now. Zeroes could be generated for the hole.
    TreeJumpsBetweenLinks,
    /// These values should not be present for unixfs files with File or Raw. If they have a valid
    /// meaning, support for such has not been implemented.
    UnexpectedRawOrFileProperties {
        /// Hash type, as read from the protobuf descriptor; should only be used with HAMT
        /// directories.
        hash_type: Option<u64>,
        /// Fan out, as read from the protobuf descriptor; should only be used with HAMT
        /// directories.
        fanout: Option<u64>,
    },
}
105
106impl fmt::Display for FileError {
107    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
108        use FileError::*;
109        match self {
110            LinksAndBlocksizesMismatch => write!(
111                fmt,
112                "different number of links and blocksizes: cannot determine subtree ranges"
113            ),
114            NoLinksNoContent => write!(
115                fmt,
116                "filesize is non-zero while there are no links or content"
117            ),
118            NonRootDefinesMetadata(metadata) => {
119                write!(fmt, "unsupported: non-root defines {:?}", metadata)
120            }
121            IntermediateNodeWithoutFileSize => {
122                write!(fmt, "intermediatery node with links but no filesize")
123            }
124            TreeExpandsOnLinks => write!(
125                fmt,
126                "total size of tree expands through links, it should only get smaller or keep size"
127            ),
128            TreeOverlapsBetweenLinks => write!(fmt, "unsupported: tree contains overlap"),
129            EarlierLink => write!(fmt, "error: earlier link given"),
130            TreeJumpsBetweenLinks => write!(fmt, "unsupported: tree contains holes"),
131            UnexpectedRawOrFileProperties { hash_type, fanout } => write!(
132                fmt,
133                "unsupported: File or Raw with hash_type {:?} or fanount {:?}",
134                hash_type, fanout
135            ),
136        }
137    }
138}
139
// No methods are overridden: `FileError` wraps no other error, so the default `source`
// implementation (returning `None`) applies.
impl std::error::Error for FileError {}
141
142impl From<FileError> for FileReadFailed {
143    fn from(e: FileError) -> Self {
144        Self::File(e)
145    }
146}
147
/// Helper for getting at the borrowed slice inside an `Option<Cow<'_, [u8]>>`, optionally
/// defaulting to an empty slice. Within this file the `Cow` is never an owned value.
///
/// quick-protobuf produces many `Option<Cow>` values which are a bit tricky to handle. They are
/// never turned into `Option<Cow::Owned>` here, so these helpers can be used safely.
pub(crate) trait UnwrapBorrowedExt<'a> {
    /// Returns the borrowed inner slice without any defaulting; a borrowed value must be present.
    fn unwrap_borrowed(self) -> &'a [u8];

    /// Returns the borrowed inner slice, or an empty slice when there is no value.
    fn unwrap_borrowed_or_empty(self) -> &'a [u8]
    where
        Self: 'a;
}

impl<'a> UnwrapBorrowedExt<'a> for Option<Cow<'a, [u8]>> {
    /// Panics on `None` and on `Cow::Owned`.
    fn unwrap_borrowed(self) -> &'a [u8] {
        match self.expect("Unexpected None") {
            Cow::Borrowed(bytes) => bytes,
            // An owned buffer would be dropped here, so it cannot be handed out as `&'a [u8]`.
            Cow::Owned(_) => panic!("unexpected Cow::Owned"),
        }
    }

    /// Panics on `Cow::Owned`; `None` becomes the empty slice.
    fn unwrap_borrowed_or_empty(self) -> &'a [u8] {
        match self {
            None => &[],
            Some(Cow::Borrowed(bytes)) => bytes,
            Some(Cow::Owned(_)) => panic!("should not be Cow::Owned"),
        }
    }
}
181
#[cfg(test)]
pub(crate) mod tests {
    use super::{reader::*, visit::*, UnwrapBorrowedExt};
    use crate::test_support::FakeBlockstore;
    use hex_literal::hex;

    /// Single block file; the tests below expect its content to be `b"content"`.
    const CONTENT_FILE: &[u8] = &hex!("0a0d08021207636f6e74656e741807");

    /// Fixture root which the tests below expect to read back as `b"foobar\n"`.
    const FOOBAR_CID: &str = "QmRJHYTNvC3hmd9gJQARxLR1QMEincccBV53bBw524yyq6";

    /// Fixture root used by `trickle_traversal`; also expected to read back as `b"foobar\n"`.
    const TRICKLE_FOOBAR_CID: &str = "QmWfQ48ChJUj4vWKFsUDe4646xCBmXgdmNfhjz9T7crywd";

    #[test]
    fn just_content() {
        let reader = FileReader::from_block(CONTENT_FILE).unwrap();
        let (content, _) = reader.content();
        assert!(
            matches!(content, FileContent::Bytes(b"content")),
            "{:?}",
            content
        );
    }

    #[test]
    fn visiting_just_content() {
        let outcome = IdleFileVisit::default().start(CONTENT_FILE);
        assert!(
            matches!(outcome, Ok((b"content", _, _, None))),
            "{:?}",
            outcome
        );
    }

    #[test]
    fn visiting_too_large_range_of_singleblock_file() {
        let visit = IdleFileVisit::default().with_target_range(500_000..600_000);
        let outcome = visit.start(CONTENT_FILE);

        assert!(matches!(outcome, Ok((b"", _, _, None))), "{:?}", outcome);
    }

    #[test]
    fn empty_file() {
        let block = &hex!("0a0408021800");
        let reader = FileReader::from_block(block).unwrap();
        let (content, _) = reader.content();
        assert!(matches!(content, FileContent::Bytes(b"")), "{:?}", content);
    }

    #[test]
    fn balanced_traversal() {
        let blocks = FakeBlockstore::with_fixtures();

        // The root block only fills in the links; they are copied into owned keys so that
        // nothing borrowed from the root reader needs to be held between two blocks.
        let (mut pending, mut traversal) = {
            let root = FileReader::from_block(blocks.get_by_str(FOOBAR_CID)).unwrap();

            let (owned_links, traversal) = match root.content() {
                (FileContent::Links(iter), traversal) => {
                    let owned_links = iter
                        .map(|(link, range)| (link.Hash.unwrap_borrowed().to_vec(), range))
                        .collect::<Vec<_>>();
                    (owned_links, traversal)
                }
                x => unreachable!("unexpected {:?}", x),
            };

            (owned_links, traversal)
        };

        // Reversed so that `pop` hands the links back in the order the iterator produced them.
        pending.reverse();

        let mut combined: Vec<u8> = Vec::new();

        while let Some((key, range)) = pending.pop() {
            let block = blocks.get_by_raw(&key);
            let reader = traversal.continue_walk(block, &range).unwrap();

            let (content, next_traversal) = reader.content();
            combined.extend(content.unwrap_content());
            traversal = next_traversal;
        }

        assert_eq!(combined, b"foobar\n");
    }

    /// Drives `visit` from the block named `start` until there are no more steps, always
    /// continuing with the first pending link, and returns all of the yielded content.
    fn collect_bytes(blocks: &FakeBlockstore, visit: IdleFileVisit, start: &str) -> Vec<u8> {
        let mut collected = Vec::new();

        let (first_chunk, _, _, mut pending) = visit.start(blocks.get_by_str(start)).unwrap();
        collected.extend(first_chunk);

        while let Some(current) = pending {
            let (next_cid, _) = current.pending_links();
            let block = blocks.get_by_cid(next_cid);

            let (chunk, following) = current.continue_walk(block, &mut None).unwrap();
            collected.extend(chunk);
            pending = following;
        }

        collected
    }

    #[test]
    fn visitor_traversal() {
        let blocks = FakeBlockstore::with_fixtures();

        let bytes = collect_bytes(&blocks, IdleFileVisit::default(), FOOBAR_CID);

        assert_eq!(&bytes[..], b"foobar\n");
    }

    #[test]
    fn scoped_visitor_traversal_from_blockstore() {
        let blocks = FakeBlockstore::with_fixtures();

        let scoped = IdleFileVisit::default().with_target_range(1..6);
        let bytes = collect_bytes(&blocks, scoped, FOOBAR_CID);

        assert_eq!(&bytes[..], b"oobar");
    }

    #[test]
    fn less_than_block_scoped_traversal_from_blockstore() {
        let blocks = FakeBlockstore::with_fixtures();

        let scoped = IdleFileVisit::default().with_target_range(0..1);
        let bytes = collect_bytes(&blocks, scoped, FOOBAR_CID);

        assert_eq!(&bytes[..], b"f");
    }

    #[test]
    fn scoped_traversal_out_of_bounds_from_blockstore() {
        let blocks = FakeBlockstore::with_fixtures();

        let scoped = IdleFileVisit::default().with_target_range(7..20);
        let bytes = collect_bytes(&blocks, scoped, FOOBAR_CID);

        assert_eq!(&bytes[..], b"");
    }

    #[test]
    fn trickle_traversal() {
        let blocks = FakeBlockstore::with_fixtures();

        let bytes = collect_bytes(&blocks, IdleFileVisit::default(), TRICKLE_FOOBAR_CID);

        assert_eq!(&bytes[..], b"foobar\n");
    }
}