ipfs_unixfs/file/
reader.rs

1use crate::pb::{FlatUnixFs, PBLink, RangeLinks, UnixFsType};
2use core::convert::TryFrom;
3use core::fmt;
4use core::ops::Range;
5
6use crate::file::{FileError, FileReadFailed, Metadata, UnwrapBorrowedExt};
7
8/// Navigates the UnixFs files, which are either:
9///  - single block files which have everything needed to all of the contents
10///  - multi block files which have trees of trees until Raw leaf blocks
11///
12/// The trees can have different shapes but it doesn't really matter for our depth-first approach.
13/// For seeking, the each sub-tree linking node will have blocksizes for the trees representing
14/// which the original file offsets covered by the tree.
15///
16/// A file doesn't know it's name. It only has a name when part of a directory, and then the name
17/// is on a PbLink::Name. With UnixFs the names are always UTF-8. The root CID is not interesting
18/// either: we just need the root block.
19pub struct FileReader<'a> {
20    offset: u64,
21    end: Ending,
22    links: Vec<PBLink<'a>>,
23    data: &'a [u8],
24    blocksizes: Vec<u64>,
25    metadata: Metadata,
26    file_size: u64,
27}
28
29impl AsRef<Metadata> for FileReader<'_> {
30    fn as_ref(&self) -> &Metadata {
31        &self.metadata
32    }
33}
34
35// TODO: this could be Range ... It just seemed there seems to be "two kinds" of endings but in
36// reality these are closer to two kinds of ranges or spans.
37#[derive(Debug)]
38enum Ending {
39    /// The block represented a subtree without actual content
40    TreeCoverage(u64),
41    /// The block repressented a leaf with actual content
42    Chunk(u64),
43}
44
45impl Ending {
46    /// Checks wheter or not the next range is good to be processed next.
47    fn check_is_suitable_next(&self, offset: u64, next: &Range<u64>) -> Result<(), FileError> {
48        match self {
49            Ending::TreeCoverage(cover_end) if next.start <= offset && &next.end > cover_end => {
50                // tree must be collapsing; we cant have root be some smaller *file* range than
51                // the child
52                Err(FileError::TreeExpandsOnLinks)
53            }
54            Ending::TreeCoverage(cover_end) if &next.start < cover_end && &next.end > cover_end => {
55                // when moving to sibling at the same height or above, its coverage must start
56                // from where we stopped
57                //
58                // This has been separated instead of making the TreeExpandsOnLinks more general as
59                // this might be a reasonable way with unixfs to reuse lower trees but no such
60                // example has been found at least.
61                Err(FileError::TreeOverlapsBetweenLinks)
62            }
63            Ending::TreeCoverage(_) if next.start < offset => Err(FileError::EarlierLink),
64            Ending::Chunk(chunk_end) if &next.start != chunk_end => {
65                // when continuing on from leaf node to either tree at above or a chunk at
66                // next, the next must continue where we stopped
67                Err(FileError::TreeJumpsBetweenLinks)
68            }
69            _ => Ok(()),
70        }
71    }
72}
73
74impl<'a> FileReader<'a> {
75    /// Method for starting the file traversal. `data` is the raw data from unixfs block.
76    pub fn from_block(data: &'a [u8]) -> Result<Self, FileReadFailed> {
77        let inner = FlatUnixFs::try_from(data)?;
78        let metadata = Metadata::from(&inner.data);
79        Self::from_parts(inner, 0, metadata)
80    }
81
82    pub(crate) fn from_parsed(inner: FlatUnixFs<'a>) -> Result<Self, FileReadFailed> {
83        let metadata = Metadata::from(&inner.data);
84        Self::from_parts(inner, 0, metadata)
85    }
86
87    /// Called by Traversal to continue traversing a file tree traversal.
88    fn from_continued(
89        traversal: Traversal,
90        offset: u64,
91        data: &'a [u8],
92    ) -> Result<Self, FileReadFailed> {
93        let inner = FlatUnixFs::try_from(data)?;
94
95        if inner.data.mode.is_some() || inner.data.mtime.is_some() {
96            let metadata = Metadata::from(&inner.data);
97            return Err(FileError::NonRootDefinesMetadata(metadata).into());
98        }
99
100        Self::from_parts(inner, offset, traversal.metadata)
101    }
102
103    fn from_parts(
104        inner: FlatUnixFs<'a>,
105        offset: u64,
106        metadata: Metadata,
107    ) -> Result<Self, FileReadFailed> {
108        let empty_or_no_content = inner
109            .data
110            .Data
111            .as_ref()
112            .map(|cow| cow.as_ref().is_empty())
113            .unwrap_or(true);
114        let is_zero_bytes = inner.data.filesize.unwrap_or(0) == 0;
115
116        if inner.data.Type != UnixFsType::File && inner.data.Type != UnixFsType::Raw {
117            Err(FileReadFailed::UnexpectedType(inner.data.Type.into()))
118        } else if inner.links.len() != inner.data.blocksizes.len() {
119            Err(FileError::LinksAndBlocksizesMismatch.into())
120        } else if empty_or_no_content && !is_zero_bytes && inner.links.is_empty() {
121            Err(FileError::NoLinksNoContent.into())
122        } else {
123            // raw and file seem to be same except the raw is preferred in trickle dag
124            let data = inner.data.Data.unwrap_borrowed_or_empty();
125
126            if inner.data.hashType.is_some() || inner.data.fanout.is_some() {
127                return Err(FileError::UnexpectedRawOrFileProperties {
128                    hash_type: inner.data.hashType,
129                    fanout: inner.data.fanout,
130                }
131                .into());
132            }
133
134            let end = if inner.links.is_empty() {
135                // can unwrap because `data` is all of the data
136                let filesize = inner.data.filesize.unwrap_or(data.len() as u64);
137                Ending::Chunk(offset + filesize)
138            } else {
139                match inner.data.filesize {
140                    Some(filesize) => Ending::TreeCoverage(offset + filesize),
141                    None => return Err(FileError::IntermediateNodeWithoutFileSize.into()),
142                }
143            };
144
145            Ok(Self {
146                offset,
147                end,
148                links: inner.links,
149                data,
150                blocksizes: inner.data.blocksizes,
151                metadata,
152                file_size: inner.data.filesize.unwrap(),
153            })
154        }
155    }
156
157    /// Returns a moved tuple of the content (bytes or links) and a traversal, which can be used to
158    /// continue the traversal from the next block.
159    pub fn content(
160        self,
161    ) -> (
162        FileContent<'a, impl Iterator<Item = (PBLink<'a>, Range<u64>)>>,
163        Traversal,
164    ) {
165        let traversal = Traversal {
166            last_ending: self.end,
167            last_offset: self.offset,
168
169            metadata: self.metadata,
170            file_size: self.file_size,
171        };
172
173        let fc = if self.links.is_empty() {
174            FileContent::Bytes(self.data)
175        } else {
176            let zipped = self.links.into_iter().zip(self.blocksizes.into_iter());
177            FileContent::Links(RangeLinks::from_links_and_blocksizes(
178                zipped,
179                Some(self.offset),
180            ))
181        };
182
183        (fc, traversal)
184    }
185}
186
187/// Carrier of validation data used between blocks during a walk on the merkle tree.
188#[derive(Debug)]
189pub struct Traversal {
190    last_ending: Ending,
191    last_offset: u64,
192    file_size: u64,
193
194    metadata: Metadata,
195}
196
197impl Traversal {
198    /// Continues the walk on the merkle tree with the given block contents. The block contents is
199    /// not validated and the range is expected to be the next from previous call to
200    /// FileContent::Links iterator.
201    ///
202    /// When calling this directly, it is good to note that repeatedly calling this with the same
203    /// block contents will not be detected, and will instead grow the internal Vec of links until
204    /// memory runs out.
205    pub fn continue_walk<'a>(
206        self,
207        next_block: &'a [u8],
208        tree_range: &Range<u64>,
209    ) -> Result<FileReader<'a>, FileReadFailed> {
210        self.last_ending
211            .check_is_suitable_next(self.last_offset, tree_range)?;
212        FileReader::from_continued(self, tree_range.start, next_block)
213    }
214
215    /// Returns the total size of the file.
216    pub fn file_size(&self) -> u64 {
217        self.file_size
218    }
219}
220
221impl AsRef<Metadata> for Traversal {
222    fn as_ref(&self) -> &Metadata {
223        &self.metadata
224    }
225}
226
227/// Files in unixfs merkle trees can either contain content of the file, or can contain links to
228/// other parts of the tree.
229pub enum FileContent<'a, I>
230where
231    I: Iterator<Item = (PBLink<'a>, Range<u64>)> + 'a,
232{
233    /// When reaching the leaf level of a DAG we finally find the actual content. For empty files
234    /// without content this will be an empty slice.
235    Bytes(&'a [u8]),
236    /// The content of the file is spread over a number of blocks; iteration must follow from index
237    /// depth-first from the first link to reach the given the bytes in the given byte offset
238    /// range.
239    Links(I),
240}
241
242#[cfg(test)]
243impl<'a, I> FileContent<'a, I>
244where
245    I: Iterator<Item = (PBLink<'a>, Range<u64>)>,
246{
247    /// Returns the content as bytes, or panics if there were links instead.
248    pub fn unwrap_content(self) -> &'a [u8] {
249        match self {
250            FileContent::Bytes(x) => x,
251            y => panic!("Expected FileContent::Bytes, found: {:?}", y),
252        }
253    }
254}
255
256impl<'a, I> fmt::Debug for FileContent<'a, I>
257where
258    I: Iterator<Item = (PBLink<'a>, Range<u64>)>,
259{
260    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
261        match self {
262            FileContent::Bytes(bytes) => write!(fmt, "Bytes({} bytes)", bytes.len()),
263            FileContent::Links(iter) => write!(fmt, "Links({:?})", iter.size_hint()),
264        }
265    }
266}
267
268#[cfg(test)]
269mod tests {
270    use super::Ending;
271    use crate::file::FileError;
272
273    #[test]
274    fn collapsing_tree() {
275        // this is pretty much how I planned the ending might be useful but it's perhaps a bit
276        // confusing as it's only the half of the range
277        Ending::TreeCoverage(100)
278            .check_is_suitable_next(0, &(0..100))
279            .unwrap();
280        Ending::TreeCoverage(100)
281            .check_is_suitable_next(0, &(0..10))
282            .unwrap();
283        Ending::TreeCoverage(100)
284            .check_is_suitable_next(0, &(0..2))
285            .unwrap();
286        Ending::Chunk(2)
287            .check_is_suitable_next(0, &(2..10))
288            .unwrap();
289        Ending::TreeCoverage(10)
290            .check_is_suitable_next(2, &(2..10))
291            .unwrap();
292        Ending::TreeCoverage(10)
293            .check_is_suitable_next(2, &(10..20))
294            .unwrap();
295        Ending::Chunk(10)
296            .check_is_suitable_next(2, &(10..100))
297            .unwrap();
298    }
299
300    #[test]
301    fn expanding_tree() {
302        let res = Ending::TreeCoverage(100).check_is_suitable_next(10, &(0..102));
303        assert!(
304            matches!(res, Err(FileError::TreeExpandsOnLinks)),
305            "{:?}",
306            res
307        );
308
309        let res = Ending::TreeCoverage(100).check_is_suitable_next(0, &(0..102));
310        assert!(
311            matches!(res, Err(FileError::TreeExpandsOnLinks)),
312            "{:?}",
313            res
314        );
315    }
316
317    #[test]
318    fn overlap() {
319        let res = Ending::TreeCoverage(100).check_is_suitable_next(10, &(88..102));
320        assert!(
321            matches!(res, Err(FileError::TreeOverlapsBetweenLinks)),
322            "{:?}",
323            res
324        );
325    }
326
327    #[test]
328    fn hole() {
329        let res = Ending::Chunk(100).check_is_suitable_next(0, &(101..105));
330        assert!(
331            matches!(res, Err(FileError::TreeJumpsBetweenLinks)),
332            "{:?}",
333            res
334        );
335    }
336
337    #[test]
338    fn wrong_next() {
339        let res = Ending::TreeCoverage(200).check_is_suitable_next(100, &(0..100));
340        assert!(matches!(res, Err(FileError::EarlierLink)), "{:?}", res);
341
342        let res = Ending::TreeCoverage(101).check_is_suitable_next(100, &(0..100));
343        assert!(matches!(res, Err(FileError::EarlierLink)), "{:?}", res);
344
345        let res = Ending::TreeCoverage(100).check_is_suitable_next(100, &(0..100));
346        assert!(matches!(res, Err(FileError::EarlierLink)), "{:?}", res);
347    }
348}