ipfs_unixfs/
pb.rs

1use alloc::borrow::Cow;
2use core::convert::TryFrom;
3use core::fmt;
4use core::ops::Range;
5use quick_protobuf::{errors::Result as ProtobufResult, Writer, WriterBackend};
6
7pub(crate) mod merkledag;
8pub(crate) use merkledag::PBLink;
9pub(crate) use merkledag::PBNode;
10
11pub(crate) mod unixfs;
12pub(crate) use unixfs::mod_Data::DataType as UnixFsType;
13pub(crate) use unixfs::Data as UnixFs;
14
15/// Failure cases for nested serialization, which allows recovery of the outer `PBNode` when desired.
16#[derive(Debug)]
17pub(crate) enum ParsingFailed<'a> {
18    InvalidDagPb(quick_protobuf::Error),
19    NoData(PBNode<'a>),
20    InvalidUnixFs(quick_protobuf::Error, PBNode<'a>),
21}
22
23impl fmt::Display for ParsingFailed<'_> {
24    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
25        use ParsingFailed::*;
26        match self {
27            InvalidDagPb(e) => write!(fmt, "failed to read the block as dag-pb: {}", e),
28            InvalidUnixFs(e, _) => write!(
29                fmt,
30                "failed to read the dag-pb PBNode::Data as UnixFS message: {}",
31                e
32            ),
33            NoData(_) => write!(fmt, "dag-pb PBNode::Data was missing or empty"),
34        }
35    }
36}
37
38impl std::error::Error for ParsingFailed<'_> {
39    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
40        use ParsingFailed::*;
41
42        match self {
43            InvalidDagPb(e) => Some(e),
44            InvalidUnixFs(e, _) => Some(e),
45            NoData(_) => None,
46        }
47    }
48}
49
50// This has been aliased as UnixFs<'a>
51impl<'a> TryFrom<&'a merkledag::PBNode<'a>> for unixfs::Data<'a> {
52    type Error = quick_protobuf::Error;
53
54    fn try_from(node: &'a merkledag::PBNode<'a>) -> Result<Self, Self::Error> {
55        UnixFs::try_from(node.Data.as_deref())
56    }
57}
58
59// This has been aliased as UnixFs<'a>
60impl<'a> TryFrom<Option<&'a [u8]>> for unixfs::Data<'a> {
61    type Error = quick_protobuf::Error;
62
63    fn try_from(data: Option<&'a [u8]>) -> Result<Self, Self::Error> {
64        use quick_protobuf::{BytesReader, MessageRead};
65
66        let data = data.unwrap_or_default();
67        let mut reader = BytesReader::from_bytes(data);
68        UnixFs::from_reader(&mut reader, data)
69    }
70}
71
72// These should be derived by the pb-rs
73impl<'a> TryFrom<&'a [u8]> for merkledag::PBNode<'a> {
74    type Error = quick_protobuf::Error;
75
76    fn try_from(data: &'a [u8]) -> Result<Self, Self::Error> {
77        use quick_protobuf::{BytesReader, MessageRead};
78        merkledag::PBNode::from_reader(&mut BytesReader::from_bytes(data), data)
79    }
80}
81
82/// Combined dag-pb (or MerkleDAG) with UnixFs payload.
83#[derive(Debug)]
84pub(crate) struct FlatUnixFs<'a> {
85    pub(crate) links: Vec<PBLink<'a>>,
86    pub(crate) data: UnixFs<'a>,
87}
88
89impl<'a> quick_protobuf::message::MessageWrite for FlatUnixFs<'a> {
90    fn get_size(&self) -> usize {
91        use quick_protobuf::sizeofs::sizeof_len;
92        let links = self
93            .links
94            .iter()
95            .map(|s| 1 + sizeof_len(s.get_size()))
96            .sum::<usize>();
97
98        let body = 1 + sizeof_len(self.data.get_size());
99
100        links + body
101    }
102
103    fn write_message<W: WriterBackend>(&self, w: &mut Writer<W>) -> ProtobufResult<()> {
104        // this has been monkeyd after PBNode::write_message
105        //
106        // important to note that while protobuf isn't so picky when reading on field order, dag-pb
107        // is, at least to produce the same Cids.
108        for link in &self.links {
109            w.write_with_tag(18, |w| w.write_message(link))?;
110        }
111        // writing the self.data directly saves us the trouble of serializing it first to a vec,
112        // then using the vec to write this field.
113        w.write_with_tag(10, |w| w.write_message(&self.data))?;
114        Ok(())
115    }
116}
117
118impl<'a> FlatUnixFs<'a> {
119    pub(crate) fn try_parse(data: &'a [u8]) -> Result<Self, ParsingFailed<'a>> {
120        Self::try_from(data)
121    }
122}
123
124impl<'a> TryFrom<&'a [u8]> for FlatUnixFs<'a> {
125    type Error = ParsingFailed<'a>;
126
127    fn try_from(data: &'a [u8]) -> Result<Self, Self::Error> {
128        let node = merkledag::PBNode::try_from(data).map_err(ParsingFailed::InvalidDagPb)?;
129
130        let data = match node.Data {
131            Some(Cow::Borrowed(bytes)) if !bytes.is_empty() => Some(bytes),
132            Some(Cow::Owned(_)) => unreachable!(),
133            Some(Cow::Borrowed(_)) | None => return Err(ParsingFailed::NoData(node)),
134        };
135
136        match UnixFs::try_from(data) {
137            Ok(data) => Ok(FlatUnixFs {
138                links: node.Links,
139                data,
140            }),
141            Err(e) => Err(ParsingFailed::InvalidUnixFs(e, node)),
142        }
143    }
144}
145
#[cfg(test)]
impl<'a> FlatUnixFs<'a> {
    /// Pairs a clone of every link with the byte range covered by its blocksize, with the
    /// first range starting at zero.
    ///
    /// # Panics
    ///
    /// Panics if the number of links differs from the number of blocksizes.
    pub fn range_links(&'a self) -> impl Iterator<Item = (PBLink<'a>, Range<u64>)> {
        let sizes = &self.data.blocksizes;
        assert_eq!(self.links.len(), sizes.len());

        // important: the lengths were validated to be equal above, so zipping drops nothing
        let pairs = self.links.iter().cloned().zip(sizes.iter().copied());

        RangeLinks::from_links_and_blocksizes(pairs, Some(0))
    }
}
161
/// Iterator adapter turning `(link, blocksize)` pairs into `(link, range)` pairs, where each
/// range begins where the previous one ended.
pub(crate) struct RangeLinks<I> {
    /// Source of the `(link, blocksize)` pairs.
    inner: I,
    /// Start offset of the range for the next yielded link.
    base: u64,
}
166
167impl<'a, I> RangeLinks<I>
168where
169    I: Iterator<Item = (PBLink<'a>, u64)>,
170{
171    /// `start_offset` is the offset of the current tree when walking the graph.
172    pub fn from_links_and_blocksizes(zipped: I, start_offset: Option<u64>) -> RangeLinks<I> {
173        RangeLinks {
174            inner: zipped,
175            base: start_offset.unwrap_or(0),
176        }
177    }
178}
179
180impl<'a, I> Iterator for RangeLinks<I>
181where
182    I: Iterator<Item = (PBLink<'a>, u64)>,
183{
184    type Item = (PBLink<'a>, Range<u64>);
185
186    fn next(&mut self) -> Option<Self::Item> {
187        self.inner.next().map(|(link, blocksize)| {
188            let returned_base = self.base;
189            self.base += blocksize;
190            (link, returned_base..(returned_base + blocksize))
191        })
192    }
193
194    fn size_hint(&self) -> (usize, Option<usize>) {
195        self.inner.size_hint()
196    }
197}
198
#[cfg(test)]
mod test {
    use super::{FlatUnixFs, PBNode, UnixFs, UnixFsType};
    use alloc::borrow::Cow;
    use core::convert::TryFrom;
    use core::ops::Range;
    use hex_literal::hex;

    /// Parses a small block in two steps: first the outer dag-pb node, then the UnixFs message
    /// inside its `Data`.
    #[test]
    fn parse_content() {
        use quick_protobuf::{BytesReader, MessageRead};
        let input = hex!("0a0d08021207636f6e74656e741807");

        let mut reader = BytesReader::from_bytes(&input);
        let dagnode =
            PBNode::from_reader(&mut reader, &input).expect("parse outer merkledag::PBNode");
        assert!(dagnode.Links.is_empty());

        let unixfs_data = UnixFs::try_from(&dagnode).expect("parse inner unixfs::Data");
        assert_eq!(unixfs_data.Type, UnixFsType::File);
        assert_eq!(unixfs_data.Data, Some(Cow::Borrowed(&b"content"[..])));
        assert_eq!(unixfs_data.filesize, Some(7));
        println!("{:?}", unixfs_data);
    }

    /// Parses a block with three links and checks the ranges calculated for the links.
    #[test]
    fn linux_tarxz_range_links() {
        let input = hex!("122b0a2212203822560f945fd3c74522de3448512a7e45cb53f0a9a1e12161da4667531ec12e120018aed4e015122b0a2212208594eb4dd5d67e573d506cd950ac59863b9afb024a590d7fe49b42fbcb44af43120018aed4e015122b0a221220745a70b6cd7ec3e46d16fb15b5e1e5db256f6a7a52d0b359f8f49b242665e17b120018b4e7e8090a1608021888c1a835208080e015208080e0152088c1e809");

        let flat = FlatUnixFs::try_from(&input[..]).unwrap();

        let expected_ranges: Vec<Range<u64>> = vec![
            0..45_613_056,
            45_613_056..91_226_112,
            91_226_112..111_812_744,
        ];

        let actual_ranges = flat
            .range_links()
            .map(|(link, range)| {
                assert_eq!(link.Name, Some(Cow::Borrowed("")));
                // Tsize is the subtree size, which must always be larger than the file
                // segments because of encoding
                assert!(link.Tsize >= Some(range.end - range.start));
                range
            })
            .collect::<Vec<_>>();

        // comparing the whole sequence also catches too few (or no) links being yielded, which
        // checking the ranges one by one inside the loop would not notice
        assert_eq!(actual_ranges, expected_ranges);
    }
}