rust_unixfs/
pb.rs

1use alloc::borrow::Cow;
2use core::convert::TryFrom;
3use core::fmt;
4use core::ops::Range;
5use quick_protobuf::{errors::Result as ProtobufResult, Writer, WriterBackend};
6
7pub(crate) mod merkledag;
8pub(crate) use merkledag::PBLink;
9pub(crate) use merkledag::PBNode;
10
11pub(crate) mod unixfs;
12pub(crate) use unixfs::mod_Data::DataType as UnixFsType;
13pub(crate) use unixfs::Data as UnixFs;
14
/// Failure cases for nested serialization, which allows recovery of the outer `PBNode` when desired.
///
/// Parsing happens in two layers: the outer dag-pb `PBNode` first, then the UnixFS message
/// stored in its `Data` field. Variants raised after the outer layer was decoded carry that
/// `PBNode` so the caller can still use it.
#[derive(Debug)]
pub(crate) enum ParsingFailed<'a> {
    /// The input bytes could not be decoded as a dag-pb `PBNode`.
    InvalidDagPb(quick_protobuf::Error),
    /// The `PBNode` was decoded, but its `Data` field was missing or empty.
    NoData(PBNode<'a>),
    /// The `PBNode` was decoded, but its `Data` field could not be decoded as a UnixFS message.
    InvalidUnixFs(quick_protobuf::Error, PBNode<'a>),
}
22
23impl fmt::Display for ParsingFailed<'_> {
24    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
25        use ParsingFailed::*;
26        match self {
27            InvalidDagPb(e) => write!(fmt, "failed to read the block as dag-pb: {e}"),
28            InvalidUnixFs(e, _) => write!(
29                fmt,
30                "failed to read the dag-pb PBNode::Data as UnixFS message: {e}"
31            ),
32            NoData(_) => write!(fmt, "dag-pb PBNode::Data was missing or empty"),
33        }
34    }
35}
36
37impl std::error::Error for ParsingFailed<'_> {
38    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
39        use ParsingFailed::*;
40
41        match self {
42            InvalidDagPb(e) => Some(e),
43            InvalidUnixFs(e, _) => Some(e),
44            NoData(_) => None,
45        }
46    }
47}
48
49// This has been aliased as UnixFs<'a>
50impl<'a> TryFrom<&'a merkledag::PBNode<'a>> for unixfs::Data<'a> {
51    type Error = quick_protobuf::Error;
52
53    fn try_from(node: &'a merkledag::PBNode<'a>) -> Result<Self, Self::Error> {
54        UnixFs::try_from(node.Data.as_deref())
55    }
56}
57
58// This has been aliased as UnixFs<'a>
59impl<'a> TryFrom<Option<&'a [u8]>> for unixfs::Data<'a> {
60    type Error = quick_protobuf::Error;
61
62    fn try_from(data: Option<&'a [u8]>) -> Result<Self, Self::Error> {
63        use quick_protobuf::{BytesReader, MessageRead};
64
65        let data = data.unwrap_or_default();
66        let mut reader = BytesReader::from_bytes(data);
67        UnixFs::from_reader(&mut reader, data)
68    }
69}
70
71// These should be derived by the pb-rs
72impl<'a> TryFrom<&'a [u8]> for merkledag::PBNode<'a> {
73    type Error = quick_protobuf::Error;
74
75    fn try_from(data: &'a [u8]) -> Result<Self, Self::Error> {
76        use quick_protobuf::{BytesReader, MessageRead};
77        merkledag::PBNode::from_reader(&mut BytesReader::from_bytes(data), data)
78    }
79}
80
/// Combined dag-pb (or MerkleDAG) with UnixFs payload.
///
/// Holds the links of the outer `PBNode` next to the already decoded UnixFS message that was
/// stored in the node's `Data` field, so both layers are available from one value.
#[derive(Debug)]
pub(crate) struct FlatUnixFs<'a> {
    /// Links of the outer dag-pb node.
    pub(crate) links: Vec<PBLink<'a>>,
    /// UnixFS message decoded from the outer node's `Data` field.
    pub(crate) data: UnixFs<'a>,
}
87
88impl<'a> quick_protobuf::message::MessageWrite for FlatUnixFs<'a> {
89    fn get_size(&self) -> usize {
90        use quick_protobuf::sizeofs::sizeof_len;
91        let links = self
92            .links
93            .iter()
94            .map(|s| 1 + sizeof_len(s.get_size()))
95            .sum::<usize>();
96
97        let body = 1 + sizeof_len(self.data.get_size());
98
99        links + body
100    }
101
102    fn write_message<W: WriterBackend>(&self, w: &mut Writer<W>) -> ProtobufResult<()> {
103        // this has been monkeyd after PBNode::write_message
104        //
105        // important to note that while protobuf isn't so picky when reading on field order, dag-pb
106        // is, at least to produce the same Cids.
107        for link in &self.links {
108            w.write_with_tag(18, |w| w.write_message(link))?;
109        }
110        // writing the self.data directly saves us the trouble of serializing it first to a vec,
111        // then using the vec to write this field.
112        w.write_with_tag(10, |w| w.write_message(&self.data))?;
113        Ok(())
114    }
115}
116
117impl<'a> FlatUnixFs<'a> {
118    pub(crate) fn try_parse(data: &'a [u8]) -> Result<Self, ParsingFailed<'a>> {
119        Self::try_from(data)
120    }
121}
122
123impl<'a> TryFrom<&'a [u8]> for FlatUnixFs<'a> {
124    type Error = ParsingFailed<'a>;
125
126    fn try_from(data: &'a [u8]) -> Result<Self, Self::Error> {
127        let node = merkledag::PBNode::try_from(data).map_err(ParsingFailed::InvalidDagPb)?;
128
129        let data = match node.Data {
130            Some(Cow::Borrowed(bytes)) if !bytes.is_empty() => Some(bytes),
131            Some(Cow::Owned(_)) => unreachable!(),
132            Some(Cow::Borrowed(_)) | None => return Err(ParsingFailed::NoData(node)),
133        };
134
135        match UnixFs::try_from(data) {
136            Ok(data) => Ok(FlatUnixFs {
137                links: node.Links,
138                data,
139            }),
140            Err(e) => Err(ParsingFailed::InvalidUnixFs(e, node)),
141        }
142    }
143}
144
#[cfg(test)]
impl<'a> FlatUnixFs<'a> {
    /// Pairs every link with the byte range it covers, counting from offset zero.
    ///
    /// # Panics
    ///
    /// Panics when the number of links differs from the number of blocksizes.
    pub fn range_links(&'a self) -> impl Iterator<Item = (PBLink<'a>, Range<u64>)> {
        assert_eq!(self.links.len(), self.data.blocksizes.len());

        // the assertion above guarantees that `zip` drops nothing from either side
        let sizes = self.data.blocksizes.iter().copied();
        let pairs = self.links.iter().cloned().zip(sizes);

        RangeLinks::from_links_and_blocksizes(pairs, Some(0))
    }
}
160
/// Iterator adapter turning `(link, blocksize)` pairs into `(link, byte range)` pairs, where
/// each range begins where the previous one ended.
pub(crate) struct RangeLinks<I> {
    /// Source of `(link, blocksize)` pairs.
    inner: I,
    /// Running offset: the start of the next range to be yielded.
    base: u64,
}
165
166impl<'a, I> RangeLinks<I>
167where
168    I: Iterator<Item = (PBLink<'a>, u64)>,
169{
170    /// `start_offset` is the offset of the current tree when walking the graph.
171    pub fn from_links_and_blocksizes(zipped: I, start_offset: Option<u64>) -> RangeLinks<I> {
172        RangeLinks {
173            inner: zipped,
174            base: start_offset.unwrap_or(0),
175        }
176    }
177}
178
179impl<'a, I> Iterator for RangeLinks<I>
180where
181    I: Iterator<Item = (PBLink<'a>, u64)>,
182{
183    type Item = (PBLink<'a>, Range<u64>);
184
185    fn next(&mut self) -> Option<Self::Item> {
186        self.inner.next().map(|(link, blocksize)| {
187            let returned_base = self.base;
188            self.base += blocksize;
189            (link, returned_base..(returned_base + blocksize))
190        })
191    }
192
193    fn size_hint(&self) -> (usize, Option<usize>) {
194        self.inner.size_hint()
195    }
196}
197
#[cfg(test)]
mod test {
    use super::{FlatUnixFs, PBNode, UnixFs, UnixFsType};
    use alloc::borrow::Cow;
    use core::convert::TryFrom;
    use hex_literal::hex;

    /// A single-block file: the outer node has no links and the inner UnixFS message carries
    /// the file content inline.
    #[test]
    fn parse_content() {
        use quick_protobuf::{BytesReader, MessageRead};
        let input = hex!("0a0d08021207636f6e74656e741807");

        let mut reader = BytesReader::from_bytes(&input);
        let dagnode =
            PBNode::from_reader(&mut reader, &input).expect("parse outer merkledag::PBNode");
        assert!(dagnode.Links.is_empty());

        let unixfs_data = UnixFs::try_from(&dagnode).expect("parse inner unixfs::Data");
        assert_eq!(unixfs_data.Type, UnixFsType::File);
        assert_eq!(unixfs_data.Data, Some(Cow::Borrowed(&b"content"[..])));
        assert_eq!(unixfs_data.filesize, Some(7));
        println!("{unixfs_data:?}");
    }

    /// A three-link file node: every link must be mapped to the consecutive byte range given
    /// by its blocksize.
    #[test]
    fn linux_tarxz_range_links() {
        let input = hex!("122b0a2212203822560f945fd3c74522de3448512a7e45cb53f0a9a1e12161da4667531ec12e120018aed4e015122b0a2212208594eb4dd5d67e573d506cd950ac59863b9afb024a590d7fe49b42fbcb44af43120018aed4e015122b0a221220745a70b6cd7ec3e46d16fb15b5e1e5db256f6a7a52d0b359f8f49b242665e17b120018b4e7e8090a1608021888c1a835208080e015208080e0152088c1e809");

        let flat = FlatUnixFs::try_from(&input[..]).unwrap();

        let mut expected_ranges = vec![
            0..45_613_056,
            45_613_056..91_226_112,
            91_226_112..111_812_744,
        ]
        .into_iter();

        for (link, range) in flat.range_links() {
            assert_eq!(link.Name, Some(Cow::Borrowed("")));
            // Tsize is the subtree size, which must always be larger than the file segments
            // because of encoding
            assert!(link.Tsize >= Some(range.end - range.start));
            assert_eq!(Some(range), expected_ranges.next());
        }

        // Without this the test would pass vacuously if `range_links` yielded too few items.
        assert_eq!(
            expected_ranges.next(),
            None,
            "range_links yielded fewer ranges than expected"
        );
    }
}