unixfs_v1/
pb.rs

1use alloc::borrow::Cow;
2use core::convert::TryFrom;
3use core::fmt;
4use core::ops::Range;
5use quick_protobuf::{errors::Result as ProtobufResult, Writer, WriterBackend};
6
7pub(crate) mod merkledag;
8pub use merkledag::PBLink;
9pub use merkledag::PBNode;
10
11pub(crate) mod unixfs;
12pub use unixfs::mod_Data::DataType as UnixFsType;
13pub use unixfs::Data as UnixFs;
14
/// Multicodec code identifying dag-pb (MerkleDAG protobuf) blocks.
pub(crate) const DAG_PB: u64 = 0x70;
17
18/// Failure cases for nested serialization, which allows recovery of the outer `PBNode` when desired.
19#[derive(Debug)]
20pub enum ParsingFailed<'a> {
21    InvalidDagPb(quick_protobuf::Error),
22    NoData(PBNode<'a>),
23    InvalidUnixFs(quick_protobuf::Error, PBNode<'a>),
24}
25
26impl fmt::Display for ParsingFailed<'_> {
27    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
28        use ParsingFailed::*;
29        match self {
30            InvalidDagPb(e) => write!(fmt, "failed to read the block as dag-pb: {}", e),
31            InvalidUnixFs(e, _) => write!(
32                fmt,
33                "failed to read the dag-pb PBNode::Data as UnixFS message: {}",
34                e
35            ),
36            NoData(_) => write!(fmt, "dag-pb PBNode::Data was missing or empty"),
37        }
38    }
39}
40
41impl std::error::Error for ParsingFailed<'_> {
42    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
43        use ParsingFailed::*;
44
45        match self {
46            InvalidDagPb(e) => Some(e),
47            InvalidUnixFs(e, _) => Some(e),
48            NoData(_) => None,
49        }
50    }
51}
52
53// This has been aliased as UnixFs<'a>
54impl<'a> TryFrom<&'a merkledag::PBNode<'a>> for unixfs::Data<'a> {
55    type Error = quick_protobuf::Error;
56
57    fn try_from(node: &'a merkledag::PBNode<'a>) -> Result<Self, Self::Error> {
58        UnixFs::try_from(node.Data.as_deref())
59    }
60}
61
62// This has been aliased as UnixFs<'a>
63impl<'a> TryFrom<Option<&'a [u8]>> for unixfs::Data<'a> {
64    type Error = quick_protobuf::Error;
65
66    fn try_from(data: Option<&'a [u8]>) -> Result<Self, Self::Error> {
67        use quick_protobuf::{BytesReader, MessageRead};
68
69        let data = data.unwrap_or_default();
70        let mut reader = BytesReader::from_bytes(data);
71        UnixFs::from_reader(&mut reader, data)
72    }
73}
74
75// These should be derived by the pb-rs
76impl<'a> TryFrom<&'a [u8]> for merkledag::PBNode<'a> {
77    type Error = quick_protobuf::Error;
78
79    fn try_from(data: &'a [u8]) -> Result<Self, Self::Error> {
80        use quick_protobuf::{BytesReader, MessageRead};
81        merkledag::PBNode::from_reader(&mut BytesReader::from_bytes(data), data)
82    }
83}
84
85/// Combined dag-pb (or MerkleDAG) with UnixFs payload.
86#[derive(Debug)]
87pub struct FlatUnixFs<'a> {
88    pub links: Vec<PBLink<'a>>,
89    pub data: UnixFs<'a>,
90}
91
92impl<'a> quick_protobuf::message::MessageWrite for FlatUnixFs<'a> {
93    fn get_size(&self) -> usize {
94        use quick_protobuf::sizeofs::sizeof_len;
95        let links = self
96            .links
97            .iter()
98            .map(|s| 1 + sizeof_len(s.get_size()))
99            .sum::<usize>();
100
101        let body = 1 + sizeof_len(self.data.get_size());
102
103        links + body
104    }
105
106    fn write_message<W: WriterBackend>(&self, w: &mut Writer<W>) -> ProtobufResult<()> {
107        // this has been monkeyd after PBNode::write_message
108        //
109        // important to note that while protobuf isn't so picky when reading on field order, dag-pb
110        // is, at least to produce the same Cids.
111        for link in &self.links {
112            w.write_with_tag(18, |w| w.write_message(link))?;
113        }
114        // writing the self.data directly saves us the trouble of serializing it first to a vec,
115        // then using the vec to write this field.
116        w.write_with_tag(10, |w| w.write_message(&self.data))?;
117        Ok(())
118    }
119}
120
121impl<'a> FlatUnixFs<'a> {
122    pub fn try_parse(data: &'a [u8]) -> Result<Self, ParsingFailed<'a>> {
123        Self::try_from(data)
124    }
125}
126
127impl<'a> TryFrom<&'a [u8]> for FlatUnixFs<'a> {
128    type Error = ParsingFailed<'a>;
129
130    fn try_from(data: &'a [u8]) -> Result<Self, Self::Error> {
131        let node = merkledag::PBNode::try_from(data).map_err(ParsingFailed::InvalidDagPb)?;
132
133        let data = match node.Data {
134            Some(Cow::Borrowed(bytes)) if !bytes.is_empty() => Some(bytes),
135            Some(Cow::Owned(_)) => unreachable!(),
136            Some(Cow::Borrowed(_)) | None => return Err(ParsingFailed::NoData(node)),
137        };
138
139        match UnixFs::try_from(data) {
140            Ok(data) => Ok(FlatUnixFs {
141                links: node.Links,
142                data,
143            }),
144            Err(e) => Err(ParsingFailed::InvalidUnixFs(e, node)),
145        }
146    }
147}
148
#[cfg(test)]
impl<'a> FlatUnixFs<'a> {
    /// Pairs a clone of every link with the byte range it covers, starting from offset zero.
    ///
    /// # Panics
    ///
    /// Panics when the number of links differs from the number of blocksizes.
    pub fn range_links(&'a self) -> impl Iterator<Item = (PBLink<'a>, Range<u64>)> {
        let blocksizes = &self.data.blocksizes;
        assert_eq!(self.links.len(), blocksizes.len());

        // important: the lengths were validated to be equal above, so zipping pairs up every
        // link with its blocksize
        let pairs = self.links.iter().cloned().zip(blocksizes.iter().copied());

        RangeLinks::from_links_and_blocksizes(pairs, Some(0))
    }
}
164
/// Iterator adapter turning `(link, blocksize)` pairs into `(link, range)` pairs, where the
/// ranges follow each other starting from a base offset.
pub(crate) struct RangeLinks<I> {
    /// Source of the `(link, blocksize)` pairs.
    inner: I,
    /// Start offset of the next range to be yielded.
    base: u64,
}
169
170impl<'a, I> RangeLinks<I>
171where
172    I: Iterator<Item = (PBLink<'a>, u64)>,
173{
174    /// `start_offset` is the offset of the current tree when walking the graph.
175    pub fn from_links_and_blocksizes(zipped: I, start_offset: Option<u64>) -> RangeLinks<I> {
176        RangeLinks {
177            inner: zipped,
178            base: start_offset.unwrap_or(0),
179        }
180    }
181}
182
183impl<'a, I> Iterator for RangeLinks<I>
184where
185    I: Iterator<Item = (PBLink<'a>, u64)>,
186{
187    type Item = (PBLink<'a>, Range<u64>);
188
189    fn next(&mut self) -> Option<Self::Item> {
190        self.inner.next().map(|(link, blocksize)| {
191            let returned_base = self.base;
192            self.base += blocksize;
193            (link, returned_base..(returned_base + blocksize))
194        })
195    }
196
197    fn size_hint(&self) -> (usize, Option<usize>) {
198        self.inner.size_hint()
199    }
200}
201
#[cfg(test)]
mod test {
    use super::{FlatUnixFs, PBNode, UnixFs, UnixFsType};
    use alloc::borrow::Cow;
    use core::convert::TryFrom;
    use hex_literal::hex;

    /// A dag-pb node without links whose UnixFs payload is a file with inline content.
    #[test]
    fn parse_content() {
        use quick_protobuf::{BytesReader, MessageRead};
        let input = hex!("0a0d08021207636f6e74656e741807");

        let mut reader = BytesReader::from_bytes(&input);
        let dagnode =
            PBNode::from_reader(&mut reader, &input).expect("parse outer merkledag::PBNode");
        assert!(dagnode.Links.is_empty());

        let unixfs_data = UnixFs::try_from(&dagnode).expect("parse inner unixfs::Data");
        assert_eq!(unixfs_data.Type, UnixFsType::File);
        assert_eq!(unixfs_data.Data, Some(Cow::Borrowed(&b"content"[..])));
        assert_eq!(unixfs_data.filesize, Some(7));
    }

    /// A file node with three links; every link must be paired with the expected byte range.
    #[test]
    fn linux_tarxz_range_links() {
        let input = hex!("122b0a2212203822560f945fd3c74522de3448512a7e45cb53f0a9a1e12161da4667531ec12e120018aed4e015122b0a2212208594eb4dd5d67e573d506cd950ac59863b9afb024a590d7fe49b42fbcb44af43120018aed4e015122b0a221220745a70b6cd7ec3e46d16fb15b5e1e5db256f6a7a52d0b359f8f49b242665e17b120018b4e7e8090a1608021888c1a835208080e015208080e0152088c1e809");

        let flat = FlatUnixFs::try_from(&input[..]).unwrap();

        let mut expected_ranges = vec![
            0..45_613_056,
            45_613_056..91_226_112,
            91_226_112..111_812_744,
        ]
        .into_iter();

        for (link, range) in flat.range_links() {
            assert_eq!(link.Name, Some(Cow::Borrowed("")));
            // Tsize is the subtree size, which must always be at least as large as the file
            // segment because of encoding
            assert!(link.Tsize >= Some(range.end - range.start));
            assert_eq!(Some(range), expected_ranges.next());
        }

        // without this the test would pass even if range_links yielded too few or no links
        assert_eq!(
            expected_ranges.next(),
            None,
            "range_links yielded fewer links than expected"
        );
    }
}
248}