git_chunk/file/decode.rs
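
//! Decoding of a chunk file's table of contents (TOC).
//!
//! As implemented below, the TOC is a sequence of fixed-size entries, each a 4-byte
//! chunk identifier followed by a big-endian `u64` offset into the file. The end of
//! each chunk is the offset of the following entry, and the sequence is terminated
//! by a sentinel entry whose offset marks the end of the last chunk.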

use std::{convert::TryInto, ops::Range};

mod error {
    /// The error returned by [`Index::from_bytes()`][crate::file::Index::from_bytes()].
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        #[error("Sentinel value encountered while still processing chunks.")]
        EarlySentinelValue,
        #[error("Sentinel value wasn't found, saw {:?}", std::str::from_utf8(actual.as_ref()).unwrap_or("<non-ascii>"))]
        MissingSentinelValue { actual: crate::Id },
        #[error("The chunk offset {offset} went past the file of length {file_length} - was it truncated?")]
        ChunkSizeOutOfBounds {
            offset: crate::file::Offset,
            file_length: u64,
        },
        #[error("All chunk offsets must be increasing.")]
        NonIncrementalChunkOffsets,
        #[error("The chunk of kind {:?} was encountered more than once", std::str::from_utf8(kind.as_ref()).unwrap_or("<non-ascii>"))]
        DuplicateChunk { kind: crate::Id },
        #[error("The table of contents would be {expected} bytes, but got only {actual}")]
        TocTooSmall { actual: usize, expected: usize },
        #[error("Empty chunk indices are not allowed, as the point of chunked files is to have chunks.")]
        Empty,
    }
}
pub use error::Error;

use crate::{file, file::index};

impl file::Index {
    /// Decode the table of contents within `data`, which must contain the mapped file from its beginning.
    /// The table of contents starts at `toc_offset` and is expected to describe `num_chunks` chunks,
    /// followed by a sentinel entry.
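    ///
    /// A minimal usage sketch, marked `ignore` as `mapped`, `toc_offset` and `num_chunks` are
    /// hypothetical values a caller would obtain elsewhere, e.g. from the file's header:
    ///
    /// ```ignore
    /// // `mapped` contains the whole file, e.g. obtained via a memory map.
    /// let index = git_chunk::file::Index::from_bytes(&mapped, toc_offset, num_chunks)?;
    /// ```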
    pub fn from_bytes(data: &[u8], toc_offset: usize, num_chunks: u32) -> Result<Self, Error> {
        if num_chunks == 0 {
            return Err(Error::Empty);
        }

        let data_len: u64 = data.len() as u64;
        let mut chunks = Vec::with_capacity(num_chunks as usize);
        let mut toc_entry = &data[toc_offset..];
        // Account for the trailing sentinel entry as well, hence `num_chunks + 1`.
        let expected_min_size = (num_chunks as usize + 1) * file::Index::ENTRY_SIZE;
        if toc_entry.len() < expected_min_size {
            return Err(Error::TocTooSmall {
                expected: expected_min_size,
                actual: toc_entry.len(),
            });
        }

        for _ in 0..num_chunks {
            // Each entry is a 4-byte chunk identifier followed by a big-endian u64 offset.
            let (kind, offset) = toc_entry.split_at(4);
            let kind = to_kind(kind);
            if kind == crate::SENTINEL {
                return Err(Error::EarlySentinelValue);
            }
            if chunks.iter().any(|c: &index::Entry| c.kind == kind) {
                return Err(Error::DuplicateChunk { kind });
            }

            let offset = be_u64(offset);
            if offset > data_len {
                return Err(Error::ChunkSizeOutOfBounds {
                    offset,
                    file_length: data_len,
                });
            }
            // A chunk ends where the next entry begins, so peek at the following offset,
            // which belongs either to the next chunk or to the sentinel entry.
            toc_entry = &toc_entry[file::Index::ENTRY_SIZE..];
            let next_offset = be_u64(&toc_entry[4..]);
            if next_offset > data_len {
                return Err(Error::ChunkSizeOutOfBounds {
                    offset: next_offset,
                    file_length: data_len,
                });
            }
            if next_offset <= offset {
                return Err(Error::NonIncrementalChunkOffsets);
            }
            chunks.push(index::Entry {
                kind,
                offset: Range {
                    start: offset,
                    end: next_offset,
                },
            })
        }

        // The sentinel entry terminates the table of contents.
        let sentinel = to_kind(&toc_entry[..4]);
        if sentinel != crate::SENTINEL {
            return Err(Error::MissingSentinelValue { actual: sentinel });
        }

        Ok(file::Index {
            chunks,
            will_write: false,
        })
    }
}

/// Interpret the first 4 bytes of `data` as a chunk identifier.
fn to_kind(data: &[u8]) -> crate::Id {
    data[..4].try_into().unwrap()
}

/// Read a big-endian u64 from the first 8 bytes of `data`.
fn be_u64(data: &[u8]) -> u64 {
    u64::from_be_bytes(data[..8].try_into().unwrap())
}
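
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal decoding sketch rather than an exhaustive test suite. It assumes
    // `crate::Id` is a 4-byte array, `crate::SENTINEL` is the sentinel identifier
    // checked above, and `file::Index::ENTRY_SIZE` is 12 bytes (4-byte kind plus
    // 8-byte big-endian offset), matching how entries are read in `from_bytes()`.
    fn entry(kind: crate::Id, offset: u64) -> Vec<u8> {
        let mut buf = kind.to_vec();
        buf.extend_from_slice(&offset.to_be_bytes());
        buf
    }

    #[test]
    fn two_chunks_are_decoded() {
        // The TOC sits at the start of the file and occupies 3 * 12 = 36 bytes:
        // two chunk entries plus the terminating sentinel entry. Chunk "AAAA"
        // spans 36..40, chunk "BBBB" spans 40..48, and the sentinel's offset
        // marks the end of the last chunk.
        let mut data = Vec::new();
        data.extend(entry(*b"AAAA", 36));
        data.extend(entry(*b"BBBB", 40));
        data.extend(entry(crate::SENTINEL, 48));
        data.resize(48, 0); // chunk payloads; their content doesn't matter here

        let index = file::Index::from_bytes(&data, 0, 2).expect("valid TOC");
        assert_eq!(index.chunks.len(), 2);
        assert_eq!(index.chunks[0].kind, *b"AAAA");
        assert_eq!(index.chunks[0].offset, 36..40);
        assert_eq!(index.chunks[1].offset, 40..48);
    }
}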