Skip to main content

gix_pack/multi_index/
init.rs

1use std::path::{Path, PathBuf};
2
3use crate::multi_index::{File, Version, chunk};
4
5mod error {
6    use crate::multi_index::chunk;
7
8    /// The error returned by [File::at()][super::File::at()].
9    #[derive(Debug, thiserror::Error)]
10    #[allow(missing_docs)]
11    pub enum Error {
12        #[error("Could not open multi-index file at '{path}'")]
13        Io {
14            source: std::io::Error,
15            path: std::path::PathBuf,
16        },
17        #[error("{message}")]
18        Corrupt { message: &'static str },
19        #[error("Unsupported multi-index version: {version})")]
20        UnsupportedVersion { version: u8 },
21        #[error("Unsupported hash kind: {kind})")]
22        UnsupportedObjectHash { kind: u8 },
23        #[error(transparent)]
24        ChunkFileQuery(#[from] gix_error::Message),
25        #[error(transparent)]
26        ChunkFileDecode(#[from] gix_error::ValidationError),
27        #[error("The multi-pack fan doesn't have the correct size of 256 * 4 bytes")]
28        MultiPackFanSize,
29        #[error(transparent)]
30        PackNames(#[from] chunk::index_names::decode::Error),
31        #[error("multi-index chunk {:?} has invalid size: {message}", String::from_utf8_lossy(.id))]
32        InvalidChunkSize { id: gix_chunk::Id, message: &'static str },
33    }
34}
35
36pub use error::Error;
37
38/// Initialization
39impl File<crate::MMap> {
40    /// Open the multi-index file at the given `path`.
41    ///
42    /// `alloc_limit_bytes` bounds each allocation caused by user-controlled on-disk data, useful for untrusted input.
43    /// Use `None` to disable the limit.
44    pub fn at(path: impl AsRef<Path>, alloc_limit_bytes: Option<usize>) -> Result<Self, Error> {
45        Self::at_inner(path.as_ref(), alloc_limit_bytes)
46    }
47
48    fn at_inner(path: &Path, alloc_limit_bytes: Option<usize>) -> Result<Self, Error> {
49        let data = crate::mmap::read_only(path).map_err(|source| Error::Io {
50            source,
51            path: path.to_owned(),
52        })?;
53        Self::from_data(data, path.to_owned(), alloc_limit_bytes)
54    }
55}
56
57impl<T> File<T>
58where
59    T: crate::FileData,
60{
61    /// Instantiate a multi-index file from `data` as assumed to be read or memory-mapped from `path`.
62    ///
63    /// `alloc_limit_bytes` bounds each allocation caused by untrusted on-disk multi-index data.
64    /// Use `None` to disable the limit.
65    ///
66    ///  It is used to reject reserving the output `Vec<PathBuf>` if its capacity estimate exceeds the limit,
67    ///  and to reject any single path entry whose byte length exceeds the limit before turning it into a `PathBuf`.
68    pub fn from_data(data: T, path: PathBuf, alloc_limit_bytes: Option<usize>) -> Result<Self, Error> {
69        const TRAILER_LEN: usize = gix_hash::Kind::shortest().len_in_bytes(); /* trailing hash */
70        if data.len()
71            < Self::HEADER_LEN
72                + gix_chunk::file::Index::size_for_entries(4 /*index names, fan, offsets, oids*/)
73                + chunk::fanout::SIZE
74                + TRAILER_LEN
75        {
76            return Err(Error::Corrupt {
77                message: "multi-index file is truncated and too short",
78            });
79        }
80
81        let (version, object_hash, num_chunks, num_indices) = {
82            let (signature, data) = data.split_at(4);
83            if signature != Self::SIGNATURE {
84                return Err(Error::Corrupt {
85                    message: "Invalid signature",
86                });
87            }
88            let (version, data) = data.split_at(1);
89            let version = match version[0] {
90                1 => Version::V1,
91                version => return Err(Error::UnsupportedVersion { version }),
92            };
93
94            let (object_hash, data) = data.split_at(1);
95            let object_hash = gix_hash::Kind::try_from(object_hash[0])
96                .map_err(|unknown| Error::UnsupportedObjectHash { kind: unknown })?;
97            let (num_chunks, data) = data.split_at(1);
98            let num_chunks = num_chunks[0];
99
100            let (_num_base_files, data) = data.split_at(1); // TODO: handle base files once it's clear what this does
101
102            let (num_indices, _) = data.split_at(4);
103            let num_indices = crate::read_u32(num_indices);
104
105            (version, object_hash, num_chunks, num_indices)
106        };
107
108        let chunks = gix_chunk::file::Index::from_bytes(&data, Self::HEADER_LEN, u32::from(num_chunks))?;
109
110        let index_names = chunks.data_by_id(&data, chunk::index_names::ID)?;
111        let index_names = chunk::index_names::from_bytes(index_names, num_indices, alloc_limit_bytes)?;
112
113        let fan = chunks.data_by_id(&data, chunk::fanout::ID)?;
114        let fan = chunk::fanout::from_bytes(fan).ok_or(Error::MultiPackFanSize)?;
115        let num_objects = fan[255];
116        validate_fan(&fan)?;
117
118        let lookup = chunks.validated_usize_offset_by_id(chunk::lookup::ID, |offset| {
119            chunk::lookup::is_valid(&offset, object_hash, num_objects)
120                .then_some(offset)
121                .ok_or(Error::InvalidChunkSize {
122                    id: chunk::lookup::ID,
123                    message: "The chunk with alphabetically ordered object ids doesn't have the correct size",
124                })
125        })??;
126        let offsets = chunks.validated_usize_offset_by_id(chunk::offsets::ID, |offset| {
127            chunk::offsets::is_valid(&offset, num_objects)
128                .then_some(offset)
129                .ok_or(Error::InvalidChunkSize {
130                    id: chunk::offsets::ID,
131                    message: "The chunk with offsets into the pack doesn't have the correct size",
132                })
133        })??;
134        let large_offsets = chunks
135            .validated_usize_offset_by_id(chunk::large_offsets::ID, |offset| {
136                chunk::large_offsets::is_valid(&offset)
137                    .then_some(offset)
138                    .ok_or(Error::InvalidChunkSize {
139                        id: chunk::large_offsets::ID,
140                        message: "The chunk with large offsets into the pack doesn't have the correct size",
141                    })
142            })
143            .ok()
144            .transpose()?;
145
146        let checksum_offset = chunks.highest_offset() as usize;
147        let trailer = &data[checksum_offset..];
148        if trailer.len() != object_hash.len_in_bytes() {
149            return Err(Error::Corrupt {
150                message: "Trailing checksum didn't have the expected size or there were unknown bytes after the checksum.",
151            });
152        }
153
154        Ok(File {
155            data,
156            path,
157            version,
158            hash_len: object_hash.len_in_bytes(),
159            object_hash,
160            fan,
161            index_names,
162            lookup_ofs: lookup.start,
163            offsets_ofs: offsets.start,
164            large_offsets_ofs: large_offsets.map(|r| r.start),
165            num_objects,
166            num_indices,
167        })
168    }
169}
170
171fn validate_fan(fan: &[u32; 256]) -> Result<(), Error> {
172    if !crate::fan_is_monotonically_increasing(fan) {
173        return Err(Error::Corrupt {
174            message: "multi-index fan-out table must be monotonically increasing",
175        });
176    }
177    Ok(())
178}