gix_commitgraph/file/
init.rs

1use std::path::{Path, PathBuf};
2
3use bstr::ByteSlice;
4
5use crate::{
6    file::{
7        ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
8        EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
9    },
10    File,
11};
12
/// The error used in [`File::at()`].
///
/// Every variant describes one way in which opening or validating a commit-graph file can fail.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
    /// The number of base graphs implied by the size of the base-graphs-list chunk (`from_chunk`)
    /// differs from the base graph count stored in the file header (`from_header`).
    #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())]
    BaseGraphMismatch { from_header: u8, from_chunk: u32 },
    /// Two chunks disagree about how many commits the file contains.
    #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())]
    CommitCountMismatch {
        chunk1_id: ChunkId,
        chunk1_commits: u32,
        chunk2_id: ChunkId,
        chunk2_commits: u32,
    },
    /// The file is structurally broken, e.g. it is too small or lacks the expected signature.
    /// The message describes the problem.
    #[error("{0}")]
    Corrupt(String),
    // This error case is disabled, as git allows extra garbage in the extra edges list?
    // #[error("The last entry in commit-graph's extended edges list is not marked as being terminal")]
    // ExtraEdgesOverflow,
    /// The file at `path` could not be opened or memory-mapped.
    #[error("Could not open commit-graph file at '{}'", .path.display())]
    Io {
        #[source]
        err: std::io::Error,
        path: std::path::PathBuf,
    },
    /// The data following the last chunk does not have the size of a single hash.
    /// The message states the expected and the actual length.
    #[error("{0}")]
    Trailer(String),
    /// The hash-version byte of the header does not map to a known `gix_hash::Kind`.
    #[error("Commit-graph file uses unsupported hash version: {0}")]
    UnsupportedHashVersion(u8),
    /// The version byte of the header is not `1`.
    #[error("Unsupported commit-graph file version: {0}")]
    UnsupportedVersion(u8),
    /// The chunk table of contents could not be decoded.
    #[error(transparent)]
    ChunkFileDecode(#[from] gix_chunk::file::decode::Error),
    /// A chunk that is required could not be found in the chunk table of contents.
    #[error(transparent)]
    MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error),
    /// The chunk `id` has a size that is incompatible with its content; `msg` explains why.
    #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())]
    InvalidChunkSize { id: ChunkId, msg: String },
}
50
51const MIN_FILE_SIZE: usize = HEADER_LEN
52    + gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/)
53    + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */
54    + gix_hash::Kind::shortest().len_in_bytes();
55
56impl File {
57    /// Try to parse the commit graph file at `path`.
58    pub fn at(path: impl AsRef<Path>) -> Result<File, Error> {
59        Self::try_from(path.as_ref())
60    }
61
62    /// A lower-level constructor which constructs a new instance directly from the mapping in `data`,
63    /// assuming that it originated from `path`.
64    ///
65    /// Note that `path` is only used for verification of the hash its basename contains, but otherwise
66    /// is not of importance.
67    pub fn new(data: memmap2::Mmap, path: PathBuf) -> Result<File, Error> {
68        let data_size = data.len();
69        if data_size < MIN_FILE_SIZE {
70            return Err(Error::Corrupt(
71                "Commit-graph file too small even for an empty graph".to_owned(),
72            ));
73        }
74
75        let mut ofs = 0;
76        if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
77            return Err(Error::Corrupt(
78                "Commit-graph file does not start with expected signature".to_owned(),
79            ));
80        }
81        ofs += SIGNATURE.len();
82
83        match data[ofs] {
84            1 => (),
85            x => {
86                return Err(Error::UnsupportedVersion(x));
87            }
88        }
89        ofs += 1;
90
91        let object_hash = gix_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?;
92        ofs += 1;
93
94        let chunk_count = data[ofs];
95        // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make
96        // it redundant.
97        ofs += 1;
98
99        let base_graph_count = data[ofs];
100        ofs += 1;
101
102        let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, u32::from(chunk_count))?;
103
104        let base_graphs_list_offset = chunks
105            .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
106                let chunk_size = chunk_range.len();
107                if chunk_size % object_hash.len_in_bytes() != 0 {
108                    return Err(Error::InvalidChunkSize {
109                        id: BASE_GRAPHS_LIST_CHUNK_ID,
110                        msg: format!(
111                            "chunk size {} is not a multiple of {}",
112                            chunk_size,
113                            object_hash.len_in_bytes()
114                        ),
115                    });
116                }
117                let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes())
118                    .try_into()
119                    .expect("base graph count to fit in 32-bits");
120                if chunk_base_graph_count != u32::from(base_graph_count) {
121                    return Err(Error::BaseGraphMismatch {
122                        from_chunk: chunk_base_graph_count,
123                        from_header: base_graph_count,
124                    });
125                }
126                Ok(chunk_range.start)
127            })
128            .ok()
129            .transpose()?;
130
131        let (commit_data_offset, commit_data_count) =
132            chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
133                let chunk_size = chunk_range.len();
134
135                let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
136                if chunk_size % entry_size != 0 {
137                    return Err(Error::InvalidChunkSize {
138                        id: COMMIT_DATA_CHUNK_ID,
139                        msg: format!("chunk size {chunk_size} is not a multiple of {entry_size}"),
140                    });
141                }
142                Ok((
143                    chunk_range.start,
144                    (chunk_size / entry_size)
145                        .try_into()
146                        .expect("number of commits in CDAT chunk to fit in 32 bits"),
147                ))
148            })??;
149
150        let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
151            let chunk_size = chunk_range.len();
152
153            let expected_size = 4 * FAN_LEN;
154            if chunk_size != expected_size {
155                return Err(Error::InvalidChunkSize {
156                    id: OID_FAN_CHUNK_ID,
157                    msg: format!("expected chunk length {expected_size}, got {chunk_size}"),
158                });
159            }
160            Ok(chunk_range.start)
161        })??;
162
163        let (oid_lookup_offset, oid_lookup_count) =
164            chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
165                let chunk_size = chunk_range.len();
166
167                if chunk_size % object_hash.len_in_bytes() != 0 {
168                    return Err(Error::InvalidChunkSize {
169                        id: OID_LOOKUP_CHUNK_ID,
170                        msg: format!(
171                            "chunk size {} is not a multiple of {}",
172                            chunk_size,
173                            object_hash.len_in_bytes()
174                        ),
175                    });
176                }
177                Ok((
178                    chunk_range.start,
179                    (chunk_size / object_hash.len_in_bytes())
180                        .try_into()
181                        .expect("number of commits in OIDL chunk to fit in 32 bits"),
182                ))
183            })??;
184
185        let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
186
187        let trailer = &data[chunks.highest_offset() as usize..];
188        if trailer.len() != object_hash.len_in_bytes() {
189            return Err(Error::Trailer(format!(
190                "Expected commit-graph trailer to contain {} bytes, got {}",
191                object_hash.len_in_bytes(),
192                trailer.len()
193            )));
194        }
195
196        if base_graph_count > 0 && base_graphs_list_offset.is_none() {
197            return Err(gix_chunk::file::index::offset_by_kind::Error {
198                kind: BASE_GRAPHS_LIST_CHUNK_ID,
199            }
200            .into());
201        }
202
203        let (fan, _) = read_fan(&data[fan_offset..]);
204        if oid_lookup_count != fan[255] {
205            return Err(Error::CommitCountMismatch {
206                chunk1_id: OID_FAN_CHUNK_ID,
207                chunk1_commits: fan[255],
208                chunk2_id: OID_LOOKUP_CHUNK_ID,
209                chunk2_commits: oid_lookup_count,
210            });
211        }
212        if commit_data_count != fan[255] {
213            return Err(Error::CommitCountMismatch {
214                chunk1_id: OID_FAN_CHUNK_ID,
215                chunk1_commits: fan[255],
216                chunk2_id: COMMIT_DATA_CHUNK_ID,
217                chunk2_commits: commit_data_count,
218            });
219        }
220        Ok(File {
221            base_graph_count,
222            base_graphs_list_offset,
223            commit_data_offset,
224            data,
225            extra_edges_list_range,
226            fan,
227            oid_lookup_offset,
228            path,
229            hash_len: object_hash.len_in_bytes(),
230            object_hash,
231        })
232    }
233}
234
235impl TryFrom<&Path> for File {
236    type Error = Error;
237
238    fn try_from(path: &Path) -> Result<Self, Self::Error> {
239        let data = std::fs::File::open(path)
240            .and_then(|file| {
241                // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
242                #[allow(unsafe_code)]
243                unsafe {
244                    memmap2::MmapOptions::new().map_copy_read_only(&file)
245                }
246            })
247            .map_err(|e| Error::Io {
248                err: e,
249                path: path.to_owned(),
250            })?;
251        Self::new(data, path.to_owned())
252    }
253}
254
255// Copied from gix-odb/pack/index/init.rs
256fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
257    assert!(d.len() >= FAN_LEN * 4);
258
259    let mut fan = [0; FAN_LEN];
260    for (c, f) in d.chunks_exact(4).zip(fan.iter_mut()) {
261        *f = u32::from_be_bytes(c.try_into().unwrap());
262    }
263    (fan, FAN_LEN * 4)
264}