git_commitgraph/file/
init.rs

1use std::{
2    convert::{TryFrom, TryInto},
3    path::Path,
4};
5
6use bstr::ByteSlice;
7use memmap2::Mmap;
8
9use crate::file::{
10    ChunkId, File, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
11    EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
12};
13
14/// The error used in [`File::at()`].
15#[derive(thiserror::Error, Debug)]
16#[allow(missing_docs)]
17pub enum Error {
18    #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())]
19    BaseGraphMismatch { from_header: u8, from_chunk: u32 },
20    #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())]
21    CommitCountMismatch {
22        chunk1_id: ChunkId,
23        chunk1_commits: u32,
24        chunk2_id: ChunkId,
25        chunk2_commits: u32,
26    },
27    #[error("{0}")]
28    Corrupt(String),
29    // This error case is disabled, as git allows extra garbage in the extra edges list?
30    // #[error("The last entry in commit-graph's extended edges list does is not marked as being terminal")]
31    // ExtraEdgesOverflow,
32    #[error("Could not open commit-graph file at '{}'", .path.display())]
33    Io {
34        #[source]
35        err: std::io::Error,
36        path: std::path::PathBuf,
37    },
38    #[error("{0}")]
39    Trailer(String),
40    #[error("Commit-graph file uses unsupported hash version: {0}")]
41    UnsupportedHashVersion(u8),
42    #[error("Unsupported commit-graph file version: {0}")]
43    UnsupportedVersion(u8),
44    #[error(transparent)]
45    ChunkFileDecode(#[from] git_chunk::file::decode::Error),
46    #[error(transparent)]
47    MissingChunk(#[from] git_chunk::file::index::offset_by_kind::Error),
48    #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())]
49    InvalidChunkSize { id: ChunkId, msg: String },
50}
51
52const MIN_FILE_SIZE: usize = HEADER_LEN
53    + git_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/)
54    + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */
55    + git_hash::Kind::shortest().len_in_bytes();
56
57impl File {
58    /// Try to parse the commit graph file at `path`.
59    pub fn at(path: impl AsRef<Path>) -> Result<File, Error> {
60        Self::try_from(path.as_ref())
61    }
62}
63
64impl TryFrom<&Path> for File {
65    type Error = Error;
66
67    fn try_from(path: &Path) -> Result<Self, Self::Error> {
68        let data = std::fs::File::open(path)
69            .and_then(|file| {
70                // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
71                #[allow(unsafe_code)]
72                unsafe {
73                    Mmap::map(&file)
74                }
75            })
76            .map_err(|e| Error::Io {
77                err: e,
78                path: path.to_owned(),
79            })?;
80        let data_size = data.len();
81        if data_size < MIN_FILE_SIZE {
82            return Err(Error::Corrupt(
83                "Commit-graph file too small even for an empty graph".to_owned(),
84            ));
85        }
86
87        let mut ofs = 0;
88        if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
89            return Err(Error::Corrupt(
90                "Commit-graph file does not start with expected signature".to_owned(),
91            ));
92        }
93        ofs += SIGNATURE.len();
94
95        match data[ofs] {
96            1 => (),
97            x => {
98                return Err(Error::UnsupportedVersion(x));
99            }
100        };
101        ofs += 1;
102
103        let object_hash = git_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?;
104        ofs += 1;
105
106        let chunk_count = data[ofs];
107        // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make
108        // it redundant.
109        ofs += 1;
110
111        let base_graph_count = data[ofs];
112        ofs += 1;
113
114        let chunks = git_chunk::file::Index::from_bytes(&data, ofs, chunk_count as u32)?;
115
116        let base_graphs_list_offset = chunks
117            .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
118                let chunk_size = chunk_range.len();
119                if chunk_size % object_hash.len_in_bytes() != 0 {
120                    return Err(Error::InvalidChunkSize {
121                        id: BASE_GRAPHS_LIST_CHUNK_ID,
122                        msg: format!(
123                            "chunk size {} is not a multiple of {}",
124                            chunk_size,
125                            object_hash.len_in_bytes()
126                        ),
127                    });
128                }
129                let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes())
130                    .try_into()
131                    .expect("base graph count to fit in 32-bits");
132                if chunk_base_graph_count != u32::from(base_graph_count) {
133                    return Err(Error::BaseGraphMismatch {
134                        from_chunk: chunk_base_graph_count,
135                        from_header: base_graph_count,
136                    });
137                }
138                Ok(chunk_range.start)
139            })
140            .ok()
141            .transpose()?;
142
143        let (commit_data_offset, commit_data_count) =
144            chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
145                let chunk_size = chunk_range.len();
146
147                let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
148                if chunk_size % entry_size != 0 {
149                    return Err(Error::InvalidChunkSize {
150                        id: COMMIT_DATA_CHUNK_ID,
151                        msg: format!("chunk size {chunk_size} is not a multiple of {entry_size}"),
152                    });
153                }
154                Ok((
155                    chunk_range.start,
156                    (chunk_size / entry_size)
157                        .try_into()
158                        .expect("number of commits in CDAT chunk to fit in 32 bits"),
159                ))
160            })??;
161
162        let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
163            let chunk_size = chunk_range.len();
164
165            let expected_size = 4 * FAN_LEN;
166            if chunk_size != expected_size {
167                return Err(Error::InvalidChunkSize {
168                    id: OID_FAN_CHUNK_ID,
169                    msg: format!("expected chunk length {expected_size}, got {chunk_size}"),
170                });
171            }
172            Ok(chunk_range.start)
173        })??;
174
175        let (oid_lookup_offset, oid_lookup_count) =
176            chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
177                let chunk_size = chunk_range.len();
178
179                if chunk_size % object_hash.len_in_bytes() != 0 {
180                    return Err(Error::InvalidChunkSize {
181                        id: OID_LOOKUP_CHUNK_ID,
182                        msg: format!(
183                            "chunk size {} is not a multiple of {}",
184                            chunk_size,
185                            object_hash.len_in_bytes()
186                        ),
187                    });
188                }
189                Ok((
190                    chunk_range.start,
191                    (chunk_size / object_hash.len_in_bytes())
192                        .try_into()
193                        .expect("number of commits in OIDL chunk to fit in 32 bits"),
194                ))
195            })??;
196
197        let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
198
199        let trailer = &data[chunks.highest_offset() as usize..];
200        if trailer.len() != object_hash.len_in_bytes() {
201            return Err(Error::Trailer(format!(
202                "Expected commit-graph trailer to contain {} bytes, got {}",
203                object_hash.len_in_bytes(),
204                trailer.len()
205            )));
206        }
207
208        if base_graph_count > 0 && base_graphs_list_offset.is_none() {
209            return Err(git_chunk::file::index::offset_by_kind::Error {
210                kind: BASE_GRAPHS_LIST_CHUNK_ID,
211            }
212            .into());
213        }
214
215        let (fan, _) = read_fan(&data[fan_offset..]);
216        if oid_lookup_count != fan[255] {
217            return Err(Error::CommitCountMismatch {
218                chunk1_id: OID_FAN_CHUNK_ID,
219                chunk1_commits: fan[255],
220                chunk2_id: OID_LOOKUP_CHUNK_ID,
221                chunk2_commits: oid_lookup_count,
222            });
223        }
224        if commit_data_count != fan[255] {
225            return Err(Error::CommitCountMismatch {
226                chunk1_id: OID_FAN_CHUNK_ID,
227                chunk1_commits: fan[255],
228                chunk2_id: COMMIT_DATA_CHUNK_ID,
229                chunk2_commits: commit_data_count,
230            });
231        }
232        Ok(File {
233            base_graph_count,
234            base_graphs_list_offset,
235            commit_data_offset,
236            data,
237            extra_edges_list_range,
238            fan,
239            oid_lookup_offset,
240            path: path.to_owned(),
241            hash_len: object_hash.len_in_bytes(),
242            object_hash,
243        })
244    }
245}
246
247// Copied from git-odb/pack/index/init.rs
248fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
249    let mut fan = [0; FAN_LEN];
250    for (c, f) in d.chunks(4).zip(fan.iter_mut()) {
251        *f = u32::from_be_bytes(c.try_into().unwrap());
252    }
253    (fan, FAN_LEN * 4)
254}