gix_commitgraph/file/
init.rs

1use std::path::{Path, PathBuf};
2
3use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
4
5use crate::{
6    file::{
7        BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
8        EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
9    },
10    File,
11};
12
13const MIN_FILE_SIZE: usize = HEADER_LEN
14    + gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/)
15    + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */
16    + gix_hash::Kind::shortest().len_in_bytes();
17
18impl File {
19    /// Try to parse the commit graph file at `path`.
20    pub fn at(path: impl AsRef<Path>) -> Result<File, Exn<Message>> {
21        Self::try_from(path.as_ref())
22    }
23
24    /// A lower-level constructor which constructs a new instance directly from the mapping in `data`,
25    /// assuming that it originated from `path`.
26    ///
27    /// Note that `path` is only used for verification of the hash its basename contains, but otherwise
28    /// is not of importance.
29    pub fn new(data: memmap2::Mmap, path: PathBuf) -> Result<File, Exn<Message>> {
30        let data_size = data.len();
31        if data_size < MIN_FILE_SIZE {
32            return Err(message("Commit-graph file too small even for an empty graph").raise());
33        }
34
35        let mut ofs = 0;
36        if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
37            return Err(message("Commit-graph file does not start with expected signature").raise());
38        }
39        ofs += SIGNATURE.len();
40
41        match data[ofs] {
42            1 => (),
43            x => {
44                return Err(message!("Unsupported commit-graph file version: {x}").raise());
45            }
46        }
47        ofs += 1;
48
49        let object_hash = gix_hash::Kind::try_from(data[ofs])
50            .map_err(|v| message!("Commit-graph file uses unsupported hash version: {v}").raise())?;
51        ofs += 1;
52
53        let chunk_count = data[ofs];
54        // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make
55        // it redundant.
56        ofs += 1;
57
58        let base_graph_count = data[ofs];
59        ofs += 1;
60
61        let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, u32::from(chunk_count))
62            .or_raise(|| message!("Couldn't read commit-graph file with {chunk_count} chunks at offset {ofs}"))?;
63
64        let base_graphs_list_offset = chunks
65            .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
66                let chunk_size = chunk_range.len();
67                if chunk_size % object_hash.len_in_bytes() != 0 {
68                    return Err(message!("Commit-graph chunk {BASE_GRAPHS_LIST_CHUNK_ID:?} has invalid size: {msg}",
69                        msg = format!(
70                            "chunk size {} is not a multiple of {}",
71                            chunk_size,
72                            object_hash.len_in_bytes()
73                        ),
74                    ).raise());
75                }
76                let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes())
77                    .try_into()
78                    .expect("base graph count to fit in 32-bits");
79                if chunk_base_graph_count != u32::from(base_graph_count) {
80                    return Err(message!("Commit-graph {BASE_GRAPHS_LIST_CHUNK_ID:?} chunk contains {chunk_base_graph_count} base graphs, but commit-graph file header claims {base_graph_count} base graphs").raise())
81                }
82                Ok(chunk_range.start)
83            })
84            .ok()
85            .transpose()?;
86
87        let (commit_data_offset, commit_data_count): (_, u32) = chunks
88            .validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
89                let chunk_size = chunk_range.len();
90
91                let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
92                if chunk_size % entry_size != 0 {
93                    return Err(message!("Commit-graph chunk {COMMIT_DATA_CHUNK_ID:?} has invalid size: chunk size {chunk_size} is not a multiple of {entry_size}").raise())
94                }
95                Ok((
96                    chunk_range.start,
97                    (chunk_size / entry_size)
98                        .try_into()
99                        .expect("number of commits in CDAT chunk to fit in 32 bits"),
100                ))
101            })??;
102
103        let fan_offset = chunks
104            .validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
105                let chunk_size = chunk_range.len();
106
107                let expected_size = 4 * FAN_LEN;
108                if chunk_size != expected_size {
109                    return Err(message!("Commit-graph chunk {OID_FAN_CHUNK_ID:?} has invalid size: expected chunk length {expected_size}, got {chunk_size}").raise())
110                }
111                Ok(chunk_range.start)
112            })?
113            .or_raise(|| message("Error getting offset for OID fan chunk"))?;
114
115        let (oid_lookup_offset, oid_lookup_count): (_, u32) = chunks
116            .validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
117                let chunk_size = chunk_range.len();
118
119                if chunk_size % object_hash.len_in_bytes() != 0 {
120                    return Err(message!("Commit-graph chunk {OID_LOOKUP_CHUNK_ID:?} has invalid size: chunk size {chunk_size} is not a multiple of {hash_len}", hash_len = object_hash.len_in_bytes()).raise())
121                }
122                Ok((
123                    chunk_range.start,
124                    (chunk_size / object_hash.len_in_bytes())
125                        .try_into()
126                        .expect("number of commits in OIDL chunk to fit in 32 bits"),
127                ))
128            })?
129            .or_raise(|| message("Error getting offset for OID lookup chunk"))?;
130
131        let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
132
133        let trailer = &data[chunks.highest_offset() as usize..];
134        if trailer.len() != object_hash.len_in_bytes() {
135            return Err(message!(
136                "Expected commit-graph trailer to contain {} bytes, got {}",
137                object_hash.len_in_bytes(),
138                trailer.len()
139            )
140            .raise());
141        }
142
143        if base_graph_count > 0 && base_graphs_list_offset.is_none() {
144            return Err(message!("Chunk named {BASE_GRAPHS_LIST_CHUNK_ID:?} was not found in chunk file index").into());
145        }
146
147        let (fan, _) = read_fan(&data[fan_offset..]);
148        if oid_lookup_count != fan[255] {
149            return Err(message!("Commit-graph {OID_FAN_CHUNK_ID:?} chunk contains {chunk1_commits} commits, but {OID_LOOKUP_CHUNK_ID:?} chunk contains {chunk2_commits} commits",
150                chunk1_commits = fan[255],
151                chunk2_commits = oid_lookup_count,
152            ).raise());
153        }
154        if commit_data_count != fan[255] {
155            return Err(
156                message!("Commit-graph {OID_FAN_CHUNK_ID:?} chunk contains {chunk1_commits} commits, but {COMMIT_DATA_CHUNK_ID:?} chunk contains {chunk2_commits} commits",
157                    chunk1_commits = fan[255],
158                    chunk2_commits = commit_data_count,
159                ).raise(),
160            );
161        }
162        Ok(File {
163            base_graph_count,
164            base_graphs_list_offset,
165            commit_data_offset,
166            data,
167            extra_edges_list_range,
168            fan,
169            oid_lookup_offset,
170            path,
171            hash_len: object_hash.len_in_bytes(),
172            object_hash,
173        })
174    }
175}
176
177impl TryFrom<&Path> for File {
178    type Error = Exn<Message>;
179
180    fn try_from(path: &Path) -> Result<Self, Self::Error> {
181        let data = std::fs::File::open(path)
182            .and_then(|file| {
183                // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
184                #[allow(unsafe_code)]
185                unsafe {
186                    memmap2::MmapOptions::new().map_copy_read_only(&file)
187                }
188            })
189            .or_raise(|| message!("Could not open commit-graph file at '{path}'", path = path.display()))?;
190        Self::new(data, path.to_owned())
191    }
192}
193
194// Copied from gix-odb/pack/index/init.rs
195fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
196    assert!(d.len() >= FAN_LEN * 4);
197
198    let mut fan = [0; FAN_LEN];
199    for (c, f) in d.chunks_exact(4).zip(fan.iter_mut()) {
200        *f = u32::from_be_bytes(c.try_into().unwrap());
201    }
202    (fan, FAN_LEN * 4)
203}