1use std::path::{Path, PathBuf};
2
3use bstr::ByteSlice;
4
5use crate::{
6 file::{
7 ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
8 EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
9 },
10 File,
11};
12
13#[derive(thiserror::Error, Debug)]
15#[allow(missing_docs)]
16pub enum Error {
17 #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())]
18 BaseGraphMismatch { from_header: u8, from_chunk: u32 },
19 #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())]
20 CommitCountMismatch {
21 chunk1_id: ChunkId,
22 chunk1_commits: u32,
23 chunk2_id: ChunkId,
24 chunk2_commits: u32,
25 },
26 #[error("{0}")]
27 Corrupt(String),
28 #[error("Could not open commit-graph file at '{}'", .path.display())]
32 Io {
33 #[source]
34 err: std::io::Error,
35 path: std::path::PathBuf,
36 },
37 #[error("{0}")]
38 Trailer(String),
39 #[error("Commit-graph file uses unsupported hash version: {0}")]
40 UnsupportedHashVersion(u8),
41 #[error("Unsupported commit-graph file version: {0}")]
42 UnsupportedVersion(u8),
43 #[error(transparent)]
44 ChunkFileDecode(#[from] gix_chunk::file::decode::Error),
45 #[error(transparent)]
46 MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error),
47 #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())]
48 InvalidChunkSize { id: ChunkId, msg: String },
49}
50
51const MIN_FILE_SIZE: usize = HEADER_LEN
52 + gix_chunk::file::Index::size_for_entries(3 )
53 + FAN_LEN * 4 + gix_hash::Kind::shortest().len_in_bytes();
55
56impl File {
57 pub fn at(path: impl AsRef<Path>) -> Result<File, Error> {
59 Self::try_from(path.as_ref())
60 }
61
62 pub fn new(data: memmap2::Mmap, path: PathBuf) -> Result<File, Error> {
68 let data_size = data.len();
69 if data_size < MIN_FILE_SIZE {
70 return Err(Error::Corrupt(
71 "Commit-graph file too small even for an empty graph".to_owned(),
72 ));
73 }
74
75 let mut ofs = 0;
76 if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
77 return Err(Error::Corrupt(
78 "Commit-graph file does not start with expected signature".to_owned(),
79 ));
80 }
81 ofs += SIGNATURE.len();
82
83 match data[ofs] {
84 1 => (),
85 x => {
86 return Err(Error::UnsupportedVersion(x));
87 }
88 }
89 ofs += 1;
90
91 let object_hash = gix_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?;
92 ofs += 1;
93
94 let chunk_count = data[ofs];
95 ofs += 1;
98
99 let base_graph_count = data[ofs];
100 ofs += 1;
101
102 let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, u32::from(chunk_count))?;
103
104 let base_graphs_list_offset = chunks
105 .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
106 let chunk_size = chunk_range.len();
107 if chunk_size % object_hash.len_in_bytes() != 0 {
108 return Err(Error::InvalidChunkSize {
109 id: BASE_GRAPHS_LIST_CHUNK_ID,
110 msg: format!(
111 "chunk size {} is not a multiple of {}",
112 chunk_size,
113 object_hash.len_in_bytes()
114 ),
115 });
116 }
117 let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes())
118 .try_into()
119 .expect("base graph count to fit in 32-bits");
120 if chunk_base_graph_count != u32::from(base_graph_count) {
121 return Err(Error::BaseGraphMismatch {
122 from_chunk: chunk_base_graph_count,
123 from_header: base_graph_count,
124 });
125 }
126 Ok(chunk_range.start)
127 })
128 .ok()
129 .transpose()?;
130
131 let (commit_data_offset, commit_data_count) =
132 chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
133 let chunk_size = chunk_range.len();
134
135 let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
136 if chunk_size % entry_size != 0 {
137 return Err(Error::InvalidChunkSize {
138 id: COMMIT_DATA_CHUNK_ID,
139 msg: format!("chunk size {chunk_size} is not a multiple of {entry_size}"),
140 });
141 }
142 Ok((
143 chunk_range.start,
144 (chunk_size / entry_size)
145 .try_into()
146 .expect("number of commits in CDAT chunk to fit in 32 bits"),
147 ))
148 })??;
149
150 let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
151 let chunk_size = chunk_range.len();
152
153 let expected_size = 4 * FAN_LEN;
154 if chunk_size != expected_size {
155 return Err(Error::InvalidChunkSize {
156 id: OID_FAN_CHUNK_ID,
157 msg: format!("expected chunk length {expected_size}, got {chunk_size}"),
158 });
159 }
160 Ok(chunk_range.start)
161 })??;
162
163 let (oid_lookup_offset, oid_lookup_count) =
164 chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
165 let chunk_size = chunk_range.len();
166
167 if chunk_size % object_hash.len_in_bytes() != 0 {
168 return Err(Error::InvalidChunkSize {
169 id: OID_LOOKUP_CHUNK_ID,
170 msg: format!(
171 "chunk size {} is not a multiple of {}",
172 chunk_size,
173 object_hash.len_in_bytes()
174 ),
175 });
176 }
177 Ok((
178 chunk_range.start,
179 (chunk_size / object_hash.len_in_bytes())
180 .try_into()
181 .expect("number of commits in OIDL chunk to fit in 32 bits"),
182 ))
183 })??;
184
185 let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
186
187 let trailer = &data[chunks.highest_offset() as usize..];
188 if trailer.len() != object_hash.len_in_bytes() {
189 return Err(Error::Trailer(format!(
190 "Expected commit-graph trailer to contain {} bytes, got {}",
191 object_hash.len_in_bytes(),
192 trailer.len()
193 )));
194 }
195
196 if base_graph_count > 0 && base_graphs_list_offset.is_none() {
197 return Err(gix_chunk::file::index::offset_by_kind::Error {
198 kind: BASE_GRAPHS_LIST_CHUNK_ID,
199 }
200 .into());
201 }
202
203 let (fan, _) = read_fan(&data[fan_offset..]);
204 if oid_lookup_count != fan[255] {
205 return Err(Error::CommitCountMismatch {
206 chunk1_id: OID_FAN_CHUNK_ID,
207 chunk1_commits: fan[255],
208 chunk2_id: OID_LOOKUP_CHUNK_ID,
209 chunk2_commits: oid_lookup_count,
210 });
211 }
212 if commit_data_count != fan[255] {
213 return Err(Error::CommitCountMismatch {
214 chunk1_id: OID_FAN_CHUNK_ID,
215 chunk1_commits: fan[255],
216 chunk2_id: COMMIT_DATA_CHUNK_ID,
217 chunk2_commits: commit_data_count,
218 });
219 }
220 Ok(File {
221 base_graph_count,
222 base_graphs_list_offset,
223 commit_data_offset,
224 data,
225 extra_edges_list_range,
226 fan,
227 oid_lookup_offset,
228 path,
229 hash_len: object_hash.len_in_bytes(),
230 object_hash,
231 })
232 }
233}
234
235impl TryFrom<&Path> for File {
236 type Error = Error;
237
238 fn try_from(path: &Path) -> Result<Self, Self::Error> {
239 let data = std::fs::File::open(path)
240 .and_then(|file| {
241 #[allow(unsafe_code)]
243 unsafe {
244 memmap2::MmapOptions::new().map_copy_read_only(&file)
245 }
246 })
247 .map_err(|e| Error::Io {
248 err: e,
249 path: path.to_owned(),
250 })?;
251 Self::new(data, path.to_owned())
252 }
253}
254
255fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
257 assert!(d.len() >= FAN_LEN * 4);
258
259 let mut fan = [0; FAN_LEN];
260 for (c, f) in d.chunks_exact(4).zip(fan.iter_mut()) {
261 *f = u32::from_be_bytes(c.try_into().unwrap());
262 }
263 (fan, FAN_LEN * 4)
264}