1use std::path::{Path, PathBuf};
4use std::sync::{Arc, Mutex};
5
6use crate::bloom::{
7 bloom_filter_contains, bloom_keyvec_for_path, BloomBuildOutcome, BloomFilterSettings,
8};
9use crate::error::Error;
10use crate::objects::ObjectId;
11use crate::odb::Odb;
12
13fn warn_once_for_disabled_bloom_layer(id: &str) -> bool {
17 use std::collections::HashSet;
18 use std::sync::OnceLock;
19 static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
20 let set = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
21 match set.lock() {
22 Ok(mut guard) => guard.insert(id.to_string()),
23 Err(_) => true,
24 }
25}
26
27fn warn_once_for_base_chunk_too_small(id: &str) -> bool {
30 use std::collections::HashSet;
31 use std::sync::OnceLock;
32 static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
33 let set = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
34 match set.lock() {
35 Ok(mut guard) => guard.insert(id.to_string()),
36 Err(_) => true,
37 }
38}
39
40const SIGNATURE: &[u8; 4] = b"CGPH";
41const GRAPH_VERSION: u8 = 1;
42
43const CHUNK_OID_FANOUT: u32 = 0x4f49_4446; const CHUNK_OID_LOOKUP: u32 = 0x4f49_444c; const CHUNK_COMMIT_DATA: u32 = 0x4344_4154; const CHUNK_GENERATION_DATA: u32 = 0x4744_4132; const CHUNK_GENERATION_DATA_OVERFLOW: u32 = 0x4744_4f32; const CHUNK_EXTRA_EDGES: u32 = 0x4544_4745; const CHUNK_BLOOM_INDEXES: u32 = 0x4249_4458; const CHUNK_BLOOM_DATA: u32 = 0x4244_4154; const CHUNK_BASE_GRAPHS: u32 = 0x4241_5345; const BLOOM_HEADER: usize = crate::bloom::BLOOMDATA_HEADER_LEN;
54
55const CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW: u32 = 1u32 << 31;
57
58fn warn_path_for_graph_file(path: &Path) -> String {
59 let s = path.to_string_lossy();
60 if let Some(idx) = s.find(".git/") {
61 return s[idx..].replace('\\', "/");
62 }
63 s.replace('\\', "/")
64}
65
66#[derive(Debug, Clone)]
68pub struct CommitGraphLayer {
69 pub path: PathBuf,
70 body: Vec<u8>,
71 num_commits: u32,
72 oid_lookup_off: usize,
73 #[allow(dead_code)]
74 chunk_commit_data_off: usize,
75 #[allow(dead_code)]
76 chunk_generation_data: Option<usize>,
77 read_generation_data: bool,
78 chunk_bloom_indexes: Option<usize>,
79 chunk_bloom_data: Option<(usize, usize)>,
80 bloom_settings: Option<BloomFilterSettings>,
81 bloom_disabled: bool,
82 base_layers_declared: u32,
84 base_chunk_size: usize,
87 hash_len: usize,
89}
90
91const fn commit_graph_hash_len(hash_version: u8) -> Option<usize> {
93 match hash_version {
94 1 => Some(20),
95 2 => Some(32),
96 _ => None,
97 }
98}
99
100impl CommitGraphLayer {
101 pub fn try_parse(path: PathBuf, raw: Vec<u8>) -> Result<Self, Error> {
103 if raw.len() < 28 {
104 return Err(Error::CorruptObject(
105 "commit-graph file too small".to_owned(),
106 ));
107 }
108 let hash_len = match commit_graph_hash_len(raw[5]) {
111 Some(n) => n,
112 None => {
113 return Err(Error::CorruptObject(format!(
114 "commit-graph version/hash not supported (version {} hash {})",
115 raw[4], raw[5]
116 )))
117 }
118 };
119 let body = raw[..raw.len() - hash_len].to_vec();
120 if body.len() < 8 || &body[0..4] != SIGNATURE {
121 return Err(Error::CorruptObject(
122 "commit-graph has bad signature".to_owned(),
123 ));
124 }
125 if body[4] != GRAPH_VERSION {
126 return Err(Error::CorruptObject(format!(
127 "commit-graph version/hash not supported (version {} hash {})",
128 body[4], body[5]
129 )));
130 }
131 let num_chunks = body[6] as usize;
132 let toc_start = 8;
133 let toc_end = toc_start + (num_chunks + 1) * 12;
134 if body.len() < toc_end {
135 return Err(Error::CorruptObject(
136 "commit-graph truncated at chunk table".to_owned(),
137 ));
138 }
139
140 let mut fanout_off = None;
141 let mut oid_lookup_off = None;
142 let mut commit_data_off = None;
143 let mut generation_off = None;
144 let mut generation_overflow_off = None;
145 let mut bloom_idx_off = None;
146 let mut bloom_data_range = None;
147 let mut base_graphs_off = None;
148 let mut chunk_offsets: Vec<usize> = Vec::new();
149 let mut toc_entries: Vec<(u32, usize)> = Vec::with_capacity(num_chunks);
150
151 for i in 0..num_chunks {
152 let e = toc_start + i * 12;
153 let id = u32::from_be_bytes(
154 body[e..e + 4]
155 .try_into()
156 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
157 );
158 let off = u64::from_be_bytes(
159 body[e + 4..e + 12]
160 .try_into()
161 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
162 ) as usize;
163 toc_entries.push((id, off));
164 chunk_offsets.push(off);
165 match id {
166 CHUNK_OID_FANOUT => fanout_off = Some(off),
167 CHUNK_OID_LOOKUP => oid_lookup_off = Some(off),
168 CHUNK_COMMIT_DATA => commit_data_off = Some(off),
169 CHUNK_GENERATION_DATA => generation_off = Some(off),
170 CHUNK_GENERATION_DATA_OVERFLOW => generation_overflow_off = Some(off),
171 CHUNK_BLOOM_INDEXES => bloom_idx_off = Some(off),
172 CHUNK_BASE_GRAPHS => base_graphs_off = Some(off),
173 CHUNK_BLOOM_DATA => {
174 let end = if i + 1 < num_chunks {
175 let e2 = toc_start + (i + 1) * 12;
176 u64::from_be_bytes(body[e2 + 4..e2 + 12].try_into().unwrap_or([0u8; 8]))
177 as usize
178 } else {
179 let term = toc_start + num_chunks * 12;
180 u64::from_be_bytes(body[term + 4..term + 12].try_into().unwrap_or([0u8; 8]))
181 as usize
182 };
183 bloom_data_range = Some((off, end.saturating_sub(off)));
184 }
185 _ => {}
186 }
187 }
188 let file_end = u64::from_be_bytes(
189 body[toc_start + num_chunks * 12 + 4..toc_start + num_chunks * 12 + 12]
190 .try_into()
191 .map_err(|_| Error::CorruptObject("commit-graph bad file end".to_owned()))?,
192 ) as usize;
193 chunk_offsets.push(file_end);
194 chunk_offsets.sort_unstable();
195 chunk_offsets.dedup();
196
197 fn chunk_byte_range(
198 start: usize,
199 toc_entries: &[(u32, usize)],
200 file_end: usize,
201 ) -> Result<usize, Error> {
202 let mut ends: Vec<usize> = toc_entries
203 .iter()
204 .map(|&(_, o)| o)
205 .filter(|&o| o > start)
206 .collect();
207 ends.sort_unstable();
208 let end = ends.first().copied().unwrap_or(file_end);
209 if end < start {
210 return Err(Error::CorruptObject(
211 "commit-graph chunk layout invalid".to_owned(),
212 ));
213 }
214 Ok(end)
215 }
216
217 if let Some(gda) = generation_off {
218 let gda_end = chunk_byte_range(gda, &toc_entries, file_end)?;
219 let gda_len = gda_end.saturating_sub(gda);
220 let num_commits = fanout_off
221 .and_then(|fo| {
222 let slice = body.get(fo + 255 * 4..fo + 256 * 4)?;
223 Some(u32::from_be_bytes(slice.try_into().ok()?))
224 })
225 .ok_or_else(|| Error::CorruptObject("commit-graph missing fanout".to_owned()))?;
226 let expected = num_commits as usize * 4;
227 if gda_len < expected {
228 return Err(Error::CorruptObject(
229 "commit-graph generation data chunk is too small".to_owned(),
230 ));
231 }
232 let gda_slice = body.get(gda..gda + expected).ok_or_else(|| {
233 Error::CorruptObject("commit-graph generation data OOB".to_owned())
234 })?;
235 let mut max_overflow_idx: Option<u32> = None;
236 for w in 0..num_commits as usize {
237 let v =
238 u32::from_be_bytes(gda_slice[w * 4..w * 4 + 4].try_into().map_err(|_| {
239 Error::CorruptObject("commit-graph GDA2 corrupt".to_owned())
240 })?);
241 if v & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW != 0 {
242 let pos = v ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
243 max_overflow_idx = Some(match max_overflow_idx {
244 None => pos,
245 Some(m) => m.max(pos),
246 });
247 }
248 }
249 if let Some(pos) = max_overflow_idx {
250 let Some(gdo_start) = generation_overflow_off else {
251 return Err(Error::CorruptObject(
252 "commit-graph requires overflow generation data but has none".to_owned(),
253 ));
254 };
255 let gdo_end = chunk_byte_range(gdo_start, &toc_entries, file_end)?;
256 let overflow_bytes = gdo_end.saturating_sub(gdo_start);
257 let n_slots = overflow_bytes / 8;
258 if n_slots <= pos as usize {
259 return Err(Error::CorruptObject(
260 "commit-graph overflow generation data is too small".to_owned(),
261 ));
262 }
263 }
264 }
265 let bidx_len = bloom_idx_off.and_then(|b| {
266 chunk_offsets
267 .iter()
268 .find(|&&o| o > b)
269 .map(|&next| next.saturating_sub(b))
270 });
271
272 let fanout_off = fanout_off.ok_or_else(|| {
273 Error::CorruptObject("commit-graph missing OID fanout chunk".to_owned())
274 })?;
275 let oid_lookup_off = oid_lookup_off.ok_or_else(|| {
276 Error::CorruptObject("commit-graph missing OID lookup chunk".to_owned())
277 })?;
278 let commit_data_off = commit_data_off.ok_or_else(|| {
279 Error::CorruptObject("commit-graph missing commit data chunk".to_owned())
280 })?;
281 if fanout_off + 256 * 4 > body.len() || oid_lookup_off + 4 > body.len() {
282 return Err(Error::CorruptObject(
283 "commit-graph chunk extends past end of file".to_owned(),
284 ));
285 }
286 let num_commits = u32::from_be_bytes(
287 body[fanout_off + 255 * 4..fanout_off + 256 * 4]
288 .try_into()
289 .map_err(|_| Error::CorruptObject("commit-graph fanout corrupt".to_owned()))?,
290 );
291 if oid_lookup_off + num_commits as usize * hash_len > body.len() {
292 return Err(Error::CorruptObject(
293 "commit-graph OID lookup extends past end of file".to_owned(),
294 ));
295 }
296 let graph_data_width = hash_len + 16;
297 if commit_data_off + num_commits as usize * graph_data_width > body.len() {
298 return Err(Error::CorruptObject(
299 "commit-graph commit data extends past end of file".to_owned(),
300 ));
301 }
302
303 let read_generation_data = generation_off.is_some();
304 let mut bloom_settings = None;
305 let mut chunk_bloom_data = None;
306 if let (Some(_bidx), Some((bdat_off, bdat_len))) = (bloom_idx_off, bloom_data_range) {
307 if bdat_len < BLOOM_HEADER {
308 eprintln!(
309 "warning: ignoring too-small changed-path chunk ({} < {}) in commit-graph file",
310 bdat_len, BLOOM_HEADER
311 );
312 } else if bdat_off + bdat_len <= body.len() {
313 let hdr = &body[bdat_off..bdat_off + BLOOM_HEADER];
314 let hash_version: [u8; 4] = hdr[0..4]
315 .try_into()
316 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
317 let num_hashes: [u8; 4] = hdr[4..8]
318 .try_into()
319 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
320 let bits_per_entry: [u8; 4] = hdr[8..12]
321 .try_into()
322 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
323 bloom_settings = Some(BloomFilterSettings {
324 hash_version: u32::from_be_bytes(hash_version),
325 num_hashes: u32::from_be_bytes(num_hashes),
326 bits_per_entry: u32::from_be_bytes(bits_per_entry),
327 max_changed_paths: 512,
328 });
329 chunk_bloom_data = Some((bdat_off, bdat_len));
330 }
331 }
332
333 let bloom_indexes_ok = if let (Some(bidx), Some(bsize)) = (bloom_idx_off, bidx_len) {
334 if bsize / 4 != num_commits as usize {
335 eprintln!("warning: commit-graph changed-path index chunk is too small");
336 false
337 } else if bidx + bsize > body.len() {
338 eprintln!("warning: commit-graph changed-path index chunk is too small");
339 false
340 } else {
341 true
342 }
343 } else {
344 false
345 };
346 let bloom_pair_ok = bloom_settings.is_some()
347 && chunk_bloom_data.is_some()
348 && bloom_indexes_ok
349 && chunk_bloom_data.is_some_and(|(_, len)| len >= BLOOM_HEADER);
350
351 let (chunk_bloom_indexes, bloom_settings) = if bloom_pair_ok {
352 (bloom_idx_off, bloom_settings)
353 } else {
354 (None, None)
355 };
356 let chunk_bloom_data = if bloom_pair_ok {
357 chunk_bloom_data
358 } else {
359 None
360 };
361
362 let base_layers_declared = body[7] as u32;
363 let base_chunk_size = match base_graphs_off {
364 Some(off) => {
365 let end = chunk_byte_range(off, &toc_entries, file_end)?;
366 end.saturating_sub(off)
367 }
368 None => 0,
369 };
370
371 Ok(Self {
372 path,
373 body,
374 num_commits,
375 oid_lookup_off,
376 chunk_commit_data_off: commit_data_off,
377 chunk_generation_data: generation_off,
378 read_generation_data,
379 chunk_bloom_indexes,
380 chunk_bloom_data,
381 bloom_settings,
382 bloom_disabled: false,
383 base_layers_declared,
384 base_chunk_size,
385 hash_len,
386 })
387 }
388
389 fn parse(path: PathBuf, raw: Vec<u8>) -> Option<Self> {
390 Self::try_parse(path, raw).ok()
391 }
392
393 fn oid_at_lex(&self, lex_index: u32) -> Option<ObjectId> {
394 if lex_index >= self.num_commits {
395 return None;
396 }
397 let off = self.oid_lookup_off + lex_index as usize * self.hash_len;
398 ObjectId::from_bytes(self.body.get(off..off + self.hash_len)?).ok()
399 }
400
401 fn bsearch_oid(&self, oid: &ObjectId) -> Option<u32> {
402 let mut lo = 0u32;
403 let mut hi = self.num_commits;
404 let bytes = oid.as_bytes();
405 while lo < hi {
406 let mid = (lo + hi) / 2;
407 let off = self.oid_lookup_off + mid as usize * self.hash_len;
408 let slice = &self.body[off..off + self.hash_len];
409 match slice.cmp(bytes) {
410 std::cmp::Ordering::Less => lo = mid + 1,
411 std::cmp::Ordering::Greater => hi = mid,
412 std::cmp::Ordering::Equal => return Some(mid),
413 }
414 }
415 None
416 }
417
418 fn disable_bloom(&mut self) {
419 self.chunk_bloom_indexes = None;
420 self.chunk_bloom_data = None;
421 self.bloom_settings = None;
422 self.bloom_disabled = true;
423 }
424
425 fn layer_display_id(&self) -> String {
426 self.path
427 .file_stem()
428 .and_then(|s| s.to_str())
429 .map(|s| s.strip_prefix("graph-").unwrap_or(s).to_string())
430 .unwrap_or_else(|| "commit-graph".to_string())
431 }
432
433 fn bloom_filter_slice(&self, lex_index: u32) -> Option<&[u8]> {
434 let _settings = self.bloom_settings.as_ref()?;
435 let bidx_base = self.chunk_bloom_indexes?;
436 let (bdat_off, bdat_total) = self.chunk_bloom_data?;
437 let graph_warn = warn_path_for_graph_file(self.path.as_path());
438 if lex_index >= self.num_commits {
439 return None;
440 }
441 let payload_len = bdat_total.saturating_sub(BLOOM_HEADER);
442 let end_rel = u32::from_be_bytes(
443 self.body[bidx_base + lex_index as usize * 4..bidx_base + lex_index as usize * 4 + 4]
444 .try_into()
445 .ok()?,
446 ) as usize;
447 let start_rel = if lex_index == 0 {
448 0usize
449 } else {
450 u32::from_be_bytes(
451 self.body[bidx_base + (lex_index as usize - 1) * 4
452 ..bidx_base + (lex_index as usize - 1) * 4 + 4]
453 .try_into()
454 .ok()?,
455 ) as usize
456 };
457 let max_payload = payload_len;
463 if end_rel > max_payload {
464 eprintln!(
465 "warning: ignoring out-of-range offset ({end_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
466 lex_index,
467 graph_warn,
468 bdat_total = bdat_total
469 );
470 return None;
471 }
472 if start_rel > max_payload {
473 eprintln!(
474 "warning: ignoring out-of-range offset ({start_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
475 lex_index.saturating_sub(1),
476 graph_warn,
477 bdat_total = bdat_total
478 );
479 return None;
480 }
481 if end_rel < start_rel {
482 eprintln!(
483 "warning: ignoring decreasing changed-path index offsets ({start_rel} > {end_rel}) for positions {} and {} of {}",
484 lex_index.saturating_sub(1),
485 lex_index,
486 graph_warn
487 );
488 return None;
489 }
490 let data_base = bdat_off + BLOOM_HEADER;
491 let abs_start = data_base + start_rel;
492 let abs_end = data_base + end_rel;
493 if abs_end > bdat_off + bdat_total || abs_start > abs_end {
494 return None;
495 }
496 Some(&self.body[abs_start..abs_end])
497 }
498}
499
500#[derive(Debug, Clone, Copy, PartialEq, Eq)]
502pub enum BloomPrecheck {
503 Inapplicable,
505 NotInGraph,
507 FilterNotPresent,
509 DefinitelyNot,
511 Maybe,
513}
514
515#[derive(Debug, Default, Clone)]
517pub struct BloomWalkStats {
518 pub filter_not_present: u32,
519 pub maybe: u32,
520 pub definitely_not: u32,
521 pub false_positive: u32,
522}
523
524impl BloomWalkStats {
525 pub fn record_precheck(&mut self, pre: BloomPrecheck) {
526 match pre {
527 BloomPrecheck::Inapplicable | BloomPrecheck::NotInGraph => {}
528 BloomPrecheck::FilterNotPresent => self.filter_not_present += 1,
529 BloomPrecheck::DefinitelyNot => self.definitely_not += 1,
530 BloomPrecheck::Maybe => self.maybe += 1,
531 }
532 }
533
534 pub fn record_false_positive(&mut self) {
535 self.false_positive += 1;
536 }
537}
538
539pub type BloomWalkStatsHandle = Arc<Mutex<BloomWalkStats>>;
541
542#[derive(Debug, Clone)]
544pub struct CommitGraphChain {
545 layers: Vec<CommitGraphLayer>,
546}
547
548impl CommitGraphChain {
549 #[must_use]
551 pub fn top_layer_bloom_settings(&self) -> Option<BloomFilterSettings> {
552 self.layers.first()?.bloom_settings
553 }
554
555 #[must_use]
557 pub fn total_commits(&self) -> u32 {
558 self.layers.iter().map(|l| l.num_commits).sum()
559 }
560
561 #[must_use]
563 pub fn layer_paths_oldest_first(&self) -> Vec<PathBuf> {
564 self.layers.iter().rev().map(|l| l.path.clone()).collect()
565 }
566
567 #[must_use]
569 pub fn num_layers(&self) -> usize {
570 self.layers.len()
571 }
572
573 #[must_use]
575 pub fn layer_commit_counts_tip_first(&self) -> Vec<u32> {
576 self.layers.iter().map(|l| l.num_commits).collect()
577 }
578
579 #[must_use]
581 pub fn layer_has_generation_data_tip_first(&self) -> Vec<bool> {
582 self.layers
583 .iter()
584 .map(|l| l.chunk_generation_data.is_some())
585 .collect()
586 }
587
588 #[must_use]
590 pub fn layer_hashes_tip_first(&self) -> Vec<String> {
591 self.layers.iter().map(|l| l.layer_display_id()).collect()
592 }
593
594 #[must_use]
598 pub fn layer_object_dirs_tip_first(&self) -> Vec<PathBuf> {
599 self.layers
600 .iter()
601 .map(|l| {
602 let p = l.path.as_path();
605 let info = if p
606 .parent()
607 .and_then(|d| d.file_name())
608 .map(|n| n == "commit-graphs")
609 .unwrap_or(false)
610 {
611 p.parent().and_then(|d| d.parent())
612 } else {
613 p.parent()
614 };
615 info.and_then(|d| d.parent())
616 .map(Path::to_path_buf)
617 .unwrap_or_else(|| PathBuf::from("."))
618 })
619 .collect()
620 }
621
622 #[must_use]
626 pub fn sub_chain_tip_first(&self, start: usize, end: usize) -> Option<Self> {
627 let end = end.min(self.layers.len());
628 if start >= end {
629 return None;
630 }
631 Some(Self {
632 layers: self.layers[start..end].to_vec(),
633 })
634 }
635
636 #[must_use]
638 pub fn layer_oids(&self, tip_first_idx: usize) -> Vec<ObjectId> {
639 let Some(layer) = self.layers.get(tip_first_idx) else {
640 return Vec::new();
641 };
642 let mut out = Vec::with_capacity(layer.num_commits as usize);
643 for i in 0..layer.num_commits {
644 if let Some(oid) = layer.oid_at_lex(i) {
645 out.push(oid);
646 }
647 }
648 out
649 }
650
651 pub fn try_load(objects_dir: &Path) -> Result<Option<Self>, Error> {
656 let info = objects_dir.join("info");
657 let chain_path = info.join("commit-graphs").join("commit-graph-chain");
658 if chain_path.is_file() {
659 let content = std::fs::read_to_string(&chain_path).map_err(Error::from)?;
660 let mut layers = Vec::new();
661 for line in content.lines() {
662 let h = line.trim();
663 if h.len() != 40 {
664 continue;
665 }
666 let graph_path = info.join("commit-graphs").join(format!("graph-{h}.graph"));
667 let raw = std::fs::read(&graph_path).map_err(Error::from)?;
668 let layer = CommitGraphLayer::try_parse(graph_path, raw)?;
669 let n = layers.len();
676 if n > 0 && layer.base_chunk_size / layer.hash_len < n {
677 if warn_once_for_base_chunk_too_small(&layer.layer_display_id()) {
678 eprintln!("warning: commit-graph base graphs chunk is too small");
679 }
680 break;
681 }
682 layers.push(layer);
683 }
684 if layers.is_empty() {
685 return Ok(None);
686 }
687 layers.reverse();
691 let mut chain = Self { layers };
692 chain.validate_bloom_compatibility();
693 return Ok(Some(chain));
694 }
695 let single = info.join("commit-graph");
696 if single.is_file() {
697 let raw = std::fs::read(&single).map_err(Error::from)?;
698 let layer = CommitGraphLayer::try_parse(single.clone(), raw)?;
699 let mut chain = Self {
700 layers: vec![layer],
701 };
702 chain.validate_bloom_compatibility();
703 return Ok(Some(chain));
704 }
705 Ok(None)
706 }
707
708 pub fn load(objects_dir: &Path) -> Option<Self> {
710 Self::try_load(objects_dir).ok().flatten()
711 }
712
713 pub fn try_load_across(
737 objects_dir: &Path,
738 alt_dirs: &[PathBuf],
739 ) -> Result<Option<Self>, Error> {
740 let layer_dir = |dir: &Path| dir.join("info").join("commit-graphs");
741 let resolve_layer = |hash: &str| -> Option<PathBuf> {
742 let name = format!("graph-{hash}.graph");
743 let local = layer_dir(objects_dir).join(&name);
744 if local.is_file() {
745 return Some(local);
746 }
747 alt_dirs
748 .iter()
749 .map(|d| layer_dir(d).join(&name))
750 .find(|p| p.is_file())
751 };
752
753 let local_chain = layer_dir(objects_dir).join("commit-graph-chain");
761 let local_single = objects_dir.join("info").join("commit-graph");
762 let chain_path = if local_chain.is_file() {
763 local_chain
764 } else if local_single.is_file() {
765 return Self::try_load(objects_dir);
766 } else {
767 match alt_dirs
768 .iter()
769 .map(PathBuf::as_path)
770 .find(|d| layer_dir(d).join("commit-graph-chain").is_file())
771 {
772 Some(owner) => layer_dir(owner).join("commit-graph-chain"),
773 None => return Ok(None),
774 }
775 };
776
777 let content = std::fs::read_to_string(&chain_path).map_err(Error::from)?;
778 let mut layers = Vec::new();
779 for line in content.lines() {
780 let h = line.trim();
781 if h.len() != 40 {
782 continue;
783 }
784 let graph_path = resolve_layer(h).ok_or_else(|| {
785 Error::from(std::io::Error::new(
786 std::io::ErrorKind::NotFound,
787 format!("commit-graph layer graph-{h}.graph not found"),
788 ))
789 })?;
790 let raw = std::fs::read(&graph_path).map_err(Error::from)?;
791 let layer = CommitGraphLayer::try_parse(graph_path, raw)?;
792 let n = layers.len();
793 if n > 0 && layer.base_chunk_size / layer.hash_len < n {
794 if warn_once_for_base_chunk_too_small(&layer.layer_display_id()) {
795 eprintln!("warning: commit-graph base graphs chunk is too small");
796 }
797 break;
798 }
799 layers.push(layer);
800 }
801 if layers.is_empty() {
802 return Ok(None);
803 }
804 layers.reverse();
805 let mut chain = Self { layers };
806 chain.validate_bloom_compatibility();
807 Ok(Some(chain))
808 }
809
810 fn validate_bloom_compatibility(&mut self) {
811 let mut ref_settings: Option<BloomFilterSettings> = None;
817 for layer in &mut self.layers {
818 let Some(bs) = layer.bloom_settings else {
819 continue;
820 };
821 match ref_settings {
822 None => ref_settings = Some(bs),
823 Some(r) => {
824 if r.hash_version != bs.hash_version
825 || r.num_hashes != bs.num_hashes
826 || r.bits_per_entry != bs.bits_per_entry
827 {
828 let id = layer.layer_display_id();
829 if warn_once_for_disabled_bloom_layer(&id) {
835 eprintln!(
836 "warning: disabling Bloom filters for commit-graph layer '{id}' due to incompatible settings"
837 );
838 }
839 layer.disable_bloom();
840 }
841 }
842 }
843 }
844 }
845
846 pub fn existing_filter_bytes(
852 &self,
853 oid: &ObjectId,
854 want: &BloomFilterSettings,
855 ) -> Option<Vec<u8>> {
856 let (layer_idx, lex) = self.find_commit(oid)?;
857 let layer = &self.layers[layer_idx];
858 let settings = layer.bloom_settings.as_ref()?;
859 if settings.hash_version != want.hash_version
860 || settings.num_hashes != want.num_hashes
861 || settings.bits_per_entry != want.bits_per_entry
862 {
863 return None;
864 }
865 match layer.bloom_filter_slice(lex) {
872 Some(s) if !s.is_empty() => Some(s.to_vec()),
873 _ => None,
874 }
875 }
876
877 pub fn upgradable_filter_bytes(
883 &self,
884 oid: &ObjectId,
885 want: &BloomFilterSettings,
886 ) -> Option<Vec<u8>> {
887 let (layer_idx, lex) = self.find_commit(oid)?;
888 let layer = &self.layers[layer_idx];
889 let settings = layer.bloom_settings.as_ref()?;
890 if settings.hash_version == want.hash_version {
891 return None;
892 }
893 if settings.num_hashes != want.num_hashes || settings.bits_per_entry != want.bits_per_entry
894 {
895 return None;
896 }
897 match layer.bloom_filter_slice(lex) {
898 Some(s) if !s.is_empty() => Some(s.to_vec()),
899 _ => None,
900 }
901 }
902
903 pub fn find_commit(&self, oid: &ObjectId) -> Option<(usize, u32)> {
905 for (i, layer) in self.layers.iter().enumerate() {
906 if let Some(lex) = layer.bsearch_oid(oid) {
907 return Some((i, lex));
908 }
909 }
910 None
911 }
912
913 pub fn global_position(&self, oid: &ObjectId) -> Option<u32> {
915 let (layer_idx, lex) = self.find_commit(oid)?;
916 let below: u32 = self.layers[layer_idx + 1..]
917 .iter()
918 .map(|l| l.num_commits)
919 .sum();
920 Some(below + lex)
921 }
922
923 pub fn all_oids_in_order(&self) -> Vec<ObjectId> {
925 let mut out = Vec::new();
926 for layer in self.layers.iter().rev() {
927 for i in 0..layer.num_commits {
928 if let Some(oid) = layer.oid_at_lex(i) {
929 out.push(oid);
930 }
931 }
932 }
933 out
934 }
935
936 pub fn bloom_precheck_for_paths(
938 &self,
939 _odb: &Odb,
940 oid: ObjectId,
941 pathspecs: &[String],
942 bloom_cwd: Option<&str>,
943 requested_hash_version: i32,
944 read_changed_paths: bool,
945 ) -> std::result::Result<BloomPrecheck, crate::error::Error> {
946 if !read_changed_paths {
947 return Ok(BloomPrecheck::Inapplicable);
948 }
949 let Some((layer_idx, lex)) = self.find_commit(&oid) else {
950 return Ok(BloomPrecheck::NotInGraph);
951 };
952 let layer = &self.layers[layer_idx];
953 let Some(settings) = layer.bloom_settings.as_ref() else {
954 return Ok(BloomPrecheck::FilterNotPresent);
955 };
956 let effective_version = if requested_hash_version < 0 {
957 settings.hash_version as i32
958 } else {
959 requested_hash_version
960 };
961 if effective_version != settings.hash_version as i32 {
962 return Ok(BloomPrecheck::FilterNotPresent);
963 }
964
965 let filter = match layer.bloom_filter_slice(lex) {
971 Some(s) => s,
972 None => return Ok(BloomPrecheck::FilterNotPresent),
973 };
974 if filter.is_empty() {
975 return Ok(BloomPrecheck::FilterNotPresent);
976 }
977
978 let mut any_pathspec_maybe = false;
981 let mut checked_any_keys = false;
982 for spec in pathspecs {
983 if spec.is_empty() || crate::pathspec::pathspec_is_exclude(spec) {
984 continue;
985 }
986 let Some(norm) = crate::pathspec::bloom_lookup_prefix_with_cwd(spec, bloom_cwd) else {
987 continue;
988 };
989 let keys = bloom_keyvec_for_path(norm.as_str(), settings);
990 if keys.is_empty() {
991 continue;
992 }
993 checked_any_keys = true;
994 let mut all_keys_maybe = true;
995 for key in &keys {
996 match bloom_filter_contains(key, filter, settings) {
997 Ok(true) => {}
998 Ok(false) => {
999 all_keys_maybe = false;
1000 break;
1001 }
1002 Err(()) => {
1003 all_keys_maybe = true;
1004 break;
1005 }
1006 }
1007 }
1008 if all_keys_maybe {
1009 any_pathspec_maybe = true;
1010 break;
1011 }
1012 }
1013 if checked_any_keys && !any_pathspec_maybe {
1014 return Ok(BloomPrecheck::DefinitelyNot);
1015 }
1016 Ok(BloomPrecheck::Maybe)
1017 }
1018}
1019
1020pub fn diff_changed_paths_for_bloom(
1022 odb: &Odb,
1023 parent_tree: Option<ObjectId>,
1024 commit_tree: ObjectId,
1025) -> crate::error::Result<(Vec<String>, usize)> {
1026 use crate::diff::diff_trees;
1027 let entries = diff_trees(odb, parent_tree.as_ref(), Some(&commit_tree), "")?;
1028 let raw_len = entries.len();
1029 let mut paths = Vec::new();
1030 for e in entries {
1031 let p = e.path().to_string();
1032 if !p.is_empty() {
1033 paths.push(p);
1034 }
1035 }
1036 Ok((paths, raw_len))
1037}
1038
1039pub use crate::bloom::collect_changed_paths_for_bloom;
1041
1042pub fn bloom_filter_for_commit_write(
1044 odb: &Odb,
1045 parents: &[ObjectId],
1046 tree_oid: ObjectId,
1047 settings: &BloomFilterSettings,
1048) -> crate::error::Result<(Vec<u8>, BloomBuildOutcome)> {
1049 let (changed_paths_vec, raw_count) = if let Some(first_parent) = parents.first() {
1053 let p = load_commit_tree(odb, *first_parent)?;
1054 diff_changed_paths_for_bloom(odb, Some(p), tree_oid)?
1055 } else {
1056 diff_changed_paths_for_bloom(odb, None, tree_oid)?
1057 };
1058 let set = collect_changed_paths_for_bloom(&changed_paths_vec);
1059 Ok(crate::bloom::build_bloom_filter_data(
1060 &set, raw_count, settings,
1061 ))
1062}
1063
1064fn load_commit_tree(odb: &Odb, commit_oid: ObjectId) -> crate::error::Result<ObjectId> {
1065 let obj = odb.read(&commit_oid)?;
1066 let c = crate::objects::parse_commit(&obj.data)?;
1067 Ok(c.tree)
1068}
1069
1070fn tree_has_high_bit_paths(odb: &Odb, tree_oid: ObjectId) -> bool {
1075 let Ok(obj) = odb.read(&tree_oid) else {
1076 return true;
1078 };
1079 let Ok(entries) = crate::objects::parse_tree(&obj.data) else {
1080 return true;
1081 };
1082 for e in &entries {
1083 if e.name.iter().any(|&b| b & 0x80 != 0) {
1084 return true;
1085 }
1086 if e.mode == 0o040000 && tree_has_high_bit_paths(odb, e.oid) {
1087 return true;
1088 }
1089 }
1090 false
1091}
1092
1093pub fn commit_tree_has_high_bit_paths(odb: &Odb, commit_oid: ObjectId) -> bool {
1096 match load_commit_tree(odb, commit_oid) {
1097 Ok(tree) => tree_has_high_bit_paths(odb, tree),
1098 Err(_) => true,
1099 }
1100}
1101
1102pub fn parse_graph_file(path: &Path) -> Option<ParsedGraphDump> {
1104 let raw = std::fs::read(path).ok()?;
1105 if raw.len() < 28 {
1106 return None;
1107 }
1108 let hash_len = commit_graph_hash_len(raw[5])?;
1109 let body = &raw[..raw.len() - hash_len];
1110 if body.len() < 8 || &body[0..4] != SIGNATURE {
1111 return None;
1112 }
1113 let header_word = u32::from_be_bytes(body[0..4].try_into().ok()?);
1114 let num_chunks = body[6] as usize;
1115 let toc_start = 8;
1116 let mut present: std::collections::HashSet<u32> = std::collections::HashSet::new();
1117 for i in 0..num_chunks {
1118 let e = toc_start + i * 12;
1119 let id = u32::from_be_bytes(body[e..e + 4].try_into().ok()?);
1120 present.insert(id);
1121 }
1122 let mut chunk_names: Vec<String> = Vec::new();
1125 for (id, label) in [
1126 (CHUNK_OID_FANOUT, "oid_fanout"),
1127 (CHUNK_OID_LOOKUP, "oid_lookup"),
1128 (CHUNK_COMMIT_DATA, "commit_metadata"),
1129 (CHUNK_GENERATION_DATA, "generation_data"),
1130 (CHUNK_GENERATION_DATA_OVERFLOW, "generation_data_overflow"),
1131 (CHUNK_EXTRA_EDGES, "extra_edges"),
1132 (CHUNK_BLOOM_INDEXES, "bloom_indexes"),
1133 (CHUNK_BLOOM_DATA, "bloom_data"),
1134 ] {
1135 if present.contains(&id) {
1136 chunk_names.push(label.to_string());
1137 }
1138 }
1139 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw.clone())?;
1140 let bloom_opt = layer.bloom_settings.map(|s| {
1141 format!(
1142 " bloom({},{},{})",
1143 s.hash_version, s.bits_per_entry, s.num_hashes
1144 )
1145 });
1146 let mut options = String::new();
1147 if let Some(b) = bloom_opt {
1148 options.push_str(&b);
1149 }
1150 if layer.read_generation_data {
1151 options.push_str(" read_generation_data");
1152 }
1153 Some(ParsedGraphDump {
1154 header_word,
1155 version: body[4],
1156 hash_ver: body[5],
1157 num_chunks: body[6],
1158 reserved: body[7],
1159 num_commits: layer.num_commits,
1160 chunks: chunk_names.join(" "),
1161 options,
1162 })
1163}
1164
1165pub struct ParsedGraphDump {
1166 pub header_word: u32,
1167 pub version: u8,
1168 pub hash_ver: u8,
1169 pub num_chunks: u8,
1170 pub reserved: u8,
1171 pub num_commits: u32,
1172 pub chunks: String,
1173 pub options: String,
1174}
1175
1176pub fn dump_bloom_filters(path: &Path) -> Option<Vec<String>> {
1178 let raw = std::fs::read(path).ok()?;
1179 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw)?;
1180 let mut out = Vec::new();
1181 for i in 0..layer.num_commits {
1182 let slice = layer.bloom_filter_slice(i).unwrap_or(&[]);
1183 if slice.is_empty() {
1184 out.push(String::new());
1185 } else {
1186 let hex: String = slice.iter().map(|b| format!("{b:02x}")).collect();
1187 out.push(hex);
1188 }
1189 }
1190 Some(out)
1191}