1use std::path::{Path, PathBuf};
4use std::sync::{Arc, Mutex};
5
6use crate::bloom::{
7 bloom_filter_contains, bloom_keyvec_for_path, BloomBuildOutcome, BloomFilterSettings,
8};
9use crate::error::Error;
10use crate::objects::ObjectId;
11use crate::odb::Odb;
12
13fn warn_once_for_disabled_bloom_layer(id: &str) -> bool {
17 use std::collections::HashSet;
18 use std::sync::OnceLock;
19 static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
20 let set = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
21 match set.lock() {
22 Ok(mut guard) => guard.insert(id.to_string()),
23 Err(_) => true,
24 }
25}
26
27fn warn_once_for_base_chunk_too_small(id: &str) -> bool {
30 use std::collections::HashSet;
31 use std::sync::OnceLock;
32 static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
33 let set = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
34 match set.lock() {
35 Ok(mut guard) => guard.insert(id.to_string()),
36 Err(_) => true,
37 }
38}
39
40const SIGNATURE: &[u8; 4] = b"CGPH";
41const GRAPH_VERSION: u8 = 1;
42const HASH_VERSION_SHA1: u8 = 1;
43const HASH_LEN: usize = 20;
44
45const CHUNK_OID_FANOUT: u32 = 0x4f49_4446; const CHUNK_OID_LOOKUP: u32 = 0x4f49_444c; const CHUNK_COMMIT_DATA: u32 = 0x4344_4154; const CHUNK_GENERATION_DATA: u32 = 0x4744_4132; const CHUNK_GENERATION_DATA_OVERFLOW: u32 = 0x4744_4f32; const CHUNK_EXTRA_EDGES: u32 = 0x4544_4745; const CHUNK_BLOOM_INDEXES: u32 = 0x4249_4458; const CHUNK_BLOOM_DATA: u32 = 0x4244_4154; const CHUNK_BASE_GRAPHS: u32 = 0x4241_5345; const BLOOM_HEADER: usize = crate::bloom::BLOOMDATA_HEADER_LEN;
56
57const CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW: u32 = 1u32 << 31;
59
60fn warn_path_for_graph_file(path: &Path) -> String {
61 let s = path.to_string_lossy();
62 if let Some(idx) = s.find(".git/") {
63 return s[idx..].replace('\\', "/");
64 }
65 s.replace('\\', "/")
66}
67
68#[derive(Debug, Clone)]
70pub struct CommitGraphLayer {
71 pub path: PathBuf,
72 body: Vec<u8>,
73 num_commits: u32,
74 oid_lookup_off: usize,
75 #[allow(dead_code)]
76 chunk_commit_data_off: usize,
77 #[allow(dead_code)]
78 chunk_generation_data: Option<usize>,
79 read_generation_data: bool,
80 chunk_bloom_indexes: Option<usize>,
81 chunk_bloom_data: Option<(usize, usize)>,
82 bloom_settings: Option<BloomFilterSettings>,
83 bloom_disabled: bool,
84 base_layers_declared: u32,
86 base_chunk_size: usize,
89}
90
91impl CommitGraphLayer {
92 pub fn try_parse(path: PathBuf, raw: Vec<u8>) -> Result<Self, Error> {
94 if raw.len() < 28 {
95 return Err(Error::CorruptObject(
96 "commit-graph file too small".to_owned(),
97 ));
98 }
99 let body = raw[..raw.len() - HASH_LEN].to_vec();
100 if body.len() < 8 || &body[0..4] != SIGNATURE {
101 return Err(Error::CorruptObject(
102 "commit-graph has bad signature".to_owned(),
103 ));
104 }
105 if body[4] != GRAPH_VERSION || body[5] != HASH_VERSION_SHA1 {
106 return Err(Error::CorruptObject(format!(
107 "commit-graph version/hash not supported (version {} hash {})",
108 body[4], body[5]
109 )));
110 }
111 let num_chunks = body[6] as usize;
112 let toc_start = 8;
113 let toc_end = toc_start + (num_chunks + 1) * 12;
114 if body.len() < toc_end {
115 return Err(Error::CorruptObject(
116 "commit-graph truncated at chunk table".to_owned(),
117 ));
118 }
119
120 let mut fanout_off = None;
121 let mut oid_lookup_off = None;
122 let mut commit_data_off = None;
123 let mut generation_off = None;
124 let mut generation_overflow_off = None;
125 let mut bloom_idx_off = None;
126 let mut bloom_data_range = None;
127 let mut base_graphs_off = None;
128 let mut chunk_offsets: Vec<usize> = Vec::new();
129 let mut toc_entries: Vec<(u32, usize)> = Vec::with_capacity(num_chunks);
130
131 for i in 0..num_chunks {
132 let e = toc_start + i * 12;
133 let id = u32::from_be_bytes(
134 body[e..e + 4]
135 .try_into()
136 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
137 );
138 let off = u64::from_be_bytes(
139 body[e + 4..e + 12]
140 .try_into()
141 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
142 ) as usize;
143 toc_entries.push((id, off));
144 chunk_offsets.push(off);
145 match id {
146 CHUNK_OID_FANOUT => fanout_off = Some(off),
147 CHUNK_OID_LOOKUP => oid_lookup_off = Some(off),
148 CHUNK_COMMIT_DATA => commit_data_off = Some(off),
149 CHUNK_GENERATION_DATA => generation_off = Some(off),
150 CHUNK_GENERATION_DATA_OVERFLOW => generation_overflow_off = Some(off),
151 CHUNK_BLOOM_INDEXES => bloom_idx_off = Some(off),
152 CHUNK_BASE_GRAPHS => base_graphs_off = Some(off),
153 CHUNK_BLOOM_DATA => {
154 let end = if i + 1 < num_chunks {
155 let e2 = toc_start + (i + 1) * 12;
156 u64::from_be_bytes(body[e2 + 4..e2 + 12].try_into().unwrap_or([0u8; 8]))
157 as usize
158 } else {
159 let term = toc_start + num_chunks * 12;
160 u64::from_be_bytes(body[term + 4..term + 12].try_into().unwrap_or([0u8; 8]))
161 as usize
162 };
163 bloom_data_range = Some((off, end.saturating_sub(off)));
164 }
165 _ => {}
166 }
167 }
168 let file_end = u64::from_be_bytes(
169 body[toc_start + num_chunks * 12 + 4..toc_start + num_chunks * 12 + 12]
170 .try_into()
171 .map_err(|_| Error::CorruptObject("commit-graph bad file end".to_owned()))?,
172 ) as usize;
173 chunk_offsets.push(file_end);
174 chunk_offsets.sort_unstable();
175 chunk_offsets.dedup();
176
177 fn chunk_byte_range(
178 start: usize,
179 toc_entries: &[(u32, usize)],
180 file_end: usize,
181 ) -> Result<usize, Error> {
182 let mut ends: Vec<usize> = toc_entries
183 .iter()
184 .map(|&(_, o)| o)
185 .filter(|&o| o > start)
186 .collect();
187 ends.sort_unstable();
188 let end = ends.first().copied().unwrap_or(file_end);
189 if end < start {
190 return Err(Error::CorruptObject(
191 "commit-graph chunk layout invalid".to_owned(),
192 ));
193 }
194 Ok(end)
195 }
196
197 if let Some(gda) = generation_off {
198 let gda_end = chunk_byte_range(gda, &toc_entries, file_end)?;
199 let gda_len = gda_end.saturating_sub(gda);
200 let num_commits = fanout_off
201 .and_then(|fo| {
202 let slice = body.get(fo + 255 * 4..fo + 256 * 4)?;
203 Some(u32::from_be_bytes(slice.try_into().ok()?))
204 })
205 .ok_or_else(|| Error::CorruptObject("commit-graph missing fanout".to_owned()))?;
206 let expected = num_commits as usize * 4;
207 if gda_len < expected {
208 return Err(Error::CorruptObject(
209 "commit-graph generation data chunk is too small".to_owned(),
210 ));
211 }
212 let gda_slice = body.get(gda..gda + expected).ok_or_else(|| {
213 Error::CorruptObject("commit-graph generation data OOB".to_owned())
214 })?;
215 let mut max_overflow_idx: Option<u32> = None;
216 for w in 0..num_commits as usize {
217 let v =
218 u32::from_be_bytes(gda_slice[w * 4..w * 4 + 4].try_into().map_err(|_| {
219 Error::CorruptObject("commit-graph GDA2 corrupt".to_owned())
220 })?);
221 if v & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW != 0 {
222 let pos = v ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
223 max_overflow_idx = Some(match max_overflow_idx {
224 None => pos,
225 Some(m) => m.max(pos),
226 });
227 }
228 }
229 if let Some(pos) = max_overflow_idx {
230 let Some(gdo_start) = generation_overflow_off else {
231 return Err(Error::CorruptObject(
232 "commit-graph requires overflow generation data but has none".to_owned(),
233 ));
234 };
235 let gdo_end = chunk_byte_range(gdo_start, &toc_entries, file_end)?;
236 let overflow_bytes = gdo_end.saturating_sub(gdo_start);
237 let n_slots = overflow_bytes / 8;
238 if n_slots <= pos as usize {
239 return Err(Error::CorruptObject(
240 "commit-graph overflow generation data is too small".to_owned(),
241 ));
242 }
243 }
244 }
245 let bidx_len = bloom_idx_off.and_then(|b| {
246 chunk_offsets
247 .iter()
248 .find(|&&o| o > b)
249 .map(|&next| next.saturating_sub(b))
250 });
251
252 let fanout_off = fanout_off.ok_or_else(|| {
253 Error::CorruptObject("commit-graph missing OID fanout chunk".to_owned())
254 })?;
255 let oid_lookup_off = oid_lookup_off.ok_or_else(|| {
256 Error::CorruptObject("commit-graph missing OID lookup chunk".to_owned())
257 })?;
258 let commit_data_off = commit_data_off.ok_or_else(|| {
259 Error::CorruptObject("commit-graph missing commit data chunk".to_owned())
260 })?;
261 if fanout_off + 256 * 4 > body.len() || oid_lookup_off + 4 > body.len() {
262 return Err(Error::CorruptObject(
263 "commit-graph chunk extends past end of file".to_owned(),
264 ));
265 }
266 let num_commits = u32::from_be_bytes(
267 body[fanout_off + 255 * 4..fanout_off + 256 * 4]
268 .try_into()
269 .map_err(|_| Error::CorruptObject("commit-graph fanout corrupt".to_owned()))?,
270 );
271 if oid_lookup_off + num_commits as usize * HASH_LEN > body.len() {
272 return Err(Error::CorruptObject(
273 "commit-graph OID lookup extends past end of file".to_owned(),
274 ));
275 }
276 let graph_data_width = HASH_LEN + 16;
277 if commit_data_off + num_commits as usize * graph_data_width > body.len() {
278 return Err(Error::CorruptObject(
279 "commit-graph commit data extends past end of file".to_owned(),
280 ));
281 }
282
283 let read_generation_data = generation_off.is_some();
284 let mut bloom_settings = None;
285 let mut chunk_bloom_data = None;
286 if let (Some(_bidx), Some((bdat_off, bdat_len))) = (bloom_idx_off, bloom_data_range) {
287 if bdat_len < BLOOM_HEADER {
288 eprintln!(
289 "warning: ignoring too-small changed-path chunk ({} < {}) in commit-graph file",
290 bdat_len, BLOOM_HEADER
291 );
292 } else if bdat_off + bdat_len <= body.len() {
293 let hdr = &body[bdat_off..bdat_off + BLOOM_HEADER];
294 let hash_version: [u8; 4] = hdr[0..4]
295 .try_into()
296 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
297 let num_hashes: [u8; 4] = hdr[4..8]
298 .try_into()
299 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
300 let bits_per_entry: [u8; 4] = hdr[8..12]
301 .try_into()
302 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
303 bloom_settings = Some(BloomFilterSettings {
304 hash_version: u32::from_be_bytes(hash_version),
305 num_hashes: u32::from_be_bytes(num_hashes),
306 bits_per_entry: u32::from_be_bytes(bits_per_entry),
307 max_changed_paths: 512,
308 });
309 chunk_bloom_data = Some((bdat_off, bdat_len));
310 }
311 }
312
313 let bloom_indexes_ok = if let (Some(bidx), Some(bsize)) = (bloom_idx_off, bidx_len) {
314 if bsize / 4 != num_commits as usize {
315 eprintln!("warning: commit-graph changed-path index chunk is too small");
316 false
317 } else if bidx + bsize > body.len() {
318 eprintln!("warning: commit-graph changed-path index chunk is too small");
319 false
320 } else {
321 true
322 }
323 } else {
324 false
325 };
326 let bloom_pair_ok = bloom_settings.is_some()
327 && chunk_bloom_data.is_some()
328 && bloom_indexes_ok
329 && chunk_bloom_data.is_some_and(|(_, len)| len >= BLOOM_HEADER);
330
331 let (chunk_bloom_indexes, bloom_settings) = if bloom_pair_ok {
332 (bloom_idx_off, bloom_settings)
333 } else {
334 (None, None)
335 };
336 let chunk_bloom_data = if bloom_pair_ok {
337 chunk_bloom_data
338 } else {
339 None
340 };
341
342 let base_layers_declared = body[7] as u32;
343 let base_chunk_size = match base_graphs_off {
344 Some(off) => {
345 let end = chunk_byte_range(off, &toc_entries, file_end)?;
346 end.saturating_sub(off)
347 }
348 None => 0,
349 };
350
351 Ok(Self {
352 path,
353 body,
354 num_commits,
355 oid_lookup_off,
356 chunk_commit_data_off: commit_data_off,
357 chunk_generation_data: generation_off,
358 read_generation_data,
359 chunk_bloom_indexes,
360 chunk_bloom_data,
361 bloom_settings,
362 bloom_disabled: false,
363 base_layers_declared,
364 base_chunk_size,
365 })
366 }
367
368 fn parse(path: PathBuf, raw: Vec<u8>) -> Option<Self> {
369 Self::try_parse(path, raw).ok()
370 }
371
372 fn oid_at_lex(&self, lex_index: u32) -> Option<ObjectId> {
373 if lex_index >= self.num_commits {
374 return None;
375 }
376 let off = self.oid_lookup_off + lex_index as usize * HASH_LEN;
377 ObjectId::from_bytes(self.body.get(off..off + HASH_LEN)?.try_into().ok()?).ok()
378 }
379
380 fn bsearch_oid(&self, oid: &ObjectId) -> Option<u32> {
381 let mut lo = 0u32;
382 let mut hi = self.num_commits;
383 let bytes = oid.as_bytes();
384 while lo < hi {
385 let mid = (lo + hi) / 2;
386 let off = self.oid_lookup_off + mid as usize * HASH_LEN;
387 let slice = &self.body[off..off + HASH_LEN];
388 match slice.cmp(bytes) {
389 std::cmp::Ordering::Less => lo = mid + 1,
390 std::cmp::Ordering::Greater => hi = mid,
391 std::cmp::Ordering::Equal => return Some(mid),
392 }
393 }
394 None
395 }
396
397 fn disable_bloom(&mut self) {
398 self.chunk_bloom_indexes = None;
399 self.chunk_bloom_data = None;
400 self.bloom_settings = None;
401 self.bloom_disabled = true;
402 }
403
404 fn layer_display_id(&self) -> String {
405 self.path
406 .file_stem()
407 .and_then(|s| s.to_str())
408 .map(|s| s.strip_prefix("graph-").unwrap_or(s).to_string())
409 .unwrap_or_else(|| "commit-graph".to_string())
410 }
411
412 fn bloom_filter_slice(&self, lex_index: u32) -> Option<&[u8]> {
413 let _settings = self.bloom_settings.as_ref()?;
414 let bidx_base = self.chunk_bloom_indexes?;
415 let (bdat_off, bdat_total) = self.chunk_bloom_data?;
416 let graph_warn = warn_path_for_graph_file(self.path.as_path());
417 if lex_index >= self.num_commits {
418 return None;
419 }
420 let payload_len = bdat_total.saturating_sub(BLOOM_HEADER);
421 let end_rel = u32::from_be_bytes(
422 self.body[bidx_base + lex_index as usize * 4..bidx_base + lex_index as usize * 4 + 4]
423 .try_into()
424 .ok()?,
425 ) as usize;
426 let start_rel = if lex_index == 0 {
427 0usize
428 } else {
429 u32::from_be_bytes(
430 self.body[bidx_base + (lex_index as usize - 1) * 4
431 ..bidx_base + (lex_index as usize - 1) * 4 + 4]
432 .try_into()
433 .ok()?,
434 ) as usize
435 };
436 let max_payload = payload_len;
442 if end_rel > max_payload {
443 eprintln!(
444 "warning: ignoring out-of-range offset ({end_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
445 lex_index,
446 graph_warn,
447 bdat_total = bdat_total
448 );
449 return None;
450 }
451 if start_rel > max_payload {
452 eprintln!(
453 "warning: ignoring out-of-range offset ({start_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
454 lex_index.saturating_sub(1),
455 graph_warn,
456 bdat_total = bdat_total
457 );
458 return None;
459 }
460 if end_rel < start_rel {
461 eprintln!(
462 "warning: ignoring decreasing changed-path index offsets ({start_rel} > {end_rel}) for positions {} and {} of {}",
463 lex_index.saturating_sub(1),
464 lex_index,
465 graph_warn
466 );
467 return None;
468 }
469 let data_base = bdat_off + BLOOM_HEADER;
470 let abs_start = data_base + start_rel;
471 let abs_end = data_base + end_rel;
472 if abs_end > bdat_off + bdat_total || abs_start > abs_end {
473 return None;
474 }
475 Some(&self.body[abs_start..abs_end])
476 }
477}
478
479#[derive(Debug, Clone, Copy, PartialEq, Eq)]
481pub enum BloomPrecheck {
482 Inapplicable,
484 NotInGraph,
486 FilterNotPresent,
488 DefinitelyNot,
490 Maybe,
492}
493
494#[derive(Debug, Default, Clone)]
496pub struct BloomWalkStats {
497 pub filter_not_present: u32,
498 pub maybe: u32,
499 pub definitely_not: u32,
500 pub false_positive: u32,
501}
502
503impl BloomWalkStats {
504 pub fn record_precheck(&mut self, pre: BloomPrecheck) {
505 match pre {
506 BloomPrecheck::Inapplicable | BloomPrecheck::NotInGraph => {}
507 BloomPrecheck::FilterNotPresent => self.filter_not_present += 1,
508 BloomPrecheck::DefinitelyNot => self.definitely_not += 1,
509 BloomPrecheck::Maybe => self.maybe += 1,
510 }
511 }
512
513 pub fn record_false_positive(&mut self) {
514 self.false_positive += 1;
515 }
516}
517
518pub type BloomWalkStatsHandle = Arc<Mutex<BloomWalkStats>>;
520
521#[derive(Debug, Clone)]
523pub struct CommitGraphChain {
524 layers: Vec<CommitGraphLayer>,
525}
526
527impl CommitGraphChain {
528 #[must_use]
530 pub fn top_layer_bloom_settings(&self) -> Option<BloomFilterSettings> {
531 self.layers.first()?.bloom_settings
532 }
533
534 #[must_use]
536 pub fn total_commits(&self) -> u32 {
537 self.layers.iter().map(|l| l.num_commits).sum()
538 }
539
540 #[must_use]
542 pub fn layer_paths_oldest_first(&self) -> Vec<PathBuf> {
543 self.layers.iter().rev().map(|l| l.path.clone()).collect()
544 }
545
546 #[must_use]
548 pub fn num_layers(&self) -> usize {
549 self.layers.len()
550 }
551
552 #[must_use]
554 pub fn layer_commit_counts_tip_first(&self) -> Vec<u32> {
555 self.layers.iter().map(|l| l.num_commits).collect()
556 }
557
558 #[must_use]
560 pub fn layer_has_generation_data_tip_first(&self) -> Vec<bool> {
561 self.layers
562 .iter()
563 .map(|l| l.chunk_generation_data.is_some())
564 .collect()
565 }
566
567 #[must_use]
569 pub fn layer_hashes_tip_first(&self) -> Vec<String> {
570 self.layers.iter().map(|l| l.layer_display_id()).collect()
571 }
572
573 #[must_use]
577 pub fn layer_object_dirs_tip_first(&self) -> Vec<PathBuf> {
578 self.layers
579 .iter()
580 .map(|l| {
581 let p = l.path.as_path();
584 let info = if p
585 .parent()
586 .and_then(|d| d.file_name())
587 .map(|n| n == "commit-graphs")
588 .unwrap_or(false)
589 {
590 p.parent().and_then(|d| d.parent())
591 } else {
592 p.parent()
593 };
594 info.and_then(|d| d.parent())
595 .map(Path::to_path_buf)
596 .unwrap_or_else(|| PathBuf::from("."))
597 })
598 .collect()
599 }
600
601 #[must_use]
605 pub fn sub_chain_tip_first(&self, start: usize, end: usize) -> Option<Self> {
606 let end = end.min(self.layers.len());
607 if start >= end {
608 return None;
609 }
610 Some(Self {
611 layers: self.layers[start..end].to_vec(),
612 })
613 }
614
615 #[must_use]
617 pub fn layer_oids(&self, tip_first_idx: usize) -> Vec<ObjectId> {
618 let Some(layer) = self.layers.get(tip_first_idx) else {
619 return Vec::new();
620 };
621 let mut out = Vec::with_capacity(layer.num_commits as usize);
622 for i in 0..layer.num_commits {
623 if let Some(oid) = layer.oid_at_lex(i) {
624 out.push(oid);
625 }
626 }
627 out
628 }
629
630 pub fn try_load(objects_dir: &Path) -> Result<Option<Self>, Error> {
635 let info = objects_dir.join("info");
636 let chain_path = info.join("commit-graphs").join("commit-graph-chain");
637 if chain_path.is_file() {
638 let content = std::fs::read_to_string(&chain_path).map_err(Error::from)?;
639 let mut layers = Vec::new();
640 for line in content.lines() {
641 let h = line.trim();
642 if h.len() != 40 {
643 continue;
644 }
645 let graph_path = info.join("commit-graphs").join(format!("graph-{h}.graph"));
646 let raw = std::fs::read(&graph_path).map_err(Error::from)?;
647 let layer = CommitGraphLayer::try_parse(graph_path, raw)?;
648 let n = layers.len();
655 if n > 0 && layer.base_chunk_size / HASH_LEN < n {
656 if warn_once_for_base_chunk_too_small(&layer.layer_display_id()) {
657 eprintln!("warning: commit-graph base graphs chunk is too small");
658 }
659 break;
660 }
661 layers.push(layer);
662 }
663 if layers.is_empty() {
664 return Ok(None);
665 }
666 layers.reverse();
670 let mut chain = Self { layers };
671 chain.validate_bloom_compatibility();
672 return Ok(Some(chain));
673 }
674 let single = info.join("commit-graph");
675 if single.is_file() {
676 let raw = std::fs::read(&single).map_err(Error::from)?;
677 let layer = CommitGraphLayer::try_parse(single.clone(), raw)?;
678 let mut chain = Self {
679 layers: vec![layer],
680 };
681 chain.validate_bloom_compatibility();
682 return Ok(Some(chain));
683 }
684 Ok(None)
685 }
686
687 pub fn load(objects_dir: &Path) -> Option<Self> {
689 Self::try_load(objects_dir).ok().flatten()
690 }
691
692 pub fn try_load_across(
716 objects_dir: &Path,
717 alt_dirs: &[PathBuf],
718 ) -> Result<Option<Self>, Error> {
719 let layer_dir = |dir: &Path| dir.join("info").join("commit-graphs");
720 let resolve_layer = |hash: &str| -> Option<PathBuf> {
721 let name = format!("graph-{hash}.graph");
722 let local = layer_dir(objects_dir).join(&name);
723 if local.is_file() {
724 return Some(local);
725 }
726 alt_dirs
727 .iter()
728 .map(|d| layer_dir(d).join(&name))
729 .find(|p| p.is_file())
730 };
731
732 let local_chain = layer_dir(objects_dir).join("commit-graph-chain");
740 let local_single = objects_dir.join("info").join("commit-graph");
741 let chain_path = if local_chain.is_file() {
742 local_chain
743 } else if local_single.is_file() {
744 return Self::try_load(objects_dir);
745 } else {
746 match alt_dirs
747 .iter()
748 .map(PathBuf::as_path)
749 .find(|d| layer_dir(d).join("commit-graph-chain").is_file())
750 {
751 Some(owner) => layer_dir(owner).join("commit-graph-chain"),
752 None => return Ok(None),
753 }
754 };
755
756 let content = std::fs::read_to_string(&chain_path).map_err(Error::from)?;
757 let mut layers = Vec::new();
758 for line in content.lines() {
759 let h = line.trim();
760 if h.len() != 40 {
761 continue;
762 }
763 let graph_path = resolve_layer(h).ok_or_else(|| {
764 Error::from(std::io::Error::new(
765 std::io::ErrorKind::NotFound,
766 format!("commit-graph layer graph-{h}.graph not found"),
767 ))
768 })?;
769 let raw = std::fs::read(&graph_path).map_err(Error::from)?;
770 let layer = CommitGraphLayer::try_parse(graph_path, raw)?;
771 let n = layers.len();
772 if n > 0 && layer.base_chunk_size / HASH_LEN < n {
773 if warn_once_for_base_chunk_too_small(&layer.layer_display_id()) {
774 eprintln!("warning: commit-graph base graphs chunk is too small");
775 }
776 break;
777 }
778 layers.push(layer);
779 }
780 if layers.is_empty() {
781 return Ok(None);
782 }
783 layers.reverse();
784 let mut chain = Self { layers };
785 chain.validate_bloom_compatibility();
786 Ok(Some(chain))
787 }
788
789 fn validate_bloom_compatibility(&mut self) {
790 let mut ref_settings: Option<BloomFilterSettings> = None;
796 for layer in &mut self.layers {
797 let Some(bs) = layer.bloom_settings else {
798 continue;
799 };
800 match ref_settings {
801 None => ref_settings = Some(bs),
802 Some(r) => {
803 if r.hash_version != bs.hash_version
804 || r.num_hashes != bs.num_hashes
805 || r.bits_per_entry != bs.bits_per_entry
806 {
807 let id = layer.layer_display_id();
808 if warn_once_for_disabled_bloom_layer(&id) {
814 eprintln!(
815 "warning: disabling Bloom filters for commit-graph layer '{id}' due to incompatible settings"
816 );
817 }
818 layer.disable_bloom();
819 }
820 }
821 }
822 }
823 }
824
825 pub fn existing_filter_bytes(
831 &self,
832 oid: &ObjectId,
833 want: &BloomFilterSettings,
834 ) -> Option<Vec<u8>> {
835 let (layer_idx, lex) = self.find_commit(oid)?;
836 let layer = &self.layers[layer_idx];
837 let settings = layer.bloom_settings.as_ref()?;
838 if settings.hash_version != want.hash_version
839 || settings.num_hashes != want.num_hashes
840 || settings.bits_per_entry != want.bits_per_entry
841 {
842 return None;
843 }
844 match layer.bloom_filter_slice(lex) {
851 Some(s) if !s.is_empty() => Some(s.to_vec()),
852 _ => None,
853 }
854 }
855
856 pub fn upgradable_filter_bytes(
862 &self,
863 oid: &ObjectId,
864 want: &BloomFilterSettings,
865 ) -> Option<Vec<u8>> {
866 let (layer_idx, lex) = self.find_commit(oid)?;
867 let layer = &self.layers[layer_idx];
868 let settings = layer.bloom_settings.as_ref()?;
869 if settings.hash_version == want.hash_version {
870 return None;
871 }
872 if settings.num_hashes != want.num_hashes || settings.bits_per_entry != want.bits_per_entry
873 {
874 return None;
875 }
876 match layer.bloom_filter_slice(lex) {
877 Some(s) if !s.is_empty() => Some(s.to_vec()),
878 _ => None,
879 }
880 }
881
882 pub fn find_commit(&self, oid: &ObjectId) -> Option<(usize, u32)> {
884 for (i, layer) in self.layers.iter().enumerate() {
885 if let Some(lex) = layer.bsearch_oid(oid) {
886 return Some((i, lex));
887 }
888 }
889 None
890 }
891
892 pub fn global_position(&self, oid: &ObjectId) -> Option<u32> {
894 let (layer_idx, lex) = self.find_commit(oid)?;
895 let below: u32 = self.layers[layer_idx + 1..]
896 .iter()
897 .map(|l| l.num_commits)
898 .sum();
899 Some(below + lex)
900 }
901
902 pub fn all_oids_in_order(&self) -> Vec<ObjectId> {
904 let mut out = Vec::new();
905 for layer in self.layers.iter().rev() {
906 for i in 0..layer.num_commits {
907 if let Some(oid) = layer.oid_at_lex(i) {
908 out.push(oid);
909 }
910 }
911 }
912 out
913 }
914
915 pub fn bloom_precheck_for_paths(
917 &self,
918 _odb: &Odb,
919 oid: ObjectId,
920 pathspecs: &[String],
921 bloom_cwd: Option<&str>,
922 requested_hash_version: i32,
923 read_changed_paths: bool,
924 ) -> std::result::Result<BloomPrecheck, crate::error::Error> {
925 if !read_changed_paths {
926 return Ok(BloomPrecheck::Inapplicable);
927 }
928 let Some((layer_idx, lex)) = self.find_commit(&oid) else {
929 return Ok(BloomPrecheck::NotInGraph);
930 };
931 let layer = &self.layers[layer_idx];
932 let Some(settings) = layer.bloom_settings.as_ref() else {
933 return Ok(BloomPrecheck::FilterNotPresent);
934 };
935 let effective_version = if requested_hash_version < 0 {
936 settings.hash_version as i32
937 } else {
938 requested_hash_version
939 };
940 if effective_version != settings.hash_version as i32 {
941 return Ok(BloomPrecheck::FilterNotPresent);
942 }
943
944 let filter = match layer.bloom_filter_slice(lex) {
950 Some(s) => s,
951 None => return Ok(BloomPrecheck::FilterNotPresent),
952 };
953 if filter.is_empty() {
954 return Ok(BloomPrecheck::FilterNotPresent);
955 }
956
957 let mut any_pathspec_maybe = false;
960 let mut checked_any_keys = false;
961 for spec in pathspecs {
962 if spec.is_empty() || crate::pathspec::pathspec_is_exclude(spec) {
963 continue;
964 }
965 let Some(norm) = crate::pathspec::bloom_lookup_prefix_with_cwd(spec, bloom_cwd) else {
966 continue;
967 };
968 let keys = bloom_keyvec_for_path(norm.as_str(), settings);
969 if keys.is_empty() {
970 continue;
971 }
972 checked_any_keys = true;
973 let mut all_keys_maybe = true;
974 for key in &keys {
975 match bloom_filter_contains(key, filter, settings) {
976 Ok(true) => {}
977 Ok(false) => {
978 all_keys_maybe = false;
979 break;
980 }
981 Err(()) => {
982 all_keys_maybe = true;
983 break;
984 }
985 }
986 }
987 if all_keys_maybe {
988 any_pathspec_maybe = true;
989 break;
990 }
991 }
992 if checked_any_keys && !any_pathspec_maybe {
993 return Ok(BloomPrecheck::DefinitelyNot);
994 }
995 Ok(BloomPrecheck::Maybe)
996 }
997}
998
999pub fn diff_changed_paths_for_bloom(
1001 odb: &Odb,
1002 parent_tree: Option<ObjectId>,
1003 commit_tree: ObjectId,
1004) -> crate::error::Result<(Vec<String>, usize)> {
1005 use crate::diff::diff_trees;
1006 let entries = diff_trees(odb, parent_tree.as_ref(), Some(&commit_tree), "")?;
1007 let raw_len = entries.len();
1008 let mut paths = Vec::new();
1009 for e in entries {
1010 let p = e.path().to_string();
1011 if !p.is_empty() {
1012 paths.push(p);
1013 }
1014 }
1015 Ok((paths, raw_len))
1016}
1017
1018pub use crate::bloom::collect_changed_paths_for_bloom;
1020
1021pub fn bloom_filter_for_commit_write(
1023 odb: &Odb,
1024 parents: &[ObjectId],
1025 tree_oid: ObjectId,
1026 settings: &BloomFilterSettings,
1027) -> crate::error::Result<(Vec<u8>, BloomBuildOutcome)> {
1028 let (changed_paths_vec, raw_count) = if let Some(first_parent) = parents.first() {
1032 let p = load_commit_tree(odb, *first_parent)?;
1033 diff_changed_paths_for_bloom(odb, Some(p), tree_oid)?
1034 } else {
1035 diff_changed_paths_for_bloom(odb, None, tree_oid)?
1036 };
1037 let set = collect_changed_paths_for_bloom(&changed_paths_vec);
1038 Ok(crate::bloom::build_bloom_filter_data(
1039 &set, raw_count, settings,
1040 ))
1041}
1042
1043fn load_commit_tree(odb: &Odb, commit_oid: ObjectId) -> crate::error::Result<ObjectId> {
1044 let obj = odb.read(&commit_oid)?;
1045 let c = crate::objects::parse_commit(&obj.data)?;
1046 Ok(c.tree)
1047}
1048
1049fn tree_has_high_bit_paths(odb: &Odb, tree_oid: ObjectId) -> bool {
1054 let Ok(obj) = odb.read(&tree_oid) else {
1055 return true;
1057 };
1058 let Ok(entries) = crate::objects::parse_tree(&obj.data) else {
1059 return true;
1060 };
1061 for e in &entries {
1062 if e.name.iter().any(|&b| b & 0x80 != 0) {
1063 return true;
1064 }
1065 if e.mode == 0o040000 && tree_has_high_bit_paths(odb, e.oid) {
1066 return true;
1067 }
1068 }
1069 false
1070}
1071
1072pub fn commit_tree_has_high_bit_paths(odb: &Odb, commit_oid: ObjectId) -> bool {
1075 match load_commit_tree(odb, commit_oid) {
1076 Ok(tree) => tree_has_high_bit_paths(odb, tree),
1077 Err(_) => true,
1078 }
1079}
1080
1081pub fn parse_graph_file(path: &Path) -> Option<ParsedGraphDump> {
1083 let raw = std::fs::read(path).ok()?;
1084 if raw.len() < 28 {
1085 return None;
1086 }
1087 let body = &raw[..raw.len() - HASH_LEN];
1088 if body.len() < 8 || &body[0..4] != SIGNATURE {
1089 return None;
1090 }
1091 let header_word = u32::from_be_bytes(body[0..4].try_into().ok()?);
1092 let num_chunks = body[6] as usize;
1093 let toc_start = 8;
1094 let mut present: std::collections::HashSet<u32> = std::collections::HashSet::new();
1095 for i in 0..num_chunks {
1096 let e = toc_start + i * 12;
1097 let id = u32::from_be_bytes(body[e..e + 4].try_into().ok()?);
1098 present.insert(id);
1099 }
1100 let mut chunk_names: Vec<String> = Vec::new();
1103 for (id, label) in [
1104 (CHUNK_OID_FANOUT, "oid_fanout"),
1105 (CHUNK_OID_LOOKUP, "oid_lookup"),
1106 (CHUNK_COMMIT_DATA, "commit_metadata"),
1107 (CHUNK_GENERATION_DATA, "generation_data"),
1108 (CHUNK_GENERATION_DATA_OVERFLOW, "generation_data_overflow"),
1109 (CHUNK_EXTRA_EDGES, "extra_edges"),
1110 (CHUNK_BLOOM_INDEXES, "bloom_indexes"),
1111 (CHUNK_BLOOM_DATA, "bloom_data"),
1112 ] {
1113 if present.contains(&id) {
1114 chunk_names.push(label.to_string());
1115 }
1116 }
1117 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw.clone())?;
1118 let bloom_opt = layer.bloom_settings.map(|s| {
1119 format!(
1120 " bloom({},{},{})",
1121 s.hash_version, s.bits_per_entry, s.num_hashes
1122 )
1123 });
1124 let mut options = String::new();
1125 if let Some(b) = bloom_opt {
1126 options.push_str(&b);
1127 }
1128 if layer.read_generation_data {
1129 options.push_str(" read_generation_data");
1130 }
1131 Some(ParsedGraphDump {
1132 header_word,
1133 version: body[4],
1134 hash_ver: body[5],
1135 num_chunks: body[6],
1136 reserved: body[7],
1137 num_commits: layer.num_commits,
1138 chunks: chunk_names.join(" "),
1139 options,
1140 })
1141}
1142
1143pub struct ParsedGraphDump {
1144 pub header_word: u32,
1145 pub version: u8,
1146 pub hash_ver: u8,
1147 pub num_chunks: u8,
1148 pub reserved: u8,
1149 pub num_commits: u32,
1150 pub chunks: String,
1151 pub options: String,
1152}
1153
1154pub fn dump_bloom_filters(path: &Path) -> Option<Vec<String>> {
1156 let raw = std::fs::read(path).ok()?;
1157 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw)?;
1158 let mut out = Vec::new();
1159 for i in 0..layer.num_commits {
1160 let slice = layer.bloom_filter_slice(i).unwrap_or(&[]);
1161 if slice.is_empty() {
1162 out.push(String::new());
1163 } else {
1164 let hex: String = slice.iter().map(|b| format!("{b:02x}")).collect();
1165 out.push(hex);
1166 }
1167 }
1168 Some(out)
1169}