1use std::path::{Path, PathBuf};
4use std::sync::{Arc, Mutex};
5
6use crate::bloom::{
7 bloom_filter_contains, bloom_keyvec_for_path, BloomBuildOutcome, BloomFilterSettings,
8};
9use crate::error::Error;
10use crate::objects::ObjectId;
11use crate::odb::Odb;
12
13const SIGNATURE: &[u8; 4] = b"CGPH";
14const GRAPH_VERSION: u8 = 1;
15const HASH_VERSION_SHA1: u8 = 1;
16const HASH_LEN: usize = 20;
17
18const CHUNK_OID_FANOUT: u32 = 0x4f49_4446; const CHUNK_OID_LOOKUP: u32 = 0x4f49_444c; const CHUNK_COMMIT_DATA: u32 = 0x4344_4154; const CHUNK_GENERATION_DATA: u32 = 0x4744_4132; const CHUNK_GENERATION_DATA_OVERFLOW: u32 = 0x4744_4f32; const CHUNK_EXTRA_EDGES: u32 = 0x4544_4745; const CHUNK_BLOOM_INDEXES: u32 = 0x4249_4458; const CHUNK_BLOOM_DATA: u32 = 0x4244_4154; const BLOOM_HEADER: usize = crate::bloom::BLOOMDATA_HEADER_LEN;
28
29const CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW: u32 = 1u32 << 31;
31
32fn warn_path_for_graph_file(path: &Path) -> String {
33 let s = path.to_string_lossy();
34 if let Some(idx) = s.find(".git/") {
35 return s[idx..].replace('\\', "/");
36 }
37 s.replace('\\', "/")
38}
39
40#[derive(Debug, Clone)]
42pub struct CommitGraphLayer {
43 pub path: PathBuf,
44 body: Vec<u8>,
45 num_commits: u32,
46 oid_lookup_off: usize,
47 #[allow(dead_code)]
48 chunk_commit_data_off: usize,
49 #[allow(dead_code)]
50 chunk_generation_data: Option<usize>,
51 read_generation_data: bool,
52 chunk_bloom_indexes: Option<usize>,
53 chunk_bloom_data: Option<(usize, usize)>,
54 bloom_settings: Option<BloomFilterSettings>,
55 bloom_disabled: bool,
56}
57
58impl CommitGraphLayer {
59 pub fn try_parse(path: PathBuf, raw: Vec<u8>) -> Result<Self, Error> {
61 if raw.len() < 28 {
62 return Err(Error::CorruptObject(
63 "commit-graph file too small".to_owned(),
64 ));
65 }
66 let body = raw[..raw.len() - HASH_LEN].to_vec();
67 if body.len() < 8 || &body[0..4] != SIGNATURE {
68 return Err(Error::CorruptObject(
69 "commit-graph has bad signature".to_owned(),
70 ));
71 }
72 if body[4] != GRAPH_VERSION || body[5] != HASH_VERSION_SHA1 {
73 return Err(Error::CorruptObject(format!(
74 "commit-graph version/hash not supported (version {} hash {})",
75 body[4], body[5]
76 )));
77 }
78 let num_chunks = body[6] as usize;
79 let toc_start = 8;
80 let toc_end = toc_start + (num_chunks + 1) * 12;
81 if body.len() < toc_end {
82 return Err(Error::CorruptObject(
83 "commit-graph truncated at chunk table".to_owned(),
84 ));
85 }
86
87 let mut fanout_off = None;
88 let mut oid_lookup_off = None;
89 let mut commit_data_off = None;
90 let mut generation_off = None;
91 let mut generation_overflow_off = None;
92 let mut bloom_idx_off = None;
93 let mut bloom_data_range = None;
94 let mut chunk_offsets: Vec<usize> = Vec::new();
95 let mut toc_entries: Vec<(u32, usize)> = Vec::with_capacity(num_chunks);
96
97 for i in 0..num_chunks {
98 let e = toc_start + i * 12;
99 let id = u32::from_be_bytes(
100 body[e..e + 4]
101 .try_into()
102 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
103 );
104 let off = u64::from_be_bytes(
105 body[e + 4..e + 12]
106 .try_into()
107 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
108 ) as usize;
109 toc_entries.push((id, off));
110 chunk_offsets.push(off);
111 match id {
112 CHUNK_OID_FANOUT => fanout_off = Some(off),
113 CHUNK_OID_LOOKUP => oid_lookup_off = Some(off),
114 CHUNK_COMMIT_DATA => commit_data_off = Some(off),
115 CHUNK_GENERATION_DATA => generation_off = Some(off),
116 CHUNK_GENERATION_DATA_OVERFLOW => generation_overflow_off = Some(off),
117 CHUNK_BLOOM_INDEXES => bloom_idx_off = Some(off),
118 CHUNK_BLOOM_DATA => {
119 let end = if i + 1 < num_chunks {
120 let e2 = toc_start + (i + 1) * 12;
121 u64::from_be_bytes(body[e2 + 4..e2 + 12].try_into().unwrap_or([0u8; 8]))
122 as usize
123 } else {
124 let term = toc_start + num_chunks * 12;
125 u64::from_be_bytes(body[term + 4..term + 12].try_into().unwrap_or([0u8; 8]))
126 as usize
127 };
128 bloom_data_range = Some((off, end.saturating_sub(off)));
129 }
130 _ => {}
131 }
132 }
133 let file_end = u64::from_be_bytes(
134 body[toc_start + num_chunks * 12 + 4..toc_start + num_chunks * 12 + 12]
135 .try_into()
136 .map_err(|_| Error::CorruptObject("commit-graph bad file end".to_owned()))?,
137 ) as usize;
138 chunk_offsets.push(file_end);
139 chunk_offsets.sort_unstable();
140 chunk_offsets.dedup();
141
142 fn chunk_byte_range(
143 start: usize,
144 toc_entries: &[(u32, usize)],
145 file_end: usize,
146 ) -> Result<usize, Error> {
147 let mut ends: Vec<usize> = toc_entries
148 .iter()
149 .map(|&(_, o)| o)
150 .filter(|&o| o > start)
151 .collect();
152 ends.sort_unstable();
153 let end = ends.first().copied().unwrap_or(file_end);
154 if end < start {
155 return Err(Error::CorruptObject(
156 "commit-graph chunk layout invalid".to_owned(),
157 ));
158 }
159 Ok(end)
160 }
161
162 if let Some(gda) = generation_off {
163 let gda_end = chunk_byte_range(gda, &toc_entries, file_end)?;
164 let gda_len = gda_end.saturating_sub(gda);
165 let num_commits = fanout_off
166 .and_then(|fo| {
167 let slice = body.get(fo + 255 * 4..fo + 256 * 4)?;
168 Some(u32::from_be_bytes(slice.try_into().ok()?))
169 })
170 .ok_or_else(|| Error::CorruptObject("commit-graph missing fanout".to_owned()))?;
171 let expected = num_commits as usize * 4;
172 if gda_len < expected {
173 return Err(Error::CorruptObject(
174 "commit-graph generation data chunk is too small".to_owned(),
175 ));
176 }
177 let gda_slice = body.get(gda..gda + expected).ok_or_else(|| {
178 Error::CorruptObject("commit-graph generation data OOB".to_owned())
179 })?;
180 let mut max_overflow_idx: Option<u32> = None;
181 for w in 0..num_commits as usize {
182 let v =
183 u32::from_be_bytes(gda_slice[w * 4..w * 4 + 4].try_into().map_err(|_| {
184 Error::CorruptObject("commit-graph GDA2 corrupt".to_owned())
185 })?);
186 if v & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW != 0 {
187 let pos = v ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
188 max_overflow_idx = Some(match max_overflow_idx {
189 None => pos,
190 Some(m) => m.max(pos),
191 });
192 }
193 }
194 if let Some(pos) = max_overflow_idx {
195 let Some(gdo_start) = generation_overflow_off else {
196 return Err(Error::CorruptObject(
197 "commit-graph requires overflow generation data but has none".to_owned(),
198 ));
199 };
200 let gdo_end = chunk_byte_range(gdo_start, &toc_entries, file_end)?;
201 let overflow_bytes = gdo_end.saturating_sub(gdo_start);
202 let n_slots = overflow_bytes / 8;
203 if n_slots <= pos as usize {
204 return Err(Error::CorruptObject(
205 "commit-graph overflow generation data is too small".to_owned(),
206 ));
207 }
208 }
209 }
210 let bidx_len = bloom_idx_off.and_then(|b| {
211 chunk_offsets
212 .iter()
213 .find(|&&o| o > b)
214 .map(|&next| next.saturating_sub(b))
215 });
216
217 let fanout_off = fanout_off.ok_or_else(|| {
218 Error::CorruptObject("commit-graph missing OID fanout chunk".to_owned())
219 })?;
220 let oid_lookup_off = oid_lookup_off.ok_or_else(|| {
221 Error::CorruptObject("commit-graph missing OID lookup chunk".to_owned())
222 })?;
223 let commit_data_off = commit_data_off.ok_or_else(|| {
224 Error::CorruptObject("commit-graph missing commit data chunk".to_owned())
225 })?;
226 if fanout_off + 256 * 4 > body.len() || oid_lookup_off + 4 > body.len() {
227 return Err(Error::CorruptObject(
228 "commit-graph chunk extends past end of file".to_owned(),
229 ));
230 }
231 let num_commits = u32::from_be_bytes(
232 body[fanout_off + 255 * 4..fanout_off + 256 * 4]
233 .try_into()
234 .map_err(|_| Error::CorruptObject("commit-graph fanout corrupt".to_owned()))?,
235 );
236 if oid_lookup_off + num_commits as usize * HASH_LEN > body.len() {
237 return Err(Error::CorruptObject(
238 "commit-graph OID lookup extends past end of file".to_owned(),
239 ));
240 }
241 let graph_data_width = HASH_LEN + 16;
242 if commit_data_off + num_commits as usize * graph_data_width > body.len() {
243 return Err(Error::CorruptObject(
244 "commit-graph commit data extends past end of file".to_owned(),
245 ));
246 }
247
248 let read_generation_data = generation_off.is_some();
249 let mut bloom_settings = None;
250 let mut chunk_bloom_data = None;
251 if let (Some(_bidx), Some((bdat_off, bdat_len))) = (bloom_idx_off, bloom_data_range) {
252 if bdat_len < BLOOM_HEADER {
253 eprintln!(
254 "warning: ignoring too-small changed-path chunk ({} < {}) in commit-graph file",
255 bdat_len, BLOOM_HEADER
256 );
257 } else if bdat_off + bdat_len <= body.len() {
258 let hdr = &body[bdat_off..bdat_off + BLOOM_HEADER];
259 let hash_version: [u8; 4] = hdr[0..4]
260 .try_into()
261 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
262 let num_hashes: [u8; 4] = hdr[4..8]
263 .try_into()
264 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
265 let bits_per_entry: [u8; 4] = hdr[8..12]
266 .try_into()
267 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
268 bloom_settings = Some(BloomFilterSettings {
269 hash_version: u32::from_be_bytes(hash_version),
270 num_hashes: u32::from_be_bytes(num_hashes),
271 bits_per_entry: u32::from_be_bytes(bits_per_entry),
272 max_changed_paths: 512,
273 });
274 chunk_bloom_data = Some((bdat_off, bdat_len));
275 }
276 }
277
278 let bloom_indexes_ok = if let (Some(bidx), Some(bsize)) = (bloom_idx_off, bidx_len) {
279 if bsize / 4 != num_commits as usize {
280 eprintln!("warning: commit-graph changed-path index chunk is too small");
281 false
282 } else if bidx + bsize > body.len() {
283 eprintln!("warning: commit-graph changed-path index chunk is too small");
284 false
285 } else {
286 true
287 }
288 } else {
289 false
290 };
291 let bloom_pair_ok = bloom_settings.is_some()
292 && chunk_bloom_data.is_some()
293 && bloom_indexes_ok
294 && chunk_bloom_data.is_some_and(|(_, len)| len >= BLOOM_HEADER);
295
296 let (chunk_bloom_indexes, bloom_settings) = if bloom_pair_ok {
297 (bloom_idx_off, bloom_settings)
298 } else {
299 (None, None)
300 };
301 let chunk_bloom_data = if bloom_pair_ok {
302 chunk_bloom_data
303 } else {
304 None
305 };
306
307 Ok(Self {
308 path,
309 body,
310 num_commits,
311 oid_lookup_off,
312 chunk_commit_data_off: commit_data_off,
313 chunk_generation_data: generation_off,
314 read_generation_data,
315 chunk_bloom_indexes,
316 chunk_bloom_data,
317 bloom_settings,
318 bloom_disabled: false,
319 })
320 }
321
322 fn parse(path: PathBuf, raw: Vec<u8>) -> Option<Self> {
323 Self::try_parse(path, raw).ok()
324 }
325
326 fn oid_at_lex(&self, lex_index: u32) -> Option<ObjectId> {
327 if lex_index >= self.num_commits {
328 return None;
329 }
330 let off = self.oid_lookup_off + lex_index as usize * HASH_LEN;
331 ObjectId::from_bytes(self.body.get(off..off + HASH_LEN)?.try_into().ok()?).ok()
332 }
333
334 fn bsearch_oid(&self, oid: &ObjectId) -> Option<u32> {
335 let mut lo = 0u32;
336 let mut hi = self.num_commits;
337 let bytes = oid.as_bytes();
338 while lo < hi {
339 let mid = (lo + hi) / 2;
340 let off = self.oid_lookup_off + mid as usize * HASH_LEN;
341 let slice = &self.body[off..off + HASH_LEN];
342 match slice.cmp(bytes) {
343 std::cmp::Ordering::Less => lo = mid + 1,
344 std::cmp::Ordering::Greater => hi = mid,
345 std::cmp::Ordering::Equal => return Some(mid),
346 }
347 }
348 None
349 }
350
351 fn disable_bloom(&mut self) {
352 self.chunk_bloom_indexes = None;
353 self.chunk_bloom_data = None;
354 self.bloom_settings = None;
355 self.bloom_disabled = true;
356 }
357
358 fn layer_display_id(&self) -> String {
359 self.path
360 .file_stem()
361 .and_then(|s| s.to_str())
362 .map(|s| s.strip_prefix("graph-").unwrap_or(s).to_string())
363 .unwrap_or_else(|| "commit-graph".to_string())
364 }
365
366 fn bloom_filter_slice(&self, lex_index: u32) -> Option<&[u8]> {
367 let _settings = self.bloom_settings.as_ref()?;
368 let bidx_base = self.chunk_bloom_indexes?;
369 let (bdat_off, bdat_total) = self.chunk_bloom_data?;
370 let graph_warn = warn_path_for_graph_file(self.path.as_path());
371 if lex_index >= self.num_commits {
372 return None;
373 }
374 let payload_len = bdat_total.saturating_sub(BLOOM_HEADER);
375 let end_rel = u32::from_be_bytes(
376 self.body[bidx_base + lex_index as usize * 4..bidx_base + lex_index as usize * 4 + 4]
377 .try_into()
378 .ok()?,
379 ) as usize;
380 let start_rel = if lex_index == 0 {
381 0usize
382 } else {
383 u32::from_be_bytes(
384 self.body[bidx_base + (lex_index as usize - 1) * 4
385 ..bidx_base + (lex_index as usize - 1) * 4 + 4]
386 .try_into()
387 .ok()?,
388 ) as usize
389 };
390 if end_rel < start_rel {
391 eprintln!(
392 "warning: ignoring decreasing changed-path index offsets ({start_rel} > {end_rel}) for positions {} and {} of {}",
393 lex_index.saturating_sub(1),
394 lex_index,
395 graph_warn
396 );
397 return None;
398 }
399 let max_payload = payload_len;
400 if end_rel > max_payload {
401 eprintln!(
402 "warning: ignoring out-of-range offset ({end_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
403 lex_index,
404 graph_warn,
405 bdat_total = bdat_total
406 );
407 return None;
408 }
409 if start_rel > max_payload {
410 eprintln!(
411 "warning: ignoring out-of-range offset ({start_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
412 lex_index.saturating_sub(1),
413 graph_warn,
414 bdat_total = bdat_total
415 );
416 return None;
417 }
418 let data_base = bdat_off + BLOOM_HEADER;
419 let abs_start = data_base + start_rel;
420 let abs_end = data_base + end_rel;
421 if abs_end > bdat_off + bdat_total || abs_start > abs_end {
422 return None;
423 }
424 Some(&self.body[abs_start..abs_end])
425 }
426}
427
428#[derive(Debug, Clone, Copy, PartialEq, Eq)]
430pub enum BloomPrecheck {
431 Inapplicable,
433 NotInGraph,
435 FilterNotPresent,
437 DefinitelyNot,
439 Maybe,
441}
442
443#[derive(Debug, Default, Clone)]
445pub struct BloomWalkStats {
446 pub filter_not_present: u32,
447 pub maybe: u32,
448 pub definitely_not: u32,
449 pub false_positive: u32,
450}
451
452impl BloomWalkStats {
453 pub fn record_precheck(&mut self, pre: BloomPrecheck) {
454 match pre {
455 BloomPrecheck::Inapplicable | BloomPrecheck::NotInGraph => {}
456 BloomPrecheck::FilterNotPresent => self.filter_not_present += 1,
457 BloomPrecheck::DefinitelyNot => self.definitely_not += 1,
458 BloomPrecheck::Maybe => self.maybe += 1,
459 }
460 }
461
462 pub fn record_false_positive(&mut self) {
463 self.false_positive += 1;
464 }
465}
466
467pub type BloomWalkStatsHandle = Arc<Mutex<BloomWalkStats>>;
469
470#[derive(Debug, Clone)]
472pub struct CommitGraphChain {
473 layers: Vec<CommitGraphLayer>,
474}
475
476impl CommitGraphChain {
477 #[must_use]
479 pub fn top_layer_bloom_settings(&self) -> Option<BloomFilterSettings> {
480 self.layers.first()?.bloom_settings
481 }
482
483 #[must_use]
485 pub fn total_commits(&self) -> u32 {
486 self.layers.iter().map(|l| l.num_commits).sum()
487 }
488
489 #[must_use]
491 pub fn layer_paths_oldest_first(&self) -> Vec<PathBuf> {
492 self.layers.iter().rev().map(|l| l.path.clone()).collect()
493 }
494
495 pub fn try_load(objects_dir: &Path) -> Result<Option<Self>, Error> {
500 let info = objects_dir.join("info");
501 let chain_path = info.join("commit-graphs").join("commit-graph-chain");
502 if chain_path.is_file() {
503 let content = std::fs::read_to_string(&chain_path).map_err(Error::from)?;
504 let mut layers = Vec::new();
505 for line in content.lines() {
506 let h = line.trim();
507 if h.len() != 40 {
508 continue;
509 }
510 let graph_path = info.join("commit-graphs").join(format!("graph-{h}.graph"));
511 let raw = std::fs::read(&graph_path).map_err(Error::from)?;
512 let layer = CommitGraphLayer::try_parse(graph_path, raw)?;
513 layers.push(layer);
514 }
515 if layers.is_empty() {
516 return Ok(None);
517 }
518 let mut chain = Self { layers };
519 chain.validate_bloom_compatibility();
520 return Ok(Some(chain));
521 }
522 let single = info.join("commit-graph");
523 if single.is_file() {
524 let raw = std::fs::read(&single).map_err(Error::from)?;
525 let layer = CommitGraphLayer::try_parse(single.clone(), raw)?;
526 let mut chain = Self {
527 layers: vec![layer],
528 };
529 chain.validate_bloom_compatibility();
530 return Ok(Some(chain));
531 }
532 Ok(None)
533 }
534
535 pub fn load(objects_dir: &Path) -> Option<Self> {
537 Self::try_load(objects_dir).ok().flatten()
538 }
539
540 fn validate_bloom_compatibility(&mut self) {
541 let mut ref_settings: Option<BloomFilterSettings> = None;
542 for layer in &mut self.layers {
543 let Some(bs) = layer.bloom_settings else {
544 continue;
545 };
546 match ref_settings {
547 None => ref_settings = Some(bs),
548 Some(r) => {
549 if r.hash_version != bs.hash_version
550 || r.num_hashes != bs.num_hashes
551 || r.bits_per_entry != bs.bits_per_entry
552 {
553 let id = layer.layer_display_id();
554 eprintln!(
555 "warning: disabling Bloom filters for commit-graph layer '{id}' due to incompatible settings"
556 );
557 layer.disable_bloom();
558 }
559 }
560 }
561 }
562 }
563
564 pub fn find_commit(&self, oid: &ObjectId) -> Option<(usize, u32)> {
566 for (i, layer) in self.layers.iter().enumerate() {
567 if let Some(lex) = layer.bsearch_oid(oid) {
568 return Some((i, lex));
569 }
570 }
571 None
572 }
573
574 pub fn global_position(&self, oid: &ObjectId) -> Option<u32> {
576 let (layer_idx, lex) = self.find_commit(oid)?;
577 let below: u32 = self.layers[layer_idx + 1..]
578 .iter()
579 .map(|l| l.num_commits)
580 .sum();
581 Some(below + lex)
582 }
583
584 pub fn all_oids_in_order(&self) -> Vec<ObjectId> {
586 let mut out = Vec::new();
587 for layer in self.layers.iter().rev() {
588 for i in 0..layer.num_commits {
589 if let Some(oid) = layer.oid_at_lex(i) {
590 out.push(oid);
591 }
592 }
593 }
594 out
595 }
596
597 pub fn bloom_precheck_for_paths(
599 &self,
600 odb: &Odb,
601 oid: ObjectId,
602 pathspecs: &[String],
603 bloom_cwd: Option<&str>,
604 requested_hash_version: i32,
605 read_changed_paths: bool,
606 ) -> std::result::Result<BloomPrecheck, crate::error::Error> {
607 if !read_changed_paths {
608 return Ok(BloomPrecheck::Inapplicable);
609 }
610 let Some((layer_idx, lex)) = self.find_commit(&oid) else {
611 return Ok(BloomPrecheck::NotInGraph);
612 };
613 let layer = &self.layers[layer_idx];
614 let Some(settings) = layer.bloom_settings.as_ref() else {
615 return Ok(BloomPrecheck::FilterNotPresent);
616 };
617 let effective_version = if requested_hash_version < 0 {
618 settings.hash_version as i32
619 } else {
620 requested_hash_version
621 };
622 if effective_version != settings.hash_version as i32 {
623 return Ok(BloomPrecheck::FilterNotPresent);
624 }
625
626 let commit = crate::objects::parse_commit(&odb.read(&oid)?.data)?;
627 if commit.parents.len() > 1 {
628 return Ok(BloomPrecheck::Inapplicable);
629 }
630
631 let filter = match layer.bloom_filter_slice(lex) {
632 Some(s) => s,
633 None => return Ok(BloomPrecheck::FilterNotPresent),
634 };
635 if filter.is_empty() {
636 return Ok(BloomPrecheck::FilterNotPresent);
637 }
638
639 let mut any_pathspec_maybe = false;
642 let mut checked_any_keys = false;
643 for spec in pathspecs {
644 if spec.is_empty() || crate::pathspec::pathspec_is_exclude(spec) {
645 continue;
646 }
647 let Some(norm) = crate::pathspec::bloom_lookup_prefix_with_cwd(spec, bloom_cwd) else {
648 continue;
649 };
650 let keys = bloom_keyvec_for_path(norm.as_str(), settings);
651 if keys.is_empty() {
652 continue;
653 }
654 checked_any_keys = true;
655 let mut all_keys_maybe = true;
656 for key in &keys {
657 match bloom_filter_contains(key, filter, settings) {
658 Ok(true) => {}
659 Ok(false) => {
660 all_keys_maybe = false;
661 break;
662 }
663 Err(()) => {
664 all_keys_maybe = true;
665 break;
666 }
667 }
668 }
669 if all_keys_maybe {
670 any_pathspec_maybe = true;
671 break;
672 }
673 }
674 if checked_any_keys && !any_pathspec_maybe {
675 return Ok(BloomPrecheck::DefinitelyNot);
676 }
677 Ok(BloomPrecheck::Maybe)
678 }
679}
680
681pub fn diff_changed_paths_for_bloom(
683 odb: &Odb,
684 parent_tree: Option<ObjectId>,
685 commit_tree: ObjectId,
686) -> crate::error::Result<(Vec<String>, usize)> {
687 use crate::diff::diff_trees;
688 let entries = diff_trees(odb, parent_tree.as_ref(), Some(&commit_tree), "")?;
689 let raw_len = entries.len();
690 let mut paths = Vec::new();
691 for e in entries {
692 let p = e.path().to_string();
693 if !p.is_empty() {
694 paths.push(p);
695 }
696 }
697 Ok((paths, raw_len))
698}
699
700pub use crate::bloom::collect_changed_paths_for_bloom;
702
703pub fn bloom_filter_for_commit_write(
705 odb: &Odb,
706 parents: &[ObjectId],
707 tree_oid: ObjectId,
708 settings: &BloomFilterSettings,
709) -> crate::error::Result<(Vec<u8>, BloomBuildOutcome)> {
710 let (changed_paths_vec, raw_count) = if parents.len() == 1 {
711 let p = load_commit_tree(odb, parents[0])?;
712 diff_changed_paths_for_bloom(odb, Some(p), tree_oid)?
713 } else if parents.is_empty() {
714 diff_changed_paths_for_bloom(odb, None, tree_oid)?
715 } else {
716 return Ok((vec![0xff], BloomBuildOutcome::TruncatedLarge));
717 };
718 let set = collect_changed_paths_for_bloom(&changed_paths_vec);
719 Ok(crate::bloom::build_bloom_filter_data(
720 &set, raw_count, settings,
721 ))
722}
723
724fn load_commit_tree(odb: &Odb, commit_oid: ObjectId) -> crate::error::Result<ObjectId> {
725 let obj = odb.read(&commit_oid)?;
726 let c = crate::objects::parse_commit(&obj.data)?;
727 Ok(c.tree)
728}
729
730pub fn parse_graph_file(path: &Path) -> Option<ParsedGraphDump> {
732 let raw = std::fs::read(path).ok()?;
733 if raw.len() < 28 {
734 return None;
735 }
736 let body = &raw[..raw.len() - HASH_LEN];
737 if body.len() < 8 || &body[0..4] != SIGNATURE {
738 return None;
739 }
740 let header_word = u32::from_be_bytes(body[0..4].try_into().ok()?);
741 let num_chunks = body[6] as usize;
742 let mut chunk_names = Vec::new();
743 let toc_start = 8;
744 for i in 0..num_chunks {
745 let e = toc_start + i * 12;
746 let id = u32::from_be_bytes(body[e..e + 4].try_into().ok()?);
747 let name = match id {
748 CHUNK_OID_FANOUT => "oid_fanout",
749 CHUNK_OID_LOOKUP => "oid_lookup",
750 CHUNK_COMMIT_DATA => "commit_metadata",
751 CHUNK_GENERATION_DATA => "generation_data",
752 CHUNK_GENERATION_DATA_OVERFLOW => "generation_data_overflow",
753 CHUNK_EXTRA_EDGES => "extra_edges",
754 CHUNK_BLOOM_INDEXES => "bloom_indexes",
755 CHUNK_BLOOM_DATA => "bloom_data",
756 _ => "unknown",
757 };
758 chunk_names.push(name.to_string());
759 }
760 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw.clone())?;
761 let bloom_opt = layer.bloom_settings.map(|s| {
762 format!(
763 " bloom({},{},{})",
764 s.hash_version, s.bits_per_entry, s.num_hashes
765 )
766 });
767 let mut options = String::new();
768 if let Some(b) = bloom_opt {
769 options.push_str(&b);
770 }
771 if layer.read_generation_data {
772 options.push_str(" read_generation_data");
773 }
774 Some(ParsedGraphDump {
775 header_word,
776 version: body[4],
777 hash_ver: body[5],
778 num_chunks: body[6],
779 reserved: body[7],
780 num_commits: layer.num_commits,
781 chunks: chunk_names.join(" "),
782 options,
783 })
784}
785
786pub struct ParsedGraphDump {
787 pub header_word: u32,
788 pub version: u8,
789 pub hash_ver: u8,
790 pub num_chunks: u8,
791 pub reserved: u8,
792 pub num_commits: u32,
793 pub chunks: String,
794 pub options: String,
795}
796
797pub fn dump_bloom_filters(path: &Path) -> Option<Vec<String>> {
799 let raw = std::fs::read(path).ok()?;
800 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw)?;
801 let mut out = Vec::new();
802 for i in 0..layer.num_commits {
803 let slice = layer.bloom_filter_slice(i).unwrap_or(&[]);
804 if slice.is_empty() {
805 out.push(String::new());
806 } else {
807 let hex: String = slice.iter().map(|b| format!("{b:02x}")).collect();
808 out.push(hex);
809 }
810 }
811 Some(out)
812}