1use std::path::{Path, PathBuf};
4use std::sync::{Arc, Mutex};
5
6use crate::bloom::{
7 bloom_filter_contains, bloom_keyvec_for_path, BloomBuildOutcome, BloomFilterSettings,
8};
9use crate::error::Error;
10use crate::objects::ObjectId;
11use crate::odb::Odb;
12
13fn warn_once_for_disabled_bloom_layer(id: &str) -> bool {
17 use std::collections::HashSet;
18 use std::sync::OnceLock;
19 static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
20 let set = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
21 match set.lock() {
22 Ok(mut guard) => guard.insert(id.to_string()),
23 Err(_) => true,
24 }
25}
26
27const SIGNATURE: &[u8; 4] = b"CGPH";
28const GRAPH_VERSION: u8 = 1;
29const HASH_VERSION_SHA1: u8 = 1;
30const HASH_LEN: usize = 20;
31
32const CHUNK_OID_FANOUT: u32 = 0x4f49_4446; const CHUNK_OID_LOOKUP: u32 = 0x4f49_444c; const CHUNK_COMMIT_DATA: u32 = 0x4344_4154; const CHUNK_GENERATION_DATA: u32 = 0x4744_4132; const CHUNK_GENERATION_DATA_OVERFLOW: u32 = 0x4744_4f32; const CHUNK_EXTRA_EDGES: u32 = 0x4544_4745; const CHUNK_BLOOM_INDEXES: u32 = 0x4249_4458; const CHUNK_BLOOM_DATA: u32 = 0x4244_4154; const BLOOM_HEADER: usize = crate::bloom::BLOOMDATA_HEADER_LEN;
42
43const CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW: u32 = 1u32 << 31;
45
46fn warn_path_for_graph_file(path: &Path) -> String {
47 let s = path.to_string_lossy();
48 if let Some(idx) = s.find(".git/") {
49 return s[idx..].replace('\\', "/");
50 }
51 s.replace('\\', "/")
52}
53
54#[derive(Debug, Clone)]
56pub struct CommitGraphLayer {
57 pub path: PathBuf,
58 body: Vec<u8>,
59 num_commits: u32,
60 oid_lookup_off: usize,
61 #[allow(dead_code)]
62 chunk_commit_data_off: usize,
63 #[allow(dead_code)]
64 chunk_generation_data: Option<usize>,
65 read_generation_data: bool,
66 chunk_bloom_indexes: Option<usize>,
67 chunk_bloom_data: Option<(usize, usize)>,
68 bloom_settings: Option<BloomFilterSettings>,
69 bloom_disabled: bool,
70}
71
72impl CommitGraphLayer {
73 pub fn try_parse(path: PathBuf, raw: Vec<u8>) -> Result<Self, Error> {
75 if raw.len() < 28 {
76 return Err(Error::CorruptObject(
77 "commit-graph file too small".to_owned(),
78 ));
79 }
80 let body = raw[..raw.len() - HASH_LEN].to_vec();
81 if body.len() < 8 || &body[0..4] != SIGNATURE {
82 return Err(Error::CorruptObject(
83 "commit-graph has bad signature".to_owned(),
84 ));
85 }
86 if body[4] != GRAPH_VERSION || body[5] != HASH_VERSION_SHA1 {
87 return Err(Error::CorruptObject(format!(
88 "commit-graph version/hash not supported (version {} hash {})",
89 body[4], body[5]
90 )));
91 }
92 let num_chunks = body[6] as usize;
93 let toc_start = 8;
94 let toc_end = toc_start + (num_chunks + 1) * 12;
95 if body.len() < toc_end {
96 return Err(Error::CorruptObject(
97 "commit-graph truncated at chunk table".to_owned(),
98 ));
99 }
100
101 let mut fanout_off = None;
102 let mut oid_lookup_off = None;
103 let mut commit_data_off = None;
104 let mut generation_off = None;
105 let mut generation_overflow_off = None;
106 let mut bloom_idx_off = None;
107 let mut bloom_data_range = None;
108 let mut chunk_offsets: Vec<usize> = Vec::new();
109 let mut toc_entries: Vec<(u32, usize)> = Vec::with_capacity(num_chunks);
110
111 for i in 0..num_chunks {
112 let e = toc_start + i * 12;
113 let id = u32::from_be_bytes(
114 body[e..e + 4]
115 .try_into()
116 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
117 );
118 let off = u64::from_be_bytes(
119 body[e + 4..e + 12]
120 .try_into()
121 .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
122 ) as usize;
123 toc_entries.push((id, off));
124 chunk_offsets.push(off);
125 match id {
126 CHUNK_OID_FANOUT => fanout_off = Some(off),
127 CHUNK_OID_LOOKUP => oid_lookup_off = Some(off),
128 CHUNK_COMMIT_DATA => commit_data_off = Some(off),
129 CHUNK_GENERATION_DATA => generation_off = Some(off),
130 CHUNK_GENERATION_DATA_OVERFLOW => generation_overflow_off = Some(off),
131 CHUNK_BLOOM_INDEXES => bloom_idx_off = Some(off),
132 CHUNK_BLOOM_DATA => {
133 let end = if i + 1 < num_chunks {
134 let e2 = toc_start + (i + 1) * 12;
135 u64::from_be_bytes(body[e2 + 4..e2 + 12].try_into().unwrap_or([0u8; 8]))
136 as usize
137 } else {
138 let term = toc_start + num_chunks * 12;
139 u64::from_be_bytes(body[term + 4..term + 12].try_into().unwrap_or([0u8; 8]))
140 as usize
141 };
142 bloom_data_range = Some((off, end.saturating_sub(off)));
143 }
144 _ => {}
145 }
146 }
147 let file_end = u64::from_be_bytes(
148 body[toc_start + num_chunks * 12 + 4..toc_start + num_chunks * 12 + 12]
149 .try_into()
150 .map_err(|_| Error::CorruptObject("commit-graph bad file end".to_owned()))?,
151 ) as usize;
152 chunk_offsets.push(file_end);
153 chunk_offsets.sort_unstable();
154 chunk_offsets.dedup();
155
156 fn chunk_byte_range(
157 start: usize,
158 toc_entries: &[(u32, usize)],
159 file_end: usize,
160 ) -> Result<usize, Error> {
161 let mut ends: Vec<usize> = toc_entries
162 .iter()
163 .map(|&(_, o)| o)
164 .filter(|&o| o > start)
165 .collect();
166 ends.sort_unstable();
167 let end = ends.first().copied().unwrap_or(file_end);
168 if end < start {
169 return Err(Error::CorruptObject(
170 "commit-graph chunk layout invalid".to_owned(),
171 ));
172 }
173 Ok(end)
174 }
175
176 if let Some(gda) = generation_off {
177 let gda_end = chunk_byte_range(gda, &toc_entries, file_end)?;
178 let gda_len = gda_end.saturating_sub(gda);
179 let num_commits = fanout_off
180 .and_then(|fo| {
181 let slice = body.get(fo + 255 * 4..fo + 256 * 4)?;
182 Some(u32::from_be_bytes(slice.try_into().ok()?))
183 })
184 .ok_or_else(|| Error::CorruptObject("commit-graph missing fanout".to_owned()))?;
185 let expected = num_commits as usize * 4;
186 if gda_len < expected {
187 return Err(Error::CorruptObject(
188 "commit-graph generation data chunk is too small".to_owned(),
189 ));
190 }
191 let gda_slice = body.get(gda..gda + expected).ok_or_else(|| {
192 Error::CorruptObject("commit-graph generation data OOB".to_owned())
193 })?;
194 let mut max_overflow_idx: Option<u32> = None;
195 for w in 0..num_commits as usize {
196 let v =
197 u32::from_be_bytes(gda_slice[w * 4..w * 4 + 4].try_into().map_err(|_| {
198 Error::CorruptObject("commit-graph GDA2 corrupt".to_owned())
199 })?);
200 if v & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW != 0 {
201 let pos = v ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
202 max_overflow_idx = Some(match max_overflow_idx {
203 None => pos,
204 Some(m) => m.max(pos),
205 });
206 }
207 }
208 if let Some(pos) = max_overflow_idx {
209 let Some(gdo_start) = generation_overflow_off else {
210 return Err(Error::CorruptObject(
211 "commit-graph requires overflow generation data but has none".to_owned(),
212 ));
213 };
214 let gdo_end = chunk_byte_range(gdo_start, &toc_entries, file_end)?;
215 let overflow_bytes = gdo_end.saturating_sub(gdo_start);
216 let n_slots = overflow_bytes / 8;
217 if n_slots <= pos as usize {
218 return Err(Error::CorruptObject(
219 "commit-graph overflow generation data is too small".to_owned(),
220 ));
221 }
222 }
223 }
224 let bidx_len = bloom_idx_off.and_then(|b| {
225 chunk_offsets
226 .iter()
227 .find(|&&o| o > b)
228 .map(|&next| next.saturating_sub(b))
229 });
230
231 let fanout_off = fanout_off.ok_or_else(|| {
232 Error::CorruptObject("commit-graph missing OID fanout chunk".to_owned())
233 })?;
234 let oid_lookup_off = oid_lookup_off.ok_or_else(|| {
235 Error::CorruptObject("commit-graph missing OID lookup chunk".to_owned())
236 })?;
237 let commit_data_off = commit_data_off.ok_or_else(|| {
238 Error::CorruptObject("commit-graph missing commit data chunk".to_owned())
239 })?;
240 if fanout_off + 256 * 4 > body.len() || oid_lookup_off + 4 > body.len() {
241 return Err(Error::CorruptObject(
242 "commit-graph chunk extends past end of file".to_owned(),
243 ));
244 }
245 let num_commits = u32::from_be_bytes(
246 body[fanout_off + 255 * 4..fanout_off + 256 * 4]
247 .try_into()
248 .map_err(|_| Error::CorruptObject("commit-graph fanout corrupt".to_owned()))?,
249 );
250 if oid_lookup_off + num_commits as usize * HASH_LEN > body.len() {
251 return Err(Error::CorruptObject(
252 "commit-graph OID lookup extends past end of file".to_owned(),
253 ));
254 }
255 let graph_data_width = HASH_LEN + 16;
256 if commit_data_off + num_commits as usize * graph_data_width > body.len() {
257 return Err(Error::CorruptObject(
258 "commit-graph commit data extends past end of file".to_owned(),
259 ));
260 }
261
262 let read_generation_data = generation_off.is_some();
263 let mut bloom_settings = None;
264 let mut chunk_bloom_data = None;
265 if let (Some(_bidx), Some((bdat_off, bdat_len))) = (bloom_idx_off, bloom_data_range) {
266 if bdat_len < BLOOM_HEADER {
267 eprintln!(
268 "warning: ignoring too-small changed-path chunk ({} < {}) in commit-graph file",
269 bdat_len, BLOOM_HEADER
270 );
271 } else if bdat_off + bdat_len <= body.len() {
272 let hdr = &body[bdat_off..bdat_off + BLOOM_HEADER];
273 let hash_version: [u8; 4] = hdr[0..4]
274 .try_into()
275 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
276 let num_hashes: [u8; 4] = hdr[4..8]
277 .try_into()
278 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
279 let bits_per_entry: [u8; 4] = hdr[8..12]
280 .try_into()
281 .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
282 bloom_settings = Some(BloomFilterSettings {
283 hash_version: u32::from_be_bytes(hash_version),
284 num_hashes: u32::from_be_bytes(num_hashes),
285 bits_per_entry: u32::from_be_bytes(bits_per_entry),
286 max_changed_paths: 512,
287 });
288 chunk_bloom_data = Some((bdat_off, bdat_len));
289 }
290 }
291
292 let bloom_indexes_ok = if let (Some(bidx), Some(bsize)) = (bloom_idx_off, bidx_len) {
293 if bsize / 4 != num_commits as usize {
294 eprintln!("warning: commit-graph changed-path index chunk is too small");
295 false
296 } else if bidx + bsize > body.len() {
297 eprintln!("warning: commit-graph changed-path index chunk is too small");
298 false
299 } else {
300 true
301 }
302 } else {
303 false
304 };
305 let bloom_pair_ok = bloom_settings.is_some()
306 && chunk_bloom_data.is_some()
307 && bloom_indexes_ok
308 && chunk_bloom_data.is_some_and(|(_, len)| len >= BLOOM_HEADER);
309
310 let (chunk_bloom_indexes, bloom_settings) = if bloom_pair_ok {
311 (bloom_idx_off, bloom_settings)
312 } else {
313 (None, None)
314 };
315 let chunk_bloom_data = if bloom_pair_ok {
316 chunk_bloom_data
317 } else {
318 None
319 };
320
321 Ok(Self {
322 path,
323 body,
324 num_commits,
325 oid_lookup_off,
326 chunk_commit_data_off: commit_data_off,
327 chunk_generation_data: generation_off,
328 read_generation_data,
329 chunk_bloom_indexes,
330 chunk_bloom_data,
331 bloom_settings,
332 bloom_disabled: false,
333 })
334 }
335
336 fn parse(path: PathBuf, raw: Vec<u8>) -> Option<Self> {
337 Self::try_parse(path, raw).ok()
338 }
339
340 fn oid_at_lex(&self, lex_index: u32) -> Option<ObjectId> {
341 if lex_index >= self.num_commits {
342 return None;
343 }
344 let off = self.oid_lookup_off + lex_index as usize * HASH_LEN;
345 ObjectId::from_bytes(self.body.get(off..off + HASH_LEN)?.try_into().ok()?).ok()
346 }
347
348 fn bsearch_oid(&self, oid: &ObjectId) -> Option<u32> {
349 let mut lo = 0u32;
350 let mut hi = self.num_commits;
351 let bytes = oid.as_bytes();
352 while lo < hi {
353 let mid = (lo + hi) / 2;
354 let off = self.oid_lookup_off + mid as usize * HASH_LEN;
355 let slice = &self.body[off..off + HASH_LEN];
356 match slice.cmp(bytes) {
357 std::cmp::Ordering::Less => lo = mid + 1,
358 std::cmp::Ordering::Greater => hi = mid,
359 std::cmp::Ordering::Equal => return Some(mid),
360 }
361 }
362 None
363 }
364
365 fn disable_bloom(&mut self) {
366 self.chunk_bloom_indexes = None;
367 self.chunk_bloom_data = None;
368 self.bloom_settings = None;
369 self.bloom_disabled = true;
370 }
371
372 fn layer_display_id(&self) -> String {
373 self.path
374 .file_stem()
375 .and_then(|s| s.to_str())
376 .map(|s| s.strip_prefix("graph-").unwrap_or(s).to_string())
377 .unwrap_or_else(|| "commit-graph".to_string())
378 }
379
380 fn bloom_filter_slice(&self, lex_index: u32) -> Option<&[u8]> {
381 let _settings = self.bloom_settings.as_ref()?;
382 let bidx_base = self.chunk_bloom_indexes?;
383 let (bdat_off, bdat_total) = self.chunk_bloom_data?;
384 let graph_warn = warn_path_for_graph_file(self.path.as_path());
385 if lex_index >= self.num_commits {
386 return None;
387 }
388 let payload_len = bdat_total.saturating_sub(BLOOM_HEADER);
389 let end_rel = u32::from_be_bytes(
390 self.body[bidx_base + lex_index as usize * 4..bidx_base + lex_index as usize * 4 + 4]
391 .try_into()
392 .ok()?,
393 ) as usize;
394 let start_rel = if lex_index == 0 {
395 0usize
396 } else {
397 u32::from_be_bytes(
398 self.body[bidx_base + (lex_index as usize - 1) * 4
399 ..bidx_base + (lex_index as usize - 1) * 4 + 4]
400 .try_into()
401 .ok()?,
402 ) as usize
403 };
404 let max_payload = payload_len;
410 if end_rel > max_payload {
411 eprintln!(
412 "warning: ignoring out-of-range offset ({end_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
413 lex_index,
414 graph_warn,
415 bdat_total = bdat_total
416 );
417 return None;
418 }
419 if start_rel > max_payload {
420 eprintln!(
421 "warning: ignoring out-of-range offset ({start_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
422 lex_index.saturating_sub(1),
423 graph_warn,
424 bdat_total = bdat_total
425 );
426 return None;
427 }
428 if end_rel < start_rel {
429 eprintln!(
430 "warning: ignoring decreasing changed-path index offsets ({start_rel} > {end_rel}) for positions {} and {} of {}",
431 lex_index.saturating_sub(1),
432 lex_index,
433 graph_warn
434 );
435 return None;
436 }
437 let data_base = bdat_off + BLOOM_HEADER;
438 let abs_start = data_base + start_rel;
439 let abs_end = data_base + end_rel;
440 if abs_end > bdat_off + bdat_total || abs_start > abs_end {
441 return None;
442 }
443 Some(&self.body[abs_start..abs_end])
444 }
445}
446
447#[derive(Debug, Clone, Copy, PartialEq, Eq)]
449pub enum BloomPrecheck {
450 Inapplicable,
452 NotInGraph,
454 FilterNotPresent,
456 DefinitelyNot,
458 Maybe,
460}
461
462#[derive(Debug, Default, Clone)]
464pub struct BloomWalkStats {
465 pub filter_not_present: u32,
466 pub maybe: u32,
467 pub definitely_not: u32,
468 pub false_positive: u32,
469}
470
471impl BloomWalkStats {
472 pub fn record_precheck(&mut self, pre: BloomPrecheck) {
473 match pre {
474 BloomPrecheck::Inapplicable | BloomPrecheck::NotInGraph => {}
475 BloomPrecheck::FilterNotPresent => self.filter_not_present += 1,
476 BloomPrecheck::DefinitelyNot => self.definitely_not += 1,
477 BloomPrecheck::Maybe => self.maybe += 1,
478 }
479 }
480
481 pub fn record_false_positive(&mut self) {
482 self.false_positive += 1;
483 }
484}
485
486pub type BloomWalkStatsHandle = Arc<Mutex<BloomWalkStats>>;
488
489#[derive(Debug, Clone)]
491pub struct CommitGraphChain {
492 layers: Vec<CommitGraphLayer>,
493}
494
495impl CommitGraphChain {
496 #[must_use]
498 pub fn top_layer_bloom_settings(&self) -> Option<BloomFilterSettings> {
499 self.layers.first()?.bloom_settings
500 }
501
502 #[must_use]
504 pub fn total_commits(&self) -> u32 {
505 self.layers.iter().map(|l| l.num_commits).sum()
506 }
507
508 #[must_use]
510 pub fn layer_paths_oldest_first(&self) -> Vec<PathBuf> {
511 self.layers.iter().rev().map(|l| l.path.clone()).collect()
512 }
513
514 pub fn try_load(objects_dir: &Path) -> Result<Option<Self>, Error> {
519 let info = objects_dir.join("info");
520 let chain_path = info.join("commit-graphs").join("commit-graph-chain");
521 if chain_path.is_file() {
522 let content = std::fs::read_to_string(&chain_path).map_err(Error::from)?;
523 let mut layers = Vec::new();
524 for line in content.lines() {
525 let h = line.trim();
526 if h.len() != 40 {
527 continue;
528 }
529 let graph_path = info.join("commit-graphs").join(format!("graph-{h}.graph"));
530 let raw = std::fs::read(&graph_path).map_err(Error::from)?;
531 let layer = CommitGraphLayer::try_parse(graph_path, raw)?;
532 layers.push(layer);
533 }
534 if layers.is_empty() {
535 return Ok(None);
536 }
537 layers.reverse();
541 let mut chain = Self { layers };
542 chain.validate_bloom_compatibility();
543 return Ok(Some(chain));
544 }
545 let single = info.join("commit-graph");
546 if single.is_file() {
547 let raw = std::fs::read(&single).map_err(Error::from)?;
548 let layer = CommitGraphLayer::try_parse(single.clone(), raw)?;
549 let mut chain = Self {
550 layers: vec![layer],
551 };
552 chain.validate_bloom_compatibility();
553 return Ok(Some(chain));
554 }
555 Ok(None)
556 }
557
558 pub fn load(objects_dir: &Path) -> Option<Self> {
560 Self::try_load(objects_dir).ok().flatten()
561 }
562
563 fn validate_bloom_compatibility(&mut self) {
564 let mut ref_settings: Option<BloomFilterSettings> = None;
570 for layer in &mut self.layers {
571 let Some(bs) = layer.bloom_settings else {
572 continue;
573 };
574 match ref_settings {
575 None => ref_settings = Some(bs),
576 Some(r) => {
577 if r.hash_version != bs.hash_version
578 || r.num_hashes != bs.num_hashes
579 || r.bits_per_entry != bs.bits_per_entry
580 {
581 let id = layer.layer_display_id();
582 if warn_once_for_disabled_bloom_layer(&id) {
588 eprintln!(
589 "warning: disabling Bloom filters for commit-graph layer '{id}' due to incompatible settings"
590 );
591 }
592 layer.disable_bloom();
593 }
594 }
595 }
596 }
597 }
598
599 pub fn existing_filter_bytes(
605 &self,
606 oid: &ObjectId,
607 want: &BloomFilterSettings,
608 ) -> Option<Vec<u8>> {
609 let (layer_idx, lex) = self.find_commit(oid)?;
610 let layer = &self.layers[layer_idx];
611 let settings = layer.bloom_settings.as_ref()?;
612 if settings.hash_version != want.hash_version
613 || settings.num_hashes != want.num_hashes
614 || settings.bits_per_entry != want.bits_per_entry
615 {
616 return None;
617 }
618 match layer.bloom_filter_slice(lex) {
625 Some(s) if !s.is_empty() => Some(s.to_vec()),
626 _ => None,
627 }
628 }
629
630 pub fn upgradable_filter_bytes(
636 &self,
637 oid: &ObjectId,
638 want: &BloomFilterSettings,
639 ) -> Option<Vec<u8>> {
640 let (layer_idx, lex) = self.find_commit(oid)?;
641 let layer = &self.layers[layer_idx];
642 let settings = layer.bloom_settings.as_ref()?;
643 if settings.hash_version == want.hash_version {
644 return None;
645 }
646 if settings.num_hashes != want.num_hashes || settings.bits_per_entry != want.bits_per_entry
647 {
648 return None;
649 }
650 match layer.bloom_filter_slice(lex) {
651 Some(s) if !s.is_empty() => Some(s.to_vec()),
652 _ => None,
653 }
654 }
655
656 pub fn find_commit(&self, oid: &ObjectId) -> Option<(usize, u32)> {
658 for (i, layer) in self.layers.iter().enumerate() {
659 if let Some(lex) = layer.bsearch_oid(oid) {
660 return Some((i, lex));
661 }
662 }
663 None
664 }
665
666 pub fn global_position(&self, oid: &ObjectId) -> Option<u32> {
668 let (layer_idx, lex) = self.find_commit(oid)?;
669 let below: u32 = self.layers[layer_idx + 1..]
670 .iter()
671 .map(|l| l.num_commits)
672 .sum();
673 Some(below + lex)
674 }
675
676 pub fn all_oids_in_order(&self) -> Vec<ObjectId> {
678 let mut out = Vec::new();
679 for layer in self.layers.iter().rev() {
680 for i in 0..layer.num_commits {
681 if let Some(oid) = layer.oid_at_lex(i) {
682 out.push(oid);
683 }
684 }
685 }
686 out
687 }
688
689 pub fn bloom_precheck_for_paths(
691 &self,
692 _odb: &Odb,
693 oid: ObjectId,
694 pathspecs: &[String],
695 bloom_cwd: Option<&str>,
696 requested_hash_version: i32,
697 read_changed_paths: bool,
698 ) -> std::result::Result<BloomPrecheck, crate::error::Error> {
699 if !read_changed_paths {
700 return Ok(BloomPrecheck::Inapplicable);
701 }
702 let Some((layer_idx, lex)) = self.find_commit(&oid) else {
703 return Ok(BloomPrecheck::NotInGraph);
704 };
705 let layer = &self.layers[layer_idx];
706 let Some(settings) = layer.bloom_settings.as_ref() else {
707 return Ok(BloomPrecheck::FilterNotPresent);
708 };
709 let effective_version = if requested_hash_version < 0 {
710 settings.hash_version as i32
711 } else {
712 requested_hash_version
713 };
714 if effective_version != settings.hash_version as i32 {
715 return Ok(BloomPrecheck::FilterNotPresent);
716 }
717
718 let filter = match layer.bloom_filter_slice(lex) {
724 Some(s) => s,
725 None => return Ok(BloomPrecheck::FilterNotPresent),
726 };
727 if filter.is_empty() {
728 return Ok(BloomPrecheck::FilterNotPresent);
729 }
730
731 let mut any_pathspec_maybe = false;
734 let mut checked_any_keys = false;
735 for spec in pathspecs {
736 if spec.is_empty() || crate::pathspec::pathspec_is_exclude(spec) {
737 continue;
738 }
739 let Some(norm) = crate::pathspec::bloom_lookup_prefix_with_cwd(spec, bloom_cwd) else {
740 continue;
741 };
742 let keys = bloom_keyvec_for_path(norm.as_str(), settings);
743 if keys.is_empty() {
744 continue;
745 }
746 checked_any_keys = true;
747 let mut all_keys_maybe = true;
748 for key in &keys {
749 match bloom_filter_contains(key, filter, settings) {
750 Ok(true) => {}
751 Ok(false) => {
752 all_keys_maybe = false;
753 break;
754 }
755 Err(()) => {
756 all_keys_maybe = true;
757 break;
758 }
759 }
760 }
761 if all_keys_maybe {
762 any_pathspec_maybe = true;
763 break;
764 }
765 }
766 if checked_any_keys && !any_pathspec_maybe {
767 return Ok(BloomPrecheck::DefinitelyNot);
768 }
769 Ok(BloomPrecheck::Maybe)
770 }
771}
772
773pub fn diff_changed_paths_for_bloom(
775 odb: &Odb,
776 parent_tree: Option<ObjectId>,
777 commit_tree: ObjectId,
778) -> crate::error::Result<(Vec<String>, usize)> {
779 use crate::diff::diff_trees;
780 let entries = diff_trees(odb, parent_tree.as_ref(), Some(&commit_tree), "")?;
781 let raw_len = entries.len();
782 let mut paths = Vec::new();
783 for e in entries {
784 let p = e.path().to_string();
785 if !p.is_empty() {
786 paths.push(p);
787 }
788 }
789 Ok((paths, raw_len))
790}
791
792pub use crate::bloom::collect_changed_paths_for_bloom;
794
795pub fn bloom_filter_for_commit_write(
797 odb: &Odb,
798 parents: &[ObjectId],
799 tree_oid: ObjectId,
800 settings: &BloomFilterSettings,
801) -> crate::error::Result<(Vec<u8>, BloomBuildOutcome)> {
802 let (changed_paths_vec, raw_count) = if let Some(first_parent) = parents.first() {
806 let p = load_commit_tree(odb, *first_parent)?;
807 diff_changed_paths_for_bloom(odb, Some(p), tree_oid)?
808 } else {
809 diff_changed_paths_for_bloom(odb, None, tree_oid)?
810 };
811 let set = collect_changed_paths_for_bloom(&changed_paths_vec);
812 Ok(crate::bloom::build_bloom_filter_data(
813 &set, raw_count, settings,
814 ))
815}
816
817fn load_commit_tree(odb: &Odb, commit_oid: ObjectId) -> crate::error::Result<ObjectId> {
818 let obj = odb.read(&commit_oid)?;
819 let c = crate::objects::parse_commit(&obj.data)?;
820 Ok(c.tree)
821}
822
823fn tree_has_high_bit_paths(odb: &Odb, tree_oid: ObjectId) -> bool {
828 let Ok(obj) = odb.read(&tree_oid) else {
829 return true;
831 };
832 let Ok(entries) = crate::objects::parse_tree(&obj.data) else {
833 return true;
834 };
835 for e in &entries {
836 if e.name.iter().any(|&b| b & 0x80 != 0) {
837 return true;
838 }
839 if e.mode == 0o040000 && tree_has_high_bit_paths(odb, e.oid) {
840 return true;
841 }
842 }
843 false
844}
845
846pub fn commit_tree_has_high_bit_paths(odb: &Odb, commit_oid: ObjectId) -> bool {
849 match load_commit_tree(odb, commit_oid) {
850 Ok(tree) => tree_has_high_bit_paths(odb, tree),
851 Err(_) => true,
852 }
853}
854
855pub fn parse_graph_file(path: &Path) -> Option<ParsedGraphDump> {
857 let raw = std::fs::read(path).ok()?;
858 if raw.len() < 28 {
859 return None;
860 }
861 let body = &raw[..raw.len() - HASH_LEN];
862 if body.len() < 8 || &body[0..4] != SIGNATURE {
863 return None;
864 }
865 let header_word = u32::from_be_bytes(body[0..4].try_into().ok()?);
866 let num_chunks = body[6] as usize;
867 let mut chunk_names = Vec::new();
868 let toc_start = 8;
869 for i in 0..num_chunks {
870 let e = toc_start + i * 12;
871 let id = u32::from_be_bytes(body[e..e + 4].try_into().ok()?);
872 let name = match id {
873 CHUNK_OID_FANOUT => "oid_fanout",
874 CHUNK_OID_LOOKUP => "oid_lookup",
875 CHUNK_COMMIT_DATA => "commit_metadata",
876 CHUNK_GENERATION_DATA => "generation_data",
877 CHUNK_GENERATION_DATA_OVERFLOW => "generation_data_overflow",
878 CHUNK_EXTRA_EDGES => "extra_edges",
879 CHUNK_BLOOM_INDEXES => "bloom_indexes",
880 CHUNK_BLOOM_DATA => "bloom_data",
881 _ => "unknown",
882 };
883 chunk_names.push(name.to_string());
884 }
885 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw.clone())?;
886 let bloom_opt = layer.bloom_settings.map(|s| {
887 format!(
888 " bloom({},{},{})",
889 s.hash_version, s.bits_per_entry, s.num_hashes
890 )
891 });
892 let mut options = String::new();
893 if let Some(b) = bloom_opt {
894 options.push_str(&b);
895 }
896 if layer.read_generation_data {
897 options.push_str(" read_generation_data");
898 }
899 Some(ParsedGraphDump {
900 header_word,
901 version: body[4],
902 hash_ver: body[5],
903 num_chunks: body[6],
904 reserved: body[7],
905 num_commits: layer.num_commits,
906 chunks: chunk_names.join(" "),
907 options,
908 })
909}
910
911pub struct ParsedGraphDump {
912 pub header_word: u32,
913 pub version: u8,
914 pub hash_ver: u8,
915 pub num_chunks: u8,
916 pub reserved: u8,
917 pub num_commits: u32,
918 pub chunks: String,
919 pub options: String,
920}
921
922pub fn dump_bloom_filters(path: &Path) -> Option<Vec<String>> {
924 let raw = std::fs::read(path).ok()?;
925 let layer = CommitGraphLayer::parse(path.to_path_buf(), raw)?;
926 let mut out = Vec::new();
927 for i in 0..layer.num_commits {
928 let slice = layer.bloom_filter_slice(i).unwrap_or(&[]);
929 if slice.is_empty() {
930 out.push(String::new());
931 } else {
932 let hex: String = slice.iter().map(|b| format!("{b:02x}")).collect();
933 out.push(hex);
934 }
935 }
936 Some(out)
937}