Skip to main content

grit_lib/
commit_graph_file.rs

1//! Parsing Git commit-graph files and Bloom filter lookup (`commit-graph.c` / `bloom.c` compatible).
2
3use std::path::{Path, PathBuf};
4use std::sync::{Arc, Mutex};
5
6use crate::bloom::{
7    bloom_filter_contains, bloom_keyvec_for_path, BloomBuildOutcome, BloomFilterSettings,
8};
9use crate::error::Error;
10use crate::objects::ObjectId;
11use crate::odb::Odb;
12
13const SIGNATURE: &[u8; 4] = b"CGPH";
14const GRAPH_VERSION: u8 = 1;
15const HASH_VERSION_SHA1: u8 = 1;
16const HASH_LEN: usize = 20;
17
18const CHUNK_OID_FANOUT: u32 = 0x4f49_4446; // OIDF
19const CHUNK_OID_LOOKUP: u32 = 0x4f49_444c; // OIDL
20const CHUNK_COMMIT_DATA: u32 = 0x4344_4154; // CDAT
21const CHUNK_GENERATION_DATA: u32 = 0x4744_4132; // GDA2
22const CHUNK_GENERATION_DATA_OVERFLOW: u32 = 0x4744_4f32; // GDO2
23const CHUNK_EXTRA_EDGES: u32 = 0x4544_4745; // EDGE
24const CHUNK_BLOOM_INDEXES: u32 = 0x4249_4458; // BIDX
25const CHUNK_BLOOM_DATA: u32 = 0x4244_4154; // BDAT
26
27const BLOOM_HEADER: usize = crate::bloom::BLOOMDATA_HEADER_LEN;
28
29/// `CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW` — high bit marks GDA2 entries that index GDO2.
30const CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW: u32 = 1u32 << 31;
31
32fn warn_path_for_graph_file(path: &Path) -> String {
33    let s = path.to_string_lossy();
34    if let Some(idx) = s.find(".git/") {
35        return s[idx..].replace('\\', "/");
36    }
37    s.replace('\\', "/")
38}
39
40/// One layer from `.git/objects/info/commit-graph` or `commit-graphs/<hash>.graph`.
41#[derive(Debug, Clone)]
42pub struct CommitGraphLayer {
43    pub path: PathBuf,
44    body: Vec<u8>,
45    num_commits: u32,
46    oid_lookup_off: usize,
47    #[allow(dead_code)]
48    chunk_commit_data_off: usize,
49    #[allow(dead_code)]
50    chunk_generation_data: Option<usize>,
51    read_generation_data: bool,
52    chunk_bloom_indexes: Option<usize>,
53    chunk_bloom_data: Option<(usize, usize)>,
54    bloom_settings: Option<BloomFilterSettings>,
55    bloom_disabled: bool,
56}
57
58impl CommitGraphLayer {
59    /// Parse a commit-graph layer; fails if generation overflow chunk is inconsistent with GDA2.
60    pub fn try_parse(path: PathBuf, raw: Vec<u8>) -> Result<Self, Error> {
61        if raw.len() < 28 {
62            return Err(Error::CorruptObject(
63                "commit-graph file too small".to_owned(),
64            ));
65        }
66        let body = raw[..raw.len() - HASH_LEN].to_vec();
67        if body.len() < 8 || &body[0..4] != SIGNATURE {
68            return Err(Error::CorruptObject(
69                "commit-graph has bad signature".to_owned(),
70            ));
71        }
72        if body[4] != GRAPH_VERSION || body[5] != HASH_VERSION_SHA1 {
73            return Err(Error::CorruptObject(format!(
74                "commit-graph version/hash not supported (version {} hash {})",
75                body[4], body[5]
76            )));
77        }
78        let num_chunks = body[6] as usize;
79        let toc_start = 8;
80        let toc_end = toc_start + (num_chunks + 1) * 12;
81        if body.len() < toc_end {
82            return Err(Error::CorruptObject(
83                "commit-graph truncated at chunk table".to_owned(),
84            ));
85        }
86
87        let mut fanout_off = None;
88        let mut oid_lookup_off = None;
89        let mut commit_data_off = None;
90        let mut generation_off = None;
91        let mut generation_overflow_off = None;
92        let mut bloom_idx_off = None;
93        let mut bloom_data_range = None;
94        let mut chunk_offsets: Vec<usize> = Vec::new();
95        let mut toc_entries: Vec<(u32, usize)> = Vec::with_capacity(num_chunks);
96
97        for i in 0..num_chunks {
98            let e = toc_start + i * 12;
99            let id = u32::from_be_bytes(
100                body[e..e + 4]
101                    .try_into()
102                    .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
103            );
104            let off = u64::from_be_bytes(
105                body[e + 4..e + 12]
106                    .try_into()
107                    .map_err(|_| Error::CorruptObject("commit-graph bad TOC".to_owned()))?,
108            ) as usize;
109            toc_entries.push((id, off));
110            chunk_offsets.push(off);
111            match id {
112                CHUNK_OID_FANOUT => fanout_off = Some(off),
113                CHUNK_OID_LOOKUP => oid_lookup_off = Some(off),
114                CHUNK_COMMIT_DATA => commit_data_off = Some(off),
115                CHUNK_GENERATION_DATA => generation_off = Some(off),
116                CHUNK_GENERATION_DATA_OVERFLOW => generation_overflow_off = Some(off),
117                CHUNK_BLOOM_INDEXES => bloom_idx_off = Some(off),
118                CHUNK_BLOOM_DATA => {
119                    let end = if i + 1 < num_chunks {
120                        let e2 = toc_start + (i + 1) * 12;
121                        u64::from_be_bytes(body[e2 + 4..e2 + 12].try_into().unwrap_or([0u8; 8]))
122                            as usize
123                    } else {
124                        let term = toc_start + num_chunks * 12;
125                        u64::from_be_bytes(body[term + 4..term + 12].try_into().unwrap_or([0u8; 8]))
126                            as usize
127                    };
128                    bloom_data_range = Some((off, end.saturating_sub(off)));
129                }
130                _ => {}
131            }
132        }
133        let file_end = u64::from_be_bytes(
134            body[toc_start + num_chunks * 12 + 4..toc_start + num_chunks * 12 + 12]
135                .try_into()
136                .map_err(|_| Error::CorruptObject("commit-graph bad file end".to_owned()))?,
137        ) as usize;
138        chunk_offsets.push(file_end);
139        chunk_offsets.sort_unstable();
140        chunk_offsets.dedup();
141
142        fn chunk_byte_range(
143            start: usize,
144            toc_entries: &[(u32, usize)],
145            file_end: usize,
146        ) -> Result<usize, Error> {
147            let mut ends: Vec<usize> = toc_entries
148                .iter()
149                .map(|&(_, o)| o)
150                .filter(|&o| o > start)
151                .collect();
152            ends.sort_unstable();
153            let end = ends.first().copied().unwrap_or(file_end);
154            if end < start {
155                return Err(Error::CorruptObject(
156                    "commit-graph chunk layout invalid".to_owned(),
157                ));
158            }
159            Ok(end)
160        }
161
162        if let Some(gda) = generation_off {
163            let gda_end = chunk_byte_range(gda, &toc_entries, file_end)?;
164            let gda_len = gda_end.saturating_sub(gda);
165            let num_commits = fanout_off
166                .and_then(|fo| {
167                    let slice = body.get(fo + 255 * 4..fo + 256 * 4)?;
168                    Some(u32::from_be_bytes(slice.try_into().ok()?))
169                })
170                .ok_or_else(|| Error::CorruptObject("commit-graph missing fanout".to_owned()))?;
171            let expected = num_commits as usize * 4;
172            if gda_len < expected {
173                return Err(Error::CorruptObject(
174                    "commit-graph generation data chunk is too small".to_owned(),
175                ));
176            }
177            let gda_slice = body.get(gda..gda + expected).ok_or_else(|| {
178                Error::CorruptObject("commit-graph generation data OOB".to_owned())
179            })?;
180            let mut max_overflow_idx: Option<u32> = None;
181            for w in 0..num_commits as usize {
182                let v =
183                    u32::from_be_bytes(gda_slice[w * 4..w * 4 + 4].try_into().map_err(|_| {
184                        Error::CorruptObject("commit-graph GDA2 corrupt".to_owned())
185                    })?);
186                if v & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW != 0 {
187                    let pos = v ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
188                    max_overflow_idx = Some(match max_overflow_idx {
189                        None => pos,
190                        Some(m) => m.max(pos),
191                    });
192                }
193            }
194            if let Some(pos) = max_overflow_idx {
195                let Some(gdo_start) = generation_overflow_off else {
196                    return Err(Error::CorruptObject(
197                        "commit-graph requires overflow generation data but has none".to_owned(),
198                    ));
199                };
200                let gdo_end = chunk_byte_range(gdo_start, &toc_entries, file_end)?;
201                let overflow_bytes = gdo_end.saturating_sub(gdo_start);
202                let n_slots = overflow_bytes / 8;
203                if n_slots <= pos as usize {
204                    return Err(Error::CorruptObject(
205                        "commit-graph overflow generation data is too small".to_owned(),
206                    ));
207                }
208            }
209        }
210        let bidx_len = bloom_idx_off.and_then(|b| {
211            chunk_offsets
212                .iter()
213                .find(|&&o| o > b)
214                .map(|&next| next.saturating_sub(b))
215        });
216
217        let fanout_off = fanout_off.ok_or_else(|| {
218            Error::CorruptObject("commit-graph missing OID fanout chunk".to_owned())
219        })?;
220        let oid_lookup_off = oid_lookup_off.ok_or_else(|| {
221            Error::CorruptObject("commit-graph missing OID lookup chunk".to_owned())
222        })?;
223        let commit_data_off = commit_data_off.ok_or_else(|| {
224            Error::CorruptObject("commit-graph missing commit data chunk".to_owned())
225        })?;
226        if fanout_off + 256 * 4 > body.len() || oid_lookup_off + 4 > body.len() {
227            return Err(Error::CorruptObject(
228                "commit-graph chunk extends past end of file".to_owned(),
229            ));
230        }
231        let num_commits = u32::from_be_bytes(
232            body[fanout_off + 255 * 4..fanout_off + 256 * 4]
233                .try_into()
234                .map_err(|_| Error::CorruptObject("commit-graph fanout corrupt".to_owned()))?,
235        );
236        if oid_lookup_off + num_commits as usize * HASH_LEN > body.len() {
237            return Err(Error::CorruptObject(
238                "commit-graph OID lookup extends past end of file".to_owned(),
239            ));
240        }
241        let graph_data_width = HASH_LEN + 16;
242        if commit_data_off + num_commits as usize * graph_data_width > body.len() {
243            return Err(Error::CorruptObject(
244                "commit-graph commit data extends past end of file".to_owned(),
245            ));
246        }
247
248        let read_generation_data = generation_off.is_some();
249        let mut bloom_settings = None;
250        let mut chunk_bloom_data = None;
251        if let (Some(_bidx), Some((bdat_off, bdat_len))) = (bloom_idx_off, bloom_data_range) {
252            if bdat_len < BLOOM_HEADER {
253                eprintln!(
254                    "warning: ignoring too-small changed-path chunk ({} < {}) in commit-graph file",
255                    bdat_len, BLOOM_HEADER
256                );
257            } else if bdat_off + bdat_len <= body.len() {
258                let hdr = &body[bdat_off..bdat_off + BLOOM_HEADER];
259                let hash_version: [u8; 4] = hdr[0..4]
260                    .try_into()
261                    .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
262                let num_hashes: [u8; 4] = hdr[4..8]
263                    .try_into()
264                    .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
265                let bits_per_entry: [u8; 4] = hdr[8..12]
266                    .try_into()
267                    .map_err(|_| Error::CorruptObject("Bloom header corrupt".to_owned()))?;
268                bloom_settings = Some(BloomFilterSettings {
269                    hash_version: u32::from_be_bytes(hash_version),
270                    num_hashes: u32::from_be_bytes(num_hashes),
271                    bits_per_entry: u32::from_be_bytes(bits_per_entry),
272                    max_changed_paths: 512,
273                });
274                chunk_bloom_data = Some((bdat_off, bdat_len));
275            }
276        }
277
278        let bloom_indexes_ok = if let (Some(bidx), Some(bsize)) = (bloom_idx_off, bidx_len) {
279            if bsize / 4 != num_commits as usize {
280                eprintln!("warning: commit-graph changed-path index chunk is too small");
281                false
282            } else if bidx + bsize > body.len() {
283                eprintln!("warning: commit-graph changed-path index chunk is too small");
284                false
285            } else {
286                true
287            }
288        } else {
289            false
290        };
291        let bloom_pair_ok = bloom_settings.is_some()
292            && chunk_bloom_data.is_some()
293            && bloom_indexes_ok
294            && chunk_bloom_data.is_some_and(|(_, len)| len >= BLOOM_HEADER);
295
296        let (chunk_bloom_indexes, bloom_settings) = if bloom_pair_ok {
297            (bloom_idx_off, bloom_settings)
298        } else {
299            (None, None)
300        };
301        let chunk_bloom_data = if bloom_pair_ok {
302            chunk_bloom_data
303        } else {
304            None
305        };
306
307        Ok(Self {
308            path,
309            body,
310            num_commits,
311            oid_lookup_off,
312            chunk_commit_data_off: commit_data_off,
313            chunk_generation_data: generation_off,
314            read_generation_data,
315            chunk_bloom_indexes,
316            chunk_bloom_data,
317            bloom_settings,
318            bloom_disabled: false,
319        })
320    }
321
322    fn parse(path: PathBuf, raw: Vec<u8>) -> Option<Self> {
323        Self::try_parse(path, raw).ok()
324    }
325
326    fn oid_at_lex(&self, lex_index: u32) -> Option<ObjectId> {
327        if lex_index >= self.num_commits {
328            return None;
329        }
330        let off = self.oid_lookup_off + lex_index as usize * HASH_LEN;
331        ObjectId::from_bytes(self.body.get(off..off + HASH_LEN)?.try_into().ok()?).ok()
332    }
333
334    fn bsearch_oid(&self, oid: &ObjectId) -> Option<u32> {
335        let mut lo = 0u32;
336        let mut hi = self.num_commits;
337        let bytes = oid.as_bytes();
338        while lo < hi {
339            let mid = (lo + hi) / 2;
340            let off = self.oid_lookup_off + mid as usize * HASH_LEN;
341            let slice = &self.body[off..off + HASH_LEN];
342            match slice.cmp(bytes) {
343                std::cmp::Ordering::Less => lo = mid + 1,
344                std::cmp::Ordering::Greater => hi = mid,
345                std::cmp::Ordering::Equal => return Some(mid),
346            }
347        }
348        None
349    }
350
351    fn disable_bloom(&mut self) {
352        self.chunk_bloom_indexes = None;
353        self.chunk_bloom_data = None;
354        self.bloom_settings = None;
355        self.bloom_disabled = true;
356    }
357
358    fn layer_display_id(&self) -> String {
359        self.path
360            .file_stem()
361            .and_then(|s| s.to_str())
362            .map(|s| s.strip_prefix("graph-").unwrap_or(s).to_string())
363            .unwrap_or_else(|| "commit-graph".to_string())
364    }
365
366    fn bloom_filter_slice(&self, lex_index: u32) -> Option<&[u8]> {
367        let _settings = self.bloom_settings.as_ref()?;
368        let bidx_base = self.chunk_bloom_indexes?;
369        let (bdat_off, bdat_total) = self.chunk_bloom_data?;
370        let graph_warn = warn_path_for_graph_file(self.path.as_path());
371        if lex_index >= self.num_commits {
372            return None;
373        }
374        let payload_len = bdat_total.saturating_sub(BLOOM_HEADER);
375        let end_rel = u32::from_be_bytes(
376            self.body[bidx_base + lex_index as usize * 4..bidx_base + lex_index as usize * 4 + 4]
377                .try_into()
378                .ok()?,
379        ) as usize;
380        let start_rel = if lex_index == 0 {
381            0usize
382        } else {
383            u32::from_be_bytes(
384                self.body[bidx_base + (lex_index as usize - 1) * 4
385                    ..bidx_base + (lex_index as usize - 1) * 4 + 4]
386                    .try_into()
387                    .ok()?,
388            ) as usize
389        };
390        if end_rel < start_rel {
391            eprintln!(
392                "warning: ignoring decreasing changed-path index offsets ({start_rel} > {end_rel}) for positions {} and {} of {}",
393                lex_index.saturating_sub(1),
394                lex_index,
395                graph_warn
396            );
397            return None;
398        }
399        let max_payload = payload_len;
400        if end_rel > max_payload {
401            eprintln!(
402                "warning: ignoring out-of-range offset ({end_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
403                lex_index,
404                graph_warn,
405                bdat_total = bdat_total
406            );
407            return None;
408        }
409        if start_rel > max_payload {
410            eprintln!(
411                "warning: ignoring out-of-range offset ({start_rel}) for changed-path filter at pos {} of {} (chunk size: {bdat_total})",
412                lex_index.saturating_sub(1),
413                graph_warn,
414                bdat_total = bdat_total
415            );
416            return None;
417        }
418        let data_base = bdat_off + BLOOM_HEADER;
419        let abs_start = data_base + start_rel;
420        let abs_end = data_base + end_rel;
421        if abs_end > bdat_off + bdat_total || abs_start > abs_end {
422            return None;
423        }
424        Some(&self.body[abs_start..abs_end])
425    }
426}
427
428/// Result of consulting Bloom filters before running a tree diff (matches `revision.c`).
429#[derive(Debug, Clone, Copy, PartialEq, Eq)]
430pub enum BloomPrecheck {
431    /// No commit-graph, pathspecs disallow Bloom, or wrong parent count — no statistics.
432    Inapplicable,
433    /// Commit not in graph or generation unavailable — skip Bloom (`-1` without `filter_not_present`).
434    NotInGraph,
435    /// Bloom filter missing or unusable (`filter_not_present` in Git).
436    FilterNotPresent,
437    /// Bloom says path cannot be in this commit (`definitely_not`).
438    DefinitelyNot,
439    /// Bloom says maybe — caller must run diff and may count `false_positive`.
440    Maybe,
441}
442
443/// Counters for `GIT_TRACE2_PERF` Bloom statistics (`revision.c` `trace2_bloom_filter_statistics_atexit`).
444#[derive(Debug, Default, Clone)]
445pub struct BloomWalkStats {
446    pub filter_not_present: u32,
447    pub maybe: u32,
448    pub definitely_not: u32,
449    pub false_positive: u32,
450}
451
452impl BloomWalkStats {
453    pub fn record_precheck(&mut self, pre: BloomPrecheck) {
454        match pre {
455            BloomPrecheck::Inapplicable | BloomPrecheck::NotInGraph => {}
456            BloomPrecheck::FilterNotPresent => self.filter_not_present += 1,
457            BloomPrecheck::DefinitelyNot => self.definitely_not += 1,
458            BloomPrecheck::Maybe => self.maybe += 1,
459        }
460    }
461
462    pub fn record_false_positive(&mut self) {
463        self.false_positive += 1;
464    }
465}
466
467/// Shared stats handle for [`crate::rev_list::RevListOptions::bloom_stats`].
468pub type BloomWalkStatsHandle = Arc<Mutex<BloomWalkStats>>;
469
470/// Loaded commit-graph chain (newest layer first, matching `commit-graph-chain` file order).
471#[derive(Debug, Clone)]
472pub struct CommitGraphChain {
473    layers: Vec<CommitGraphLayer>,
474}
475
476impl CommitGraphChain {
477    /// Bloom settings from the newest layer, if that layer carries Bloom data.
478    #[must_use]
479    pub fn top_layer_bloom_settings(&self) -> Option<BloomFilterSettings> {
480        self.layers.first()?.bloom_settings
481    }
482
483    /// Total commits across all layers (Git `num_commits_in_base` offset for new layers).
484    #[must_use]
485    pub fn total_commits(&self) -> u32 {
486        self.layers.iter().map(|l| l.num_commits).sum()
487    }
488
489    /// Layer file paths from oldest base to newest (reverse of chain file order).
490    #[must_use]
491    pub fn layer_paths_oldest_first(&self) -> Vec<PathBuf> {
492        self.layers.iter().rev().map(|l| l.path.clone()).collect()
493    }
494
495    /// Load from `objects/info/commit-graph` or `objects/info/commit-graphs/commit-graph-chain`.
496    ///
497    /// Returns `Ok(None)` when no commit-graph exists. Corrupt graphs (including invalid GDO2)
498    /// return [`Err`].
499    pub fn try_load(objects_dir: &Path) -> Result<Option<Self>, Error> {
500        let info = objects_dir.join("info");
501        let chain_path = info.join("commit-graphs").join("commit-graph-chain");
502        if chain_path.is_file() {
503            let content = std::fs::read_to_string(&chain_path).map_err(Error::from)?;
504            let mut layers = Vec::new();
505            for line in content.lines() {
506                let h = line.trim();
507                if h.len() != 40 {
508                    continue;
509                }
510                let graph_path = info.join("commit-graphs").join(format!("graph-{h}.graph"));
511                let raw = std::fs::read(&graph_path).map_err(Error::from)?;
512                let layer = CommitGraphLayer::try_parse(graph_path, raw)?;
513                layers.push(layer);
514            }
515            if layers.is_empty() {
516                return Ok(None);
517            }
518            let mut chain = Self { layers };
519            chain.validate_bloom_compatibility();
520            return Ok(Some(chain));
521        }
522        let single = info.join("commit-graph");
523        if single.is_file() {
524            let raw = std::fs::read(&single).map_err(Error::from)?;
525            let layer = CommitGraphLayer::try_parse(single.clone(), raw)?;
526            let mut chain = Self {
527                layers: vec![layer],
528            };
529            chain.validate_bloom_compatibility();
530            return Ok(Some(chain));
531        }
532        Ok(None)
533    }
534
535    /// Like [`Self::try_load`] but ignores parse errors (returns `None`).
536    pub fn load(objects_dir: &Path) -> Option<Self> {
537        Self::try_load(objects_dir).ok().flatten()
538    }
539
540    fn validate_bloom_compatibility(&mut self) {
541        let mut ref_settings: Option<BloomFilterSettings> = None;
542        for layer in &mut self.layers {
543            let Some(bs) = layer.bloom_settings else {
544                continue;
545            };
546            match ref_settings {
547                None => ref_settings = Some(bs),
548                Some(r) => {
549                    if r.hash_version != bs.hash_version
550                        || r.num_hashes != bs.num_hashes
551                        || r.bits_per_entry != bs.bits_per_entry
552                    {
553                        let id = layer.layer_display_id();
554                        eprintln!(
555                            "warning: disabling Bloom filters for commit-graph layer '{id}' due to incompatible settings"
556                        );
557                        layer.disable_bloom();
558                    }
559                }
560            }
561        }
562    }
563
564    /// Lexicographic position in the full chain, or `None` if not in any layer.
565    pub fn find_commit(&self, oid: &ObjectId) -> Option<(usize, u32)> {
566        for (i, layer) in self.layers.iter().enumerate() {
567            if let Some(lex) = layer.bsearch_oid(oid) {
568                return Some((i, lex));
569            }
570        }
571        None
572    }
573
574    /// Global commit-graph position (Git `graph_pos`): base layers first, then newer layers.
575    pub fn global_position(&self, oid: &ObjectId) -> Option<u32> {
576        let (layer_idx, lex) = self.find_commit(oid)?;
577        let below: u32 = self.layers[layer_idx + 1..]
578            .iter()
579            .map(|l| l.num_commits)
580            .sum();
581        Some(below + lex)
582    }
583
584    /// All commit OIDs in the chain (oldest base first, then newer layers).
585    pub fn all_oids_in_order(&self) -> Vec<ObjectId> {
586        let mut out = Vec::new();
587        for layer in self.layers.iter().rev() {
588            for i in 0..layer.num_commits {
589                if let Some(oid) = layer.oid_at_lex(i) {
590                    out.push(oid);
591                }
592            }
593        }
594        out
595    }
596
597    /// Consult Bloom filters for a single-parent commit before diffing trees.
598    pub fn bloom_precheck_for_paths(
599        &self,
600        odb: &Odb,
601        oid: ObjectId,
602        pathspecs: &[String],
603        bloom_cwd: Option<&str>,
604        requested_hash_version: i32,
605        read_changed_paths: bool,
606    ) -> std::result::Result<BloomPrecheck, crate::error::Error> {
607        if !read_changed_paths {
608            return Ok(BloomPrecheck::Inapplicable);
609        }
610        let Some((layer_idx, lex)) = self.find_commit(&oid) else {
611            return Ok(BloomPrecheck::NotInGraph);
612        };
613        let layer = &self.layers[layer_idx];
614        let Some(settings) = layer.bloom_settings.as_ref() else {
615            return Ok(BloomPrecheck::FilterNotPresent);
616        };
617        let effective_version = if requested_hash_version < 0 {
618            settings.hash_version as i32
619        } else {
620            requested_hash_version
621        };
622        if effective_version != settings.hash_version as i32 {
623            return Ok(BloomPrecheck::FilterNotPresent);
624        }
625
626        let commit = crate::objects::parse_commit(&odb.read(&oid)?.data)?;
627        if commit.parents.len() > 1 {
628            return Ok(BloomPrecheck::Inapplicable);
629        }
630
631        let filter = match layer.bloom_filter_slice(lex) {
632            Some(s) => s,
633            None => return Ok(BloomPrecheck::FilterNotPresent),
634        };
635        if filter.is_empty() {
636            return Ok(BloomPrecheck::FilterNotPresent);
637        }
638
639        // Git `bloom_filter_contains_vec`: within one pathspec, every prefix key must match;
640        // multiple pathspecs are ORed (`revision.c` loop over `bloom_keyvecs_nr`).
641        let mut any_pathspec_maybe = false;
642        let mut checked_any_keys = false;
643        for spec in pathspecs {
644            if spec.is_empty() || crate::pathspec::pathspec_is_exclude(spec) {
645                continue;
646            }
647            let Some(norm) = crate::pathspec::bloom_lookup_prefix_with_cwd(spec, bloom_cwd) else {
648                continue;
649            };
650            let keys = bloom_keyvec_for_path(norm.as_str(), settings);
651            if keys.is_empty() {
652                continue;
653            }
654            checked_any_keys = true;
655            let mut all_keys_maybe = true;
656            for key in &keys {
657                match bloom_filter_contains(key, filter, settings) {
658                    Ok(true) => {}
659                    Ok(false) => {
660                        all_keys_maybe = false;
661                        break;
662                    }
663                    Err(()) => {
664                        all_keys_maybe = true;
665                        break;
666                    }
667                }
668            }
669            if all_keys_maybe {
670                any_pathspec_maybe = true;
671                break;
672            }
673        }
674        if checked_any_keys && !any_pathspec_maybe {
675            return Ok(BloomPrecheck::DefinitelyNot);
676        }
677        Ok(BloomPrecheck::Maybe)
678    }
679}
680
681/// Compute changed paths between parent and commit trees (recursive diff, no rename detection).
682pub fn diff_changed_paths_for_bloom(
683    odb: &Odb,
684    parent_tree: Option<ObjectId>,
685    commit_tree: ObjectId,
686) -> crate::error::Result<(Vec<String>, usize)> {
687    use crate::diff::diff_trees;
688    let entries = diff_trees(odb, parent_tree.as_ref(), Some(&commit_tree), "")?;
689    let raw_len = entries.len();
690    let mut paths = Vec::new();
691    for e in entries {
692        let p = e.path().to_string();
693        if !p.is_empty() {
694            paths.push(p);
695        }
696    }
697    Ok((paths, raw_len))
698}
699
700/// Re-export for `commit-graph` write.
701pub use crate::bloom::collect_changed_paths_for_bloom;
702
703/// Build Bloom filter bytes for one commit; returns cumulative size contribution for BIDX.
704pub fn bloom_filter_for_commit_write(
705    odb: &Odb,
706    parents: &[ObjectId],
707    tree_oid: ObjectId,
708    settings: &BloomFilterSettings,
709) -> crate::error::Result<(Vec<u8>, BloomBuildOutcome)> {
710    let (changed_paths_vec, raw_count) = if parents.len() == 1 {
711        let p = load_commit_tree(odb, parents[0])?;
712        diff_changed_paths_for_bloom(odb, Some(p), tree_oid)?
713    } else if parents.is_empty() {
714        diff_changed_paths_for_bloom(odb, None, tree_oid)?
715    } else {
716        return Ok((vec![0xff], BloomBuildOutcome::TruncatedLarge));
717    };
718    let set = collect_changed_paths_for_bloom(&changed_paths_vec);
719    Ok(crate::bloom::build_bloom_filter_data(
720        &set, raw_count, settings,
721    ))
722}
723
724fn load_commit_tree(odb: &Odb, commit_oid: ObjectId) -> crate::error::Result<ObjectId> {
725    let obj = odb.read(&commit_oid)?;
726    let c = crate::objects::parse_commit(&obj.data)?;
727    Ok(c.tree)
728}
729
730/// Parse all chunks for `test-tool read-graph` / debugging.
731pub fn parse_graph_file(path: &Path) -> Option<ParsedGraphDump> {
732    let raw = std::fs::read(path).ok()?;
733    if raw.len() < 28 {
734        return None;
735    }
736    let body = &raw[..raw.len() - HASH_LEN];
737    if body.len() < 8 || &body[0..4] != SIGNATURE {
738        return None;
739    }
740    let header_word = u32::from_be_bytes(body[0..4].try_into().ok()?);
741    let num_chunks = body[6] as usize;
742    let mut chunk_names = Vec::new();
743    let toc_start = 8;
744    for i in 0..num_chunks {
745        let e = toc_start + i * 12;
746        let id = u32::from_be_bytes(body[e..e + 4].try_into().ok()?);
747        let name = match id {
748            CHUNK_OID_FANOUT => "oid_fanout",
749            CHUNK_OID_LOOKUP => "oid_lookup",
750            CHUNK_COMMIT_DATA => "commit_metadata",
751            CHUNK_GENERATION_DATA => "generation_data",
752            CHUNK_GENERATION_DATA_OVERFLOW => "generation_data_overflow",
753            CHUNK_EXTRA_EDGES => "extra_edges",
754            CHUNK_BLOOM_INDEXES => "bloom_indexes",
755            CHUNK_BLOOM_DATA => "bloom_data",
756            _ => "unknown",
757        };
758        chunk_names.push(name.to_string());
759    }
760    let layer = CommitGraphLayer::parse(path.to_path_buf(), raw.clone())?;
761    let bloom_opt = layer.bloom_settings.map(|s| {
762        format!(
763            " bloom({},{},{})",
764            s.hash_version, s.bits_per_entry, s.num_hashes
765        )
766    });
767    let mut options = String::new();
768    if let Some(b) = bloom_opt {
769        options.push_str(&b);
770    }
771    if layer.read_generation_data {
772        options.push_str(" read_generation_data");
773    }
774    Some(ParsedGraphDump {
775        header_word,
776        version: body[4],
777        hash_ver: body[5],
778        num_chunks: body[6],
779        reserved: body[7],
780        num_commits: layer.num_commits,
781        chunks: chunk_names.join(" "),
782        options,
783    })
784}
785
786pub struct ParsedGraphDump {
787    pub header_word: u32,
788    pub version: u8,
789    pub hash_ver: u8,
790    pub num_chunks: u8,
791    pub reserved: u8,
792    pub num_commits: u32,
793    pub chunks: String,
794    pub options: String,
795}
796
797/// Dump hex lines of Bloom filters (one per commit, empty line for empty filter).
798pub fn dump_bloom_filters(path: &Path) -> Option<Vec<String>> {
799    let raw = std::fs::read(path).ok()?;
800    let layer = CommitGraphLayer::parse(path.to_path_buf(), raw)?;
801    let mut out = Vec::new();
802    for i in 0..layer.num_commits {
803        let slice = layer.bloom_filter_slice(i).unwrap_or(&[]);
804        if slice.is_empty() {
805            out.push(String::new());
806        } else {
807            let hex: String = slice.iter().map(|b| format!("{b:02x}")).collect();
808            out.push(hex);
809        }
810    }
811    Some(out)
812}