1use std::collections::{HashMap, HashSet};
4use std::io::Write;
5
6use sha1::{Digest, Sha1};
7
8use crate::bloom::{BloomBuildOutcome, BloomFilterSettings};
9use crate::commit_graph_file::CommitGraphChain;
10use crate::objects::{parse_commit, ObjectId, ObjectKind};
11use crate::odb::Odb;
12
13const SIGNATURE: &[u8; 4] = b"CGPH";
14const VERSION: u8 = 1;
15const HASH_VERSION_SHA1: u8 = 1;
16const HASH_LEN: usize = 20;
17
18const CHUNK_OID_FANOUT: u32 = 0x4f49_4446;
19const CHUNK_OID_LOOKUP: u32 = 0x4f49_444c;
20const CHUNK_COMMIT_DATA: u32 = 0x4344_4154;
21const CHUNK_GENERATION_DATA: u32 = 0x4744_4132;
22const CHUNK_GENERATION_DATA_OVERFLOW: u32 = 0x4744_4f32; const CHUNK_EXTRA_EDGES: u32 = 0x4544_4745;
24const CHUNK_BLOOM_INDEXES: u32 = 0x4249_4458;
25const CHUNK_BLOOM_DATA: u32 = 0x4244_4154;
26const CHUNK_BASE: u32 = 0x4241_5345;
27
28const PARENT_NONE: u32 = 0x7000_0000;
29const GRAPH_EXTRA_EDGES_NEEDED: u32 = 0x8000_0000;
30const GRAPH_LAST_EDGE: u32 = 0x8000_0000;
31
32const GENERATION_NUMBER_V2_OFFSET_MAX: u64 = (1u64 << 31) - 1;
34const CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW: u32 = 1u32 << 31;
36
37#[derive(Debug, Clone)]
39pub struct CommitGraphCommitInfo {
40 pub tree: ObjectId,
41 pub parents: Vec<ObjectId>,
42 pub commit_time: u64,
44}
45
46fn sha1_file_body(body: &[u8]) -> [u8; 20] {
47 let mut h = Sha1::new();
48 h.update(body);
49 h.finalize().into()
50}
51
52fn parse_commit_time(committer: &str) -> u64 {
53 let parts: Vec<&str> = committer.rsplitn(3, ' ').collect();
54 if parts.len() >= 2 {
55 parts[1].parse::<u64>().unwrap_or(0)
56 } else {
57 0
58 }
59}
60
61pub fn load_commit_graph_commit_info(
63 odb: &Odb,
64 oid: ObjectId,
65) -> crate::error::Result<CommitGraphCommitInfo> {
66 let obj = odb.read(&oid)?;
67 if obj.kind != ObjectKind::Commit {
68 return Err(crate::error::Error::CorruptObject(format!(
69 "object {oid} is not a commit"
70 )));
71 }
72 let c = parse_commit(&obj.data)?;
73 Ok(CommitGraphCommitInfo {
74 tree: c.tree,
75 parents: c.parents.clone(),
76 commit_time: parse_commit_time(&c.committer),
77 })
78}
79
80fn compute_topo_generations(
81 sorted_oids: &[ObjectId],
82 infos: &HashMap<ObjectId, CommitGraphCommitInfo>,
83 oid_to_idx: &HashMap<ObjectId, u32>,
84) -> Vec<u32> {
85 let n = sorted_oids.len();
86 let mut gen = vec![0u32; n];
87 let mut computed = vec![false; n];
88 for i in 0..n {
89 if computed[i] {
90 continue;
91 }
92 let mut work_stack: Vec<(usize, bool)> = vec![(i, false)];
93 while let Some((idx, parents_done)) = work_stack.pop() {
94 if computed[idx] {
95 continue;
96 }
97 let oid = sorted_oids[idx];
98 let info = &infos[&oid];
99 if parents_done {
100 let mut max_parent_gen = 0u32;
101 for p in &info.parents {
102 if let Some(&pidx) = oid_to_idx.get(p) {
103 max_parent_gen = max_parent_gen.max(gen[pidx as usize]);
104 }
105 }
106 gen[idx] = max_parent_gen + 1;
107 computed[idx] = true;
108 } else {
109 let mut all_done = true;
110 for p in &info.parents {
111 if let Some(&pidx) = oid_to_idx.get(p) {
112 if !computed[pidx as usize] {
113 all_done = false;
114 }
115 }
116 }
117 if all_done {
118 let mut max_parent_gen = 0u32;
119 for p in &info.parents {
120 if let Some(&pidx) = oid_to_idx.get(p) {
121 max_parent_gen = max_parent_gen.max(gen[pidx as usize]);
122 }
123 }
124 gen[idx] = max_parent_gen + 1;
125 computed[idx] = true;
126 } else {
127 work_stack.push((idx, true));
128 for p in &info.parents {
129 if let Some(&pidx) = oid_to_idx.get(p) {
130 if !computed[pidx as usize] {
131 work_stack.push((pidx as usize, false));
132 }
133 }
134 }
135 }
136 }
137 }
138 }
139 gen
140}
141
142fn compute_corrected_generations(
143 sorted_oids: &[ObjectId],
144 infos: &HashMap<ObjectId, CommitGraphCommitInfo>,
145 oid_to_idx: &HashMap<ObjectId, u32>,
146 topo_gen: &[u32],
147) -> Vec<u64> {
148 let n = sorted_oids.len();
149 let mut gen_date = vec![0u64; n];
150 let mut computed = vec![false; n];
151 for i in 0..n {
152 if computed[i] {
153 continue;
154 }
155 let mut work_stack: Vec<(usize, bool)> = vec![(i, false)];
156 while let Some((idx, parents_done)) = work_stack.pop() {
157 if computed[idx] {
158 continue;
159 }
160 let oid = sorted_oids[idx];
161 let info = &infos[&oid];
162 let cdate = info.commit_time;
163 if parents_done {
164 let mut max_g = cdate;
165 for p in &info.parents {
166 if let Some(&pidx) = oid_to_idx.get(p) {
167 max_g = max_g.max(gen_date[pidx as usize]);
168 }
169 }
170 let topo = topo_gen[idx] as u64;
171 if max_g < topo {
172 max_g = topo;
173 }
174 gen_date[idx] = max_g + 1;
175 computed[idx] = true;
176 } else {
177 let mut all_done = true;
178 for p in &info.parents {
179 if let Some(&pidx) = oid_to_idx.get(p) {
180 if !computed[pidx as usize] {
181 all_done = false;
182 }
183 }
184 }
185 if all_done {
186 let mut max_g = cdate;
187 for p in &info.parents {
188 if let Some(&pidx) = oid_to_idx.get(p) {
189 max_g = max_g.max(gen_date[pidx as usize]);
190 }
191 }
192 let topo = topo_gen[idx] as u64;
193 if max_g < topo {
194 max_g = topo;
195 }
196 gen_date[idx] = max_g + 1;
197 computed[idx] = true;
198 } else {
199 work_stack.push((idx, true));
200 for p in &info.parents {
201 if let Some(&pidx) = oid_to_idx.get(p) {
202 if !computed[pidx as usize] {
203 work_stack.push((pidx as usize, false));
204 }
205 }
206 }
207 }
208 }
209 }
210 }
211 gen_date
212}
213
214fn resolve_parent_edge(
215 parent: ObjectId,
216 oid_to_idx: &HashMap<ObjectId, u32>,
217 base_count: u32,
218 chain: Option<&CommitGraphChain>,
219) -> u32 {
220 if let Some(&idx) = oid_to_idx.get(&parent) {
221 return idx + base_count;
222 }
223 if let Some(c) = chain {
224 if let Some(gpos) = c.global_position(&parent) {
225 return gpos;
226 }
227 }
228 PARENT_NONE
229}
230
231#[derive(Debug, Default, Clone, Copy)]
233pub struct BloomWriteStats {
234 pub filter_computed: u32,
235 pub filter_not_computed: u32,
236 pub filter_trunc_empty: u32,
237 pub filter_trunc_large: u32,
238 pub filter_upgraded: u32,
239}
240
241pub fn build_commit_graph_bytes(
243 sorted_oids: &[ObjectId],
244 infos: &HashMap<ObjectId, CommitGraphCommitInfo>,
245 odb: &Odb,
246 changed_paths: bool,
247 bloom_settings: &BloomFilterSettings,
248 base_chain: Option<&CommitGraphChain>,
249 base_graph_hashes: &[[u8; 20]],
250 max_new_filters: Option<u32>,
251 existing_filters: &HashMap<ObjectId, Vec<u8>>,
252 upgraded_filters: &HashMap<ObjectId, Vec<u8>>,
253 write_generation_data: bool,
254) -> crate::error::Result<(Vec<u8>, BloomWriteStats)> {
255 let base_count: u32 = base_chain.map(CommitGraphChain::total_commits).unwrap_or(0);
256
257 let oid_to_idx: HashMap<ObjectId, u32> = sorted_oids
258 .iter()
259 .enumerate()
260 .map(|(i, o)| (*o, i as u32))
261 .collect();
262
263 let topo = compute_topo_generations(sorted_oids, infos, &oid_to_idx);
264 let gen_date = compute_corrected_generations(sorted_oids, infos, &oid_to_idx, &topo);
265
266 let mut gda2: Vec<u8> = Vec::with_capacity(sorted_oids.len() * 4);
267 let mut generation_overflow: Vec<u8> = Vec::new();
268 let mut overflow_count: u32 = 0;
269 for (i, oid) in sorted_oids.iter().enumerate() {
270 let info = &infos[oid];
271 let offset_raw = gen_date[i].saturating_sub(info.commit_time);
272 if offset_raw > GENERATION_NUMBER_V2_OFFSET_MAX {
273 let marker = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | overflow_count;
274 overflow_count = overflow_count.wrapping_add(1);
275 gda2.extend_from_slice(&marker.to_be_bytes());
276 generation_overflow.extend_from_slice(&((offset_raw >> 32) as u32).to_be_bytes());
277 generation_overflow.extend_from_slice(&((offset_raw as u32).to_be_bytes()));
278 } else {
279 gda2.extend_from_slice(&(offset_raw as u32).to_be_bytes());
280 }
281 }
282
283 let mut extra_edges: Vec<u8> = Vec::new();
284
285 let mut cdat: Vec<u8> = Vec::with_capacity(sorted_oids.len() * (HASH_LEN + 16));
286 for (i, oid) in sorted_oids.iter().enumerate() {
287 let info = &infos[oid];
288 cdat.extend_from_slice(info.tree.as_bytes());
289
290 let p1 = info
291 .parents
292 .first()
293 .map(|p| resolve_parent_edge(*p, &oid_to_idx, base_count, base_chain))
294 .unwrap_or(PARENT_NONE);
295 cdat.extend_from_slice(&p1.to_be_bytes());
296
297 let p2 = if info.parents.len() <= 1 {
298 PARENT_NONE
299 } else if info.parents.len() == 2 {
300 resolve_parent_edge(info.parents[1], &oid_to_idx, base_count, base_chain)
301 } else {
302 let start_u32 = (extra_edges.len() / 4) as u32;
303 for (j, p) in info.parents.iter().enumerate().skip(1) {
304 let mut ev = resolve_parent_edge(*p, &oid_to_idx, base_count, base_chain);
305 if j + 1 == info.parents.len() {
306 ev |= GRAPH_LAST_EDGE;
307 }
308 extra_edges.extend_from_slice(&ev.to_be_bytes());
309 }
310 GRAPH_EXTRA_EDGES_NEEDED | start_u32
311 };
312 cdat.extend_from_slice(&p2.to_be_bytes());
313
314 let topo = topo[i];
315 let date = info.commit_time;
316 let packed = (topo << 2) | (((date >> 32) & 0x3) as u32);
317 cdat.extend_from_slice(&packed.to_be_bytes());
318 cdat.extend_from_slice(&((date & 0xFFFF_FFFF) as u32).to_be_bytes());
319 }
320
321 let mut fanout = vec![0u8; 256 * 4];
322 let mut counts = [0u32; 256];
323 for oid in sorted_oids {
324 counts[oid.as_bytes()[0] as usize] += 1;
325 }
326 let mut cum = 0u32;
327 for i in 0..256 {
328 cum += counts[i];
329 fanout[i * 4..i * 4 + 4].copy_from_slice(&cum.to_be_bytes());
330 }
331
332 let mut oid_lookup = Vec::with_capacity(sorted_oids.len() * HASH_LEN);
333 for oid in sorted_oids {
334 oid_lookup.extend_from_slice(oid.as_bytes());
335 }
336
337 let mut bloom_stats = BloomWriteStats::default();
338 let max_new = max_new_filters.unwrap_or(u32::MAX);
339 let (bidx, bdat, bloom_total_payload) = if changed_paths {
340 let mut indexes: Vec<u32> = Vec::with_capacity(sorted_oids.len());
341 let mut data_payload = Vec::new();
342 let mut cur = 0u32;
343 for oid in sorted_oids {
344 let info = &infos[oid];
345 if let Some(existing) = existing_filters.get(oid) {
348 bloom_stats.filter_not_computed += 1;
349 cur += existing.len() as u32;
350 indexes.push(cur);
351 data_payload.extend_from_slice(existing);
352 continue;
353 }
354 if let Some(upgraded) = upgraded_filters.get(oid) {
358 bloom_stats.filter_upgraded += 1;
359 cur += upgraded.len() as u32;
360 indexes.push(cur);
361 data_payload.extend_from_slice(upgraded);
362 continue;
363 }
364 let compute = bloom_stats.filter_computed < max_new;
365 let (bytes, outcome) = if compute {
366 crate::commit_graph_file::bloom_filter_for_commit_write(
367 odb,
368 &info.parents,
369 info.tree,
370 bloom_settings,
371 )?
372 } else {
373 (Vec::new(), BloomBuildOutcome::Normal)
374 };
375 if compute {
376 bloom_stats.filter_computed += 1;
377 match outcome {
378 BloomBuildOutcome::Normal => {}
379 BloomBuildOutcome::TruncatedLarge => bloom_stats.filter_trunc_large += 1,
380 BloomBuildOutcome::TruncatedEmpty => bloom_stats.filter_trunc_empty += 1,
381 }
382 } else {
383 bloom_stats.filter_not_computed += 1;
384 }
385 cur += bytes.len() as u32;
386 indexes.push(cur);
387 data_payload.extend_from_slice(&bytes);
388 }
389 let mut bdat_chunk = Vec::with_capacity(12 + data_payload.len());
390 bdat_chunk.extend_from_slice(&bloom_settings.hash_version.to_be_bytes());
391 bdat_chunk.extend_from_slice(&bloom_settings.num_hashes.to_be_bytes());
392 bdat_chunk.extend_from_slice(&bloom_settings.bits_per_entry.to_be_bytes());
393 bdat_chunk.extend_from_slice(&data_payload);
394 let mut bidx_bytes = Vec::with_capacity(indexes.len() * 4);
395 for v in indexes {
396 bidx_bytes.extend_from_slice(&v.to_be_bytes());
397 }
398 (bidx_bytes, bdat_chunk, data_payload.len())
399 } else {
400 (Vec::new(), Vec::new(), 0)
401 };
402
403 let _ = bloom_total_payload;
404
405 let mut chunks: Vec<(u32, Vec<u8>)> = Vec::new();
406 chunks.push((CHUNK_OID_FANOUT, fanout));
407 chunks.push((CHUNK_OID_LOOKUP, oid_lookup));
408 chunks.push((CHUNK_COMMIT_DATA, cdat));
409 if write_generation_data {
410 chunks.push((CHUNK_GENERATION_DATA, gda2));
411 if !generation_overflow.is_empty() {
412 chunks.push((CHUNK_GENERATION_DATA_OVERFLOW, generation_overflow));
413 }
414 }
415 if !extra_edges.is_empty() {
416 chunks.push((CHUNK_EXTRA_EDGES, extra_edges));
417 }
418 if changed_paths {
419 chunks.push((CHUNK_BLOOM_INDEXES, bidx));
420 chunks.push((CHUNK_BLOOM_DATA, bdat));
421 }
422 if !base_graph_hashes.is_empty() {
423 let mut base_chunk = Vec::new();
424 for h in base_graph_hashes {
425 base_chunk.extend_from_slice(h);
426 }
427 chunks.push((CHUNK_BASE, base_chunk));
428 }
429
430 let num_chunks = chunks.len() as u8;
431 let header_size = 8u64;
432 let toc_size = (num_chunks as u64 + 1) * 12;
433 let mut offsets = Vec::with_capacity(chunks.len());
434 let mut cur = header_size + toc_size;
435 for (_, data) in &chunks {
436 offsets.push(cur);
437 cur += data.len() as u64;
438 }
439 let end_offset = cur;
440
441 let mut out = Vec::with_capacity(end_offset as usize + HASH_LEN);
442 out.write_all(SIGNATURE)?;
443 let base_layers = base_graph_hashes.len() as u8;
444 out.write_all(&[VERSION, HASH_VERSION_SHA1, num_chunks, base_layers])?;
445 for i in 0..chunks.len() {
446 out.write_all(&chunks[i].0.to_be_bytes())?;
447 out.write_all(&offsets[i].to_be_bytes())?;
448 }
449 out.write_all(&[0u8; 4])?;
450 out.write_all(&end_offset.to_be_bytes())?;
451 for (_, data) in &chunks {
452 out.write_all(data)?;
453 }
454
455 let checksum = sha1_file_body(&out);
456 out.write_all(&checksum)?;
457 Ok((out, bloom_stats))
458}
459
460pub fn collect_reachable_commit_oids(
462 git_dir: &std::path::Path,
463 odb: &Odb,
464) -> crate::error::Result<HashSet<ObjectId>> {
465 use std::fs;
466 let mut commits: HashSet<ObjectId> = HashSet::new();
467 let mut stack: Vec<ObjectId> = Vec::new();
468
469 fn collect_ref_tips(
470 git_dir: &std::path::Path,
471 dir: &std::path::Path,
472 stack: &mut Vec<ObjectId>,
473 ) -> crate::error::Result<()> {
474 if !dir.exists() {
475 return Ok(());
476 }
477 for entry in fs::read_dir(dir)? {
478 let entry = entry?;
479 let path = entry.path();
480 if path.is_dir() {
481 collect_ref_tips(git_dir, &path, stack)?;
482 } else if let Ok(content) = fs::read_to_string(&path) {
483 if let Ok(oid) = ObjectId::from_hex(content.trim()) {
484 stack.push(oid);
485 }
486 }
487 }
488 Ok(())
489 }
490
491 let refs_dir = git_dir.join("refs");
492 collect_ref_tips(git_dir, &refs_dir, &mut stack)?;
493
494 let packed_refs = git_dir.join("packed-refs");
495 if packed_refs.exists() {
496 if let Ok(content) = fs::read_to_string(&packed_refs) {
497 for line in content.lines() {
498 if line.starts_with('#') || line.starts_with('^') {
499 continue;
500 }
501 if let Some(hex) = line.split_whitespace().next() {
502 if let Ok(oid) = ObjectId::from_hex(hex) {
503 stack.push(oid);
504 }
505 }
506 }
507 }
508 }
509
510 let head_path = git_dir.join("HEAD");
511 if head_path.exists() {
512 let head = fs::read_to_string(&head_path)?;
513 let head = head.trim();
514 if let Some(refpath) = head.strip_prefix("ref: ") {
515 let full = git_dir.join(refpath);
516 if full.exists() {
517 if let Ok(content) = fs::read_to_string(&full) {
518 if let Ok(oid) = ObjectId::from_hex(content.trim()) {
519 stack.push(oid);
520 }
521 }
522 }
523 } else if let Ok(oid) = ObjectId::from_hex(head) {
524 stack.push(oid);
525 }
526 }
527
528 while let Some(oid) = stack.pop() {
529 if commits.contains(&oid) {
530 continue;
531 }
532 let obj = match odb.read(&oid) {
533 Ok(o) => o,
534 Err(_) => continue,
535 };
536 if obj.kind != ObjectKind::Commit {
537 if obj.kind == ObjectKind::Tag {
538 if let Ok(text) = std::str::from_utf8(&obj.data) {
539 for line in text.lines() {
540 if let Some(rest) = line.strip_prefix("object ") {
541 if let Ok(target) = ObjectId::from_hex(rest.trim()) {
542 stack.push(target);
543 }
544 }
545 }
546 }
547 }
548 continue;
549 }
550 let commit = parse_commit(&obj.data)?;
551 for parent in &commit.parents {
552 stack.push(*parent);
553 }
554 commits.insert(oid);
555 }
556
557 Ok(commits)
558}
559
560pub fn count_referenced_commit_tips(
564 git_dir: &std::path::Path,
565 odb: &Odb,
566) -> crate::error::Result<usize> {
567 use std::fs;
568 let mut tips: Vec<ObjectId> = Vec::new();
569
570 fn collect_ref_tips(
571 dir: &std::path::Path,
572 tips: &mut Vec<ObjectId>,
573 ) -> crate::error::Result<()> {
574 if !dir.exists() {
575 return Ok(());
576 }
577 for entry in fs::read_dir(dir)? {
578 let entry = entry?;
579 let path = entry.path();
580 if path.is_dir() {
581 collect_ref_tips(&path, tips)?;
582 } else if let Ok(content) = fs::read_to_string(&path) {
583 if let Ok(oid) = ObjectId::from_hex(content.trim()) {
584 tips.push(oid);
585 }
586 }
587 }
588 Ok(())
589 }
590
591 collect_ref_tips(&git_dir.join("refs"), &mut tips)?;
592
593 let packed_refs = git_dir.join("packed-refs");
594 if packed_refs.exists() {
595 if let Ok(content) = fs::read_to_string(&packed_refs) {
596 for line in content.lines() {
597 if line.starts_with('#') || line.starts_with('^') {
598 continue;
599 }
600 if let Some(hex) = line.split_whitespace().next() {
601 if let Ok(oid) = ObjectId::from_hex(hex) {
602 tips.push(oid);
603 }
604 }
605 }
606 }
607 }
608
609 let mut commits: HashSet<ObjectId> = HashSet::new();
613 for tip in tips {
614 if let Some(commit_oid) = peel_to_commit(odb, tip) {
615 commits.insert(commit_oid);
616 }
617 }
618 Ok(commits.len())
619}
620
621fn peel_to_commit(odb: &Odb, oid: ObjectId) -> Option<ObjectId> {
624 let mut current = oid;
625 for _ in 0..16 {
626 let obj = odb.read(¤t).ok()?;
627 match obj.kind {
628 ObjectKind::Commit => return Some(current),
629 ObjectKind::Tag => {
630 let text = std::str::from_utf8(&obj.data).ok()?;
631 let target = text
632 .lines()
633 .find_map(|line| line.strip_prefix("object "))
634 .and_then(|rest| ObjectId::from_hex(rest.trim()).ok())?;
635 current = target;
636 }
637 _ => return None,
638 }
639 }
640 None
641}