1use std::collections::{HashMap, HashSet};
4use std::io::Write;
5
6use sha1::{Digest, Sha1};
7use sha2::{Digest as Sha256Digest, Sha256};
8
9use crate::bloom::{BloomBuildOutcome, BloomFilterSettings};
10use crate::commit_graph_file::CommitGraphChain;
11use crate::objects::{parse_commit, HashAlgo, ObjectId, ObjectKind};
12use crate::odb::Odb;
13
14const SIGNATURE: &[u8; 4] = b"CGPH";
15const VERSION: u8 = 1;
16const HASH_LEN: usize = 20;
17
18const CHUNK_OID_FANOUT: u32 = 0x4f49_4446;
19const CHUNK_OID_LOOKUP: u32 = 0x4f49_444c;
20const CHUNK_COMMIT_DATA: u32 = 0x4344_4154;
21const CHUNK_GENERATION_DATA: u32 = 0x4744_4132;
22const CHUNK_GENERATION_DATA_OVERFLOW: u32 = 0x4744_4f32; const CHUNK_EXTRA_EDGES: u32 = 0x4544_4745;
24const CHUNK_BLOOM_INDEXES: u32 = 0x4249_4458;
25const CHUNK_BLOOM_DATA: u32 = 0x4244_4154;
26const CHUNK_BASE: u32 = 0x4241_5345;
27
28const PARENT_NONE: u32 = 0x7000_0000;
29const GRAPH_EXTRA_EDGES_NEEDED: u32 = 0x8000_0000;
30const GRAPH_LAST_EDGE: u32 = 0x8000_0000;
31
32const GENERATION_NUMBER_V2_OFFSET_MAX: u64 = (1u64 << 31) - 1;
34const CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW: u32 = 1u32 << 31;
36
37#[derive(Debug, Clone)]
39pub struct CommitGraphCommitInfo {
40 pub tree: ObjectId,
41 pub parents: Vec<ObjectId>,
42 pub commit_time: u64,
44}
45
46fn sha1_file_body(body: &[u8]) -> [u8; 20] {
47 let mut h = Sha1::new();
48 h.update(body);
49 h.finalize().into()
50}
51
52fn hash_file_body(body: &[u8], algo: HashAlgo) -> Vec<u8> {
54 match algo {
55 HashAlgo::Sha1 => sha1_file_body(body).to_vec(),
56 HashAlgo::Sha256 => {
57 let mut h = Sha256::new();
58 Sha256Digest::update(&mut h, body);
59 h.finalize().to_vec()
60 }
61 }
62}
63
64fn parse_commit_time(committer: &str) -> u64 {
65 let parts: Vec<&str> = committer.rsplitn(3, ' ').collect();
66 if parts.len() >= 2 {
67 parts[1].parse::<u64>().unwrap_or(0)
68 } else {
69 0
70 }
71}
72
73pub fn load_commit_graph_commit_info(
75 odb: &Odb,
76 oid: ObjectId,
77) -> crate::error::Result<CommitGraphCommitInfo> {
78 let obj = odb.read(&oid)?;
79 if obj.kind != ObjectKind::Commit {
80 return Err(crate::error::Error::CorruptObject(format!(
81 "object {oid} is not a commit"
82 )));
83 }
84 let c = parse_commit(&obj.data)?;
85 Ok(CommitGraphCommitInfo {
86 tree: c.tree,
87 parents: c.parents.clone(),
88 commit_time: parse_commit_time(&c.committer),
89 })
90}
91
92fn compute_topo_generations(
93 sorted_oids: &[ObjectId],
94 infos: &HashMap<ObjectId, CommitGraphCommitInfo>,
95 oid_to_idx: &HashMap<ObjectId, u32>,
96) -> Vec<u32> {
97 let n = sorted_oids.len();
98 let mut gen = vec![0u32; n];
99 let mut computed = vec![false; n];
100 for i in 0..n {
101 if computed[i] {
102 continue;
103 }
104 let mut work_stack: Vec<(usize, bool)> = vec![(i, false)];
105 while let Some((idx, parents_done)) = work_stack.pop() {
106 if computed[idx] {
107 continue;
108 }
109 let oid = sorted_oids[idx];
110 let info = &infos[&oid];
111 if parents_done {
112 let mut max_parent_gen = 0u32;
113 for p in &info.parents {
114 if let Some(&pidx) = oid_to_idx.get(p) {
115 max_parent_gen = max_parent_gen.max(gen[pidx as usize]);
116 }
117 }
118 gen[idx] = max_parent_gen + 1;
119 computed[idx] = true;
120 } else {
121 let mut all_done = true;
122 for p in &info.parents {
123 if let Some(&pidx) = oid_to_idx.get(p) {
124 if !computed[pidx as usize] {
125 all_done = false;
126 }
127 }
128 }
129 if all_done {
130 let mut max_parent_gen = 0u32;
131 for p in &info.parents {
132 if let Some(&pidx) = oid_to_idx.get(p) {
133 max_parent_gen = max_parent_gen.max(gen[pidx as usize]);
134 }
135 }
136 gen[idx] = max_parent_gen + 1;
137 computed[idx] = true;
138 } else {
139 work_stack.push((idx, true));
140 for p in &info.parents {
141 if let Some(&pidx) = oid_to_idx.get(p) {
142 if !computed[pidx as usize] {
143 work_stack.push((pidx as usize, false));
144 }
145 }
146 }
147 }
148 }
149 }
150 }
151 gen
152}
153
154fn compute_corrected_generations(
155 sorted_oids: &[ObjectId],
156 infos: &HashMap<ObjectId, CommitGraphCommitInfo>,
157 oid_to_idx: &HashMap<ObjectId, u32>,
158 topo_gen: &[u32],
159) -> Vec<u64> {
160 let n = sorted_oids.len();
161 let mut gen_date = vec![0u64; n];
162 let mut computed = vec![false; n];
163 for i in 0..n {
164 if computed[i] {
165 continue;
166 }
167 let mut work_stack: Vec<(usize, bool)> = vec![(i, false)];
168 while let Some((idx, parents_done)) = work_stack.pop() {
169 if computed[idx] {
170 continue;
171 }
172 let oid = sorted_oids[idx];
173 let info = &infos[&oid];
174 let cdate = info.commit_time;
175 if parents_done {
176 let mut max_g = cdate;
177 for p in &info.parents {
178 if let Some(&pidx) = oid_to_idx.get(p) {
179 max_g = max_g.max(gen_date[pidx as usize]);
180 }
181 }
182 let topo = topo_gen[idx] as u64;
183 if max_g < topo {
184 max_g = topo;
185 }
186 gen_date[idx] = max_g + 1;
187 computed[idx] = true;
188 } else {
189 let mut all_done = true;
190 for p in &info.parents {
191 if let Some(&pidx) = oid_to_idx.get(p) {
192 if !computed[pidx as usize] {
193 all_done = false;
194 }
195 }
196 }
197 if all_done {
198 let mut max_g = cdate;
199 for p in &info.parents {
200 if let Some(&pidx) = oid_to_idx.get(p) {
201 max_g = max_g.max(gen_date[pidx as usize]);
202 }
203 }
204 let topo = topo_gen[idx] as u64;
205 if max_g < topo {
206 max_g = topo;
207 }
208 gen_date[idx] = max_g + 1;
209 computed[idx] = true;
210 } else {
211 work_stack.push((idx, true));
212 for p in &info.parents {
213 if let Some(&pidx) = oid_to_idx.get(p) {
214 if !computed[pidx as usize] {
215 work_stack.push((pidx as usize, false));
216 }
217 }
218 }
219 }
220 }
221 }
222 }
223 gen_date
224}
225
226fn resolve_parent_edge(
227 parent: ObjectId,
228 oid_to_idx: &HashMap<ObjectId, u32>,
229 base_count: u32,
230 chain: Option<&CommitGraphChain>,
231) -> u32 {
232 if let Some(&idx) = oid_to_idx.get(&parent) {
233 return idx + base_count;
234 }
235 if let Some(c) = chain {
236 if let Some(gpos) = c.global_position(&parent) {
237 return gpos;
238 }
239 }
240 PARENT_NONE
241}
242
243#[derive(Debug, Default, Clone, Copy)]
245pub struct BloomWriteStats {
246 pub filter_computed: u32,
247 pub filter_not_computed: u32,
248 pub filter_trunc_empty: u32,
249 pub filter_trunc_large: u32,
250 pub filter_upgraded: u32,
251}
252
253pub fn build_commit_graph_bytes(
255 sorted_oids: &[ObjectId],
256 infos: &HashMap<ObjectId, CommitGraphCommitInfo>,
257 odb: &Odb,
258 changed_paths: bool,
259 bloom_settings: &BloomFilterSettings,
260 base_chain: Option<&CommitGraphChain>,
261 base_graph_hashes: &[[u8; 20]],
262 max_new_filters: Option<u32>,
263 existing_filters: &HashMap<ObjectId, Vec<u8>>,
264 upgraded_filters: &HashMap<ObjectId, Vec<u8>>,
265 write_generation_data: bool,
266) -> crate::error::Result<(Vec<u8>, BloomWriteStats)> {
267 let base_count: u32 = base_chain.map(CommitGraphChain::total_commits).unwrap_or(0);
268
269 let oid_to_idx: HashMap<ObjectId, u32> = sorted_oids
270 .iter()
271 .enumerate()
272 .map(|(i, o)| (*o, i as u32))
273 .collect();
274
275 let topo = compute_topo_generations(sorted_oids, infos, &oid_to_idx);
276 let gen_date = compute_corrected_generations(sorted_oids, infos, &oid_to_idx, &topo);
277
278 let mut gda2: Vec<u8> = Vec::with_capacity(sorted_oids.len() * 4);
279 let mut generation_overflow: Vec<u8> = Vec::new();
280 let mut overflow_count: u32 = 0;
281 for (i, oid) in sorted_oids.iter().enumerate() {
282 let info = &infos[oid];
283 let offset_raw = gen_date[i].saturating_sub(info.commit_time);
284 if offset_raw > GENERATION_NUMBER_V2_OFFSET_MAX {
285 let marker = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | overflow_count;
286 overflow_count = overflow_count.wrapping_add(1);
287 gda2.extend_from_slice(&marker.to_be_bytes());
288 generation_overflow.extend_from_slice(&((offset_raw >> 32) as u32).to_be_bytes());
289 generation_overflow.extend_from_slice(&((offset_raw as u32).to_be_bytes()));
290 } else {
291 gda2.extend_from_slice(&(offset_raw as u32).to_be_bytes());
292 }
293 }
294
295 let mut extra_edges: Vec<u8> = Vec::new();
296
297 let mut cdat: Vec<u8> = Vec::with_capacity(sorted_oids.len() * (HASH_LEN + 16));
298 for (i, oid) in sorted_oids.iter().enumerate() {
299 let info = &infos[oid];
300 cdat.extend_from_slice(info.tree.as_bytes());
301
302 let p1 = info
303 .parents
304 .first()
305 .map(|p| resolve_parent_edge(*p, &oid_to_idx, base_count, base_chain))
306 .unwrap_or(PARENT_NONE);
307 cdat.extend_from_slice(&p1.to_be_bytes());
308
309 let p2 = if info.parents.len() <= 1 {
310 PARENT_NONE
311 } else if info.parents.len() == 2 {
312 resolve_parent_edge(info.parents[1], &oid_to_idx, base_count, base_chain)
313 } else {
314 let start_u32 = (extra_edges.len() / 4) as u32;
315 for (j, p) in info.parents.iter().enumerate().skip(1) {
316 let mut ev = resolve_parent_edge(*p, &oid_to_idx, base_count, base_chain);
317 if j + 1 == info.parents.len() {
318 ev |= GRAPH_LAST_EDGE;
319 }
320 extra_edges.extend_from_slice(&ev.to_be_bytes());
321 }
322 GRAPH_EXTRA_EDGES_NEEDED | start_u32
323 };
324 cdat.extend_from_slice(&p2.to_be_bytes());
325
326 let topo = topo[i];
327 let date = info.commit_time;
328 let packed = (topo << 2) | (((date >> 32) & 0x3) as u32);
329 cdat.extend_from_slice(&packed.to_be_bytes());
330 cdat.extend_from_slice(&((date & 0xFFFF_FFFF) as u32).to_be_bytes());
331 }
332
333 let mut fanout = vec![0u8; 256 * 4];
334 let mut counts = [0u32; 256];
335 for oid in sorted_oids {
336 counts[oid.as_bytes()[0] as usize] += 1;
337 }
338 let mut cum = 0u32;
339 for i in 0..256 {
340 cum += counts[i];
341 fanout[i * 4..i * 4 + 4].copy_from_slice(&cum.to_be_bytes());
342 }
343
344 let mut oid_lookup = Vec::with_capacity(sorted_oids.len() * HASH_LEN);
345 for oid in sorted_oids {
346 oid_lookup.extend_from_slice(oid.as_bytes());
347 }
348
349 let mut bloom_stats = BloomWriteStats::default();
350 let max_new = max_new_filters.unwrap_or(u32::MAX);
351 let (bidx, bdat, bloom_total_payload) = if changed_paths {
352 let mut indexes: Vec<u32> = Vec::with_capacity(sorted_oids.len());
353 let mut data_payload = Vec::new();
354 let mut cur = 0u32;
355 for oid in sorted_oids {
356 let info = &infos[oid];
357 if let Some(existing) = existing_filters.get(oid) {
360 bloom_stats.filter_not_computed += 1;
361 cur += existing.len() as u32;
362 indexes.push(cur);
363 data_payload.extend_from_slice(existing);
364 continue;
365 }
366 if let Some(upgraded) = upgraded_filters.get(oid) {
370 bloom_stats.filter_upgraded += 1;
371 cur += upgraded.len() as u32;
372 indexes.push(cur);
373 data_payload.extend_from_slice(upgraded);
374 continue;
375 }
376 let compute = bloom_stats.filter_computed < max_new;
377 let (bytes, outcome) = if compute {
378 crate::commit_graph_file::bloom_filter_for_commit_write(
379 odb,
380 &info.parents,
381 info.tree,
382 bloom_settings,
383 )?
384 } else {
385 (Vec::new(), BloomBuildOutcome::Normal)
386 };
387 if compute {
388 bloom_stats.filter_computed += 1;
389 match outcome {
390 BloomBuildOutcome::Normal => {}
391 BloomBuildOutcome::TruncatedLarge => bloom_stats.filter_trunc_large += 1,
392 BloomBuildOutcome::TruncatedEmpty => bloom_stats.filter_trunc_empty += 1,
393 }
394 } else {
395 bloom_stats.filter_not_computed += 1;
396 }
397 cur += bytes.len() as u32;
398 indexes.push(cur);
399 data_payload.extend_from_slice(&bytes);
400 }
401 let mut bdat_chunk = Vec::with_capacity(12 + data_payload.len());
402 bdat_chunk.extend_from_slice(&bloom_settings.hash_version.to_be_bytes());
403 bdat_chunk.extend_from_slice(&bloom_settings.num_hashes.to_be_bytes());
404 bdat_chunk.extend_from_slice(&bloom_settings.bits_per_entry.to_be_bytes());
405 bdat_chunk.extend_from_slice(&data_payload);
406 let mut bidx_bytes = Vec::with_capacity(indexes.len() * 4);
407 for v in indexes {
408 bidx_bytes.extend_from_slice(&v.to_be_bytes());
409 }
410 (bidx_bytes, bdat_chunk, data_payload.len())
411 } else {
412 (Vec::new(), Vec::new(), 0)
413 };
414
415 let _ = bloom_total_payload;
416
417 let mut chunks: Vec<(u32, Vec<u8>)> = Vec::new();
418 chunks.push((CHUNK_OID_FANOUT, fanout));
419 chunks.push((CHUNK_OID_LOOKUP, oid_lookup));
420 chunks.push((CHUNK_COMMIT_DATA, cdat));
421 if write_generation_data {
422 chunks.push((CHUNK_GENERATION_DATA, gda2));
423 if !generation_overflow.is_empty() {
424 chunks.push((CHUNK_GENERATION_DATA_OVERFLOW, generation_overflow));
425 }
426 }
427 if !extra_edges.is_empty() {
428 chunks.push((CHUNK_EXTRA_EDGES, extra_edges));
429 }
430 if changed_paths {
431 chunks.push((CHUNK_BLOOM_INDEXES, bidx));
432 chunks.push((CHUNK_BLOOM_DATA, bdat));
433 }
434 if !base_graph_hashes.is_empty() {
435 let mut base_chunk = Vec::new();
436 for h in base_graph_hashes {
437 base_chunk.extend_from_slice(h);
438 }
439 chunks.push((CHUNK_BASE, base_chunk));
440 }
441
442 let num_chunks = chunks.len() as u8;
443 let header_size = 8u64;
444 let toc_size = (num_chunks as u64 + 1) * 12;
445 let mut offsets = Vec::with_capacity(chunks.len());
446 let mut cur = header_size + toc_size;
447 for (_, data) in &chunks {
448 offsets.push(cur);
449 cur += data.len() as u64;
450 }
451 let end_offset = cur;
452
453 let algo = odb.hash_algo();
456 let mut out = Vec::with_capacity(end_offset as usize + algo.len());
457 out.write_all(SIGNATURE)?;
458 let base_layers = base_graph_hashes.len() as u8;
459 out.write_all(&[VERSION, algo.oid_version(), num_chunks, base_layers])?;
460 for i in 0..chunks.len() {
461 out.write_all(&chunks[i].0.to_be_bytes())?;
462 out.write_all(&offsets[i].to_be_bytes())?;
463 }
464 out.write_all(&[0u8; 4])?;
465 out.write_all(&end_offset.to_be_bytes())?;
466 for (_, data) in &chunks {
467 out.write_all(data)?;
468 }
469
470 let checksum = hash_file_body(&out, algo);
471 out.write_all(&checksum)?;
472 Ok((out, bloom_stats))
473}
474
475pub fn collect_reachable_commit_oids(
477 git_dir: &std::path::Path,
478 odb: &Odb,
479) -> crate::error::Result<HashSet<ObjectId>> {
480 use std::fs;
481 let mut commits: HashSet<ObjectId> = HashSet::new();
482 let mut stack: Vec<ObjectId> = Vec::new();
483
484 fn collect_ref_tips(
485 git_dir: &std::path::Path,
486 dir: &std::path::Path,
487 stack: &mut Vec<ObjectId>,
488 ) -> crate::error::Result<()> {
489 if !dir.exists() {
490 return Ok(());
491 }
492 for entry in fs::read_dir(dir)? {
493 let entry = entry?;
494 let path = entry.path();
495 if path.is_dir() {
496 collect_ref_tips(git_dir, &path, stack)?;
497 } else if let Ok(content) = fs::read_to_string(&path) {
498 if let Ok(oid) = ObjectId::from_hex(content.trim()) {
499 stack.push(oid);
500 }
501 }
502 }
503 Ok(())
504 }
505
506 let refs_dir = git_dir.join("refs");
507 collect_ref_tips(git_dir, &refs_dir, &mut stack)?;
508
509 let packed_refs = git_dir.join("packed-refs");
510 if packed_refs.exists() {
511 if let Ok(content) = fs::read_to_string(&packed_refs) {
512 for line in content.lines() {
513 if line.starts_with('#') || line.starts_with('^') {
514 continue;
515 }
516 if let Some(hex) = line.split_whitespace().next() {
517 if let Ok(oid) = ObjectId::from_hex(hex) {
518 stack.push(oid);
519 }
520 }
521 }
522 }
523 }
524
525 let head_path = git_dir.join("HEAD");
526 if head_path.exists() {
527 let head = fs::read_to_string(&head_path)?;
528 let head = head.trim();
529 if let Some(refpath) = head.strip_prefix("ref: ") {
530 let full = git_dir.join(refpath);
531 if full.exists() {
532 if let Ok(content) = fs::read_to_string(&full) {
533 if let Ok(oid) = ObjectId::from_hex(content.trim()) {
534 stack.push(oid);
535 }
536 }
537 }
538 } else if let Ok(oid) = ObjectId::from_hex(head) {
539 stack.push(oid);
540 }
541 }
542
543 while let Some(oid) = stack.pop() {
544 if commits.contains(&oid) {
545 continue;
546 }
547 let obj = match odb.read(&oid) {
548 Ok(o) => o,
549 Err(_) => continue,
550 };
551 if obj.kind != ObjectKind::Commit {
552 if obj.kind == ObjectKind::Tag {
553 if let Ok(text) = std::str::from_utf8(&obj.data) {
554 for line in text.lines() {
555 if let Some(rest) = line.strip_prefix("object ") {
556 if let Ok(target) = ObjectId::from_hex(rest.trim()) {
557 stack.push(target);
558 }
559 }
560 }
561 }
562 }
563 continue;
564 }
565 let commit = parse_commit(&obj.data)?;
566 for parent in &commit.parents {
567 stack.push(*parent);
568 }
569 commits.insert(oid);
570 }
571
572 Ok(commits)
573}
574
575pub fn count_referenced_commit_tips(
579 git_dir: &std::path::Path,
580 odb: &Odb,
581) -> crate::error::Result<usize> {
582 use std::fs;
583 let mut tips: Vec<ObjectId> = Vec::new();
584
585 fn collect_ref_tips(
586 dir: &std::path::Path,
587 tips: &mut Vec<ObjectId>,
588 ) -> crate::error::Result<()> {
589 if !dir.exists() {
590 return Ok(());
591 }
592 for entry in fs::read_dir(dir)? {
593 let entry = entry?;
594 let path = entry.path();
595 if path.is_dir() {
596 collect_ref_tips(&path, tips)?;
597 } else if let Ok(content) = fs::read_to_string(&path) {
598 if let Ok(oid) = ObjectId::from_hex(content.trim()) {
599 tips.push(oid);
600 }
601 }
602 }
603 Ok(())
604 }
605
606 collect_ref_tips(&git_dir.join("refs"), &mut tips)?;
607
608 let packed_refs = git_dir.join("packed-refs");
609 if packed_refs.exists() {
610 if let Ok(content) = fs::read_to_string(&packed_refs) {
611 for line in content.lines() {
612 if line.starts_with('#') || line.starts_with('^') {
613 continue;
614 }
615 if let Some(hex) = line.split_whitespace().next() {
616 if let Ok(oid) = ObjectId::from_hex(hex) {
617 tips.push(oid);
618 }
619 }
620 }
621 }
622 }
623
624 let mut commits: HashSet<ObjectId> = HashSet::new();
628 for tip in tips {
629 if let Some(commit_oid) = peel_to_commit(odb, tip) {
630 commits.insert(commit_oid);
631 }
632 }
633 Ok(commits.len())
634}
635
636fn peel_to_commit(odb: &Odb, oid: ObjectId) -> Option<ObjectId> {
639 let mut current = oid;
640 for _ in 0..16 {
641 let obj = odb.read(¤t).ok()?;
642 match obj.kind {
643 ObjectKind::Commit => return Some(current),
644 ObjectKind::Tag => {
645 let text = std::str::from_utf8(&obj.data).ok()?;
646 let target = text
647 .lines()
648 .find_map(|line| line.strip_prefix("object "))
649 .and_then(|rest| ObjectId::from_hex(rest.trim()).ok())?;
650 current = target;
651 }
652 _ => return None,
653 }
654 }
655 None
656}