1use std::cmp::max;
16use std::collections::BTreeMap;
17use std::collections::HashMap;
18use std::fmt;
19use std::fmt::Debug;
20use std::io::Write as _;
21use std::iter;
22use std::ops::Bound;
23use std::path::Path;
24use std::sync::Arc;
25
26use blake2::Blake2b512;
27use digest::Digest as _;
28use itertools::Itertools as _;
29use pollster::FutureExt as _;
30use smallvec::SmallVec;
31use smallvec::smallvec;
32use tempfile::NamedTempFile;
33
34use super::changed_path::CompositeChangedPathIndex;
35use super::changed_path::collect_changed_paths;
36use super::composite::AsCompositeIndex;
37use super::composite::ChangeIdIndexImpl;
38use super::composite::CommitIndexSegment;
39use super::composite::CommitIndexSegmentId;
40use super::composite::CompositeCommitIndex;
41use super::composite::CompositeIndex;
42use super::composite::DynCommitIndexSegment;
43use super::entry::GlobalCommitPosition;
44use super::entry::LocalCommitPosition;
45use super::entry::SmallGlobalCommitPositionsVec;
46use super::entry::SmallLocalCommitPositionsVec;
47use super::readonly::COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION;
48use super::readonly::DefaultReadonlyIndex;
49use super::readonly::FieldLengths;
50use super::readonly::OVERFLOW_FLAG;
51use super::readonly::ReadonlyCommitIndexSegment;
52use crate::backend::BackendResult;
53use crate::backend::ChangeId;
54use crate::backend::CommitId;
55use crate::commit::Commit;
56use crate::file_util::IoResultExt as _;
57use crate::file_util::PathError;
58use crate::file_util::persist_content_addressed_temp_file;
59use crate::index::ChangeIdIndex;
60use crate::index::Index;
61use crate::index::IndexError;
62use crate::index::IndexResult;
63use crate::index::MutableIndex;
64use crate::index::ReadonlyIndex;
65use crate::object_id::HexPrefix;
66use crate::object_id::ObjectId;
67use crate::object_id::PrefixResolution;
68use crate::repo_path::RepoPathBuf;
69use crate::revset::ResolvedExpression;
70use crate::revset::Revset;
71use crate::revset::RevsetEvaluationError;
72use crate::store::Store;
73
74#[derive(Clone, Debug)]
75struct MutableGraphEntry {
76 commit_id: CommitId,
77 change_id: ChangeId,
78 generation_number: u32,
79 parent_positions: SmallGlobalCommitPositionsVec,
80}
81
82#[derive(Clone)]
83pub(super) struct MutableCommitIndexSegment {
84 parent_file: Option<Arc<ReadonlyCommitIndexSegment>>,
85 num_parent_commits: u32,
86 field_lengths: FieldLengths,
87 graph: Vec<MutableGraphEntry>,
88 commit_lookup: BTreeMap<CommitId, LocalCommitPosition>,
89 change_lookup: BTreeMap<ChangeId, SmallLocalCommitPositionsVec>,
90}
91
92impl Debug for MutableCommitIndexSegment {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
94 f.debug_struct("MutableCommitIndexSegment")
95 .field("parent_file", &self.parent_file)
96 .finish_non_exhaustive()
97 }
98}
99
100impl MutableCommitIndexSegment {
101 pub(super) fn full(field_lengths: FieldLengths) -> Self {
102 Self {
103 parent_file: None,
104 num_parent_commits: 0,
105 field_lengths,
106 graph: vec![],
107 commit_lookup: BTreeMap::new(),
108 change_lookup: BTreeMap::new(),
109 }
110 }
111
112 pub(super) fn incremental(parent_file: Arc<ReadonlyCommitIndexSegment>) -> Self {
113 let num_parent_commits = parent_file.as_composite().num_commits();
114 let field_lengths = parent_file.field_lengths();
115 Self {
116 parent_file: Some(parent_file),
117 num_parent_commits,
118 field_lengths,
119 graph: vec![],
120 commit_lookup: BTreeMap::new(),
121 change_lookup: BTreeMap::new(),
122 }
123 }
124
125 pub(super) fn as_composite(&self) -> &CompositeCommitIndex {
126 CompositeCommitIndex::new(self)
127 }
128
129 pub(super) fn add_commit_data(
130 &mut self,
131 commit_id: CommitId,
132 change_id: ChangeId,
133 parent_ids: &[CommitId],
134 ) {
135 if self.as_composite().has_id(&commit_id) {
136 return;
137 }
138 let mut entry = MutableGraphEntry {
139 commit_id,
140 change_id,
141 generation_number: 0,
142 parent_positions: SmallVec::new(),
143 };
144 for parent_id in parent_ids {
145 let parent_entry = self
146 .as_composite()
147 .entry_by_id(parent_id)
148 .expect("parent commit is not indexed");
149 entry.generation_number = max(
150 entry.generation_number,
151 parent_entry.generation_number() + 1,
152 );
153 entry.parent_positions.push(parent_entry.position());
154 }
155 let local_pos = LocalCommitPosition(u32::try_from(self.graph.len()).unwrap());
156 self.commit_lookup
157 .insert(entry.commit_id.clone(), local_pos);
158 self.change_lookup
159 .entry(entry.change_id.clone())
160 .and_modify(|positions| positions.push(local_pos))
162 .or_insert(smallvec![local_pos]);
163 self.graph.push(entry);
164 }
165
166 pub(super) fn add_commits_from(&mut self, other_segment: &DynCommitIndexSegment) {
167 let other = CompositeCommitIndex::new(other_segment);
168 for pos in other_segment.num_parent_commits()..other.num_commits() {
169 let entry = other.entry_by_pos(GlobalCommitPosition(pos));
170 let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec();
171 self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids);
172 }
173 }
174
175 pub(super) fn merge_in(&mut self, other: &Arc<ReadonlyCommitIndexSegment>) {
176 let files_to_add = itertools::merge_join_by(
178 self.as_composite().ancestor_files_without_local(),
179 iter::once(other).chain(other.as_composite().ancestor_files_without_local()),
180 |own, other| {
181 let own_num_commits = own.as_composite().num_commits();
182 let other_num_commits = other.as_composite().num_commits();
183 own_num_commits.cmp(&other_num_commits).reverse()
184 },
185 )
186 .take_while(|own_other| {
187 own_other
188 .as_ref()
189 .both()
190 .is_none_or(|(own, other)| own.id() != other.id())
191 })
192 .filter_map(|own_other| own_other.right())
193 .collect_vec();
194
195 for &file in files_to_add.iter().rev() {
196 self.add_commits_from(file.as_ref());
197 }
198 }
199
200 fn serialize_parent_filename(&self, buf: &mut Vec<u8>) {
201 if let Some(parent_file) = &self.parent_file {
202 let hex = parent_file.id().hex();
203 buf.extend(u32::try_from(hex.len()).unwrap().to_le_bytes());
204 buf.extend_from_slice(hex.as_bytes());
205 } else {
206 buf.extend(0_u32.to_le_bytes());
207 }
208 }
209
210 fn serialize_local_entries(&self, buf: &mut Vec<u8>) {
211 assert_eq!(self.graph.len(), self.commit_lookup.len());
212 debug_assert_eq!(
213 self.graph.len(),
214 self.change_lookup.values().flatten().count()
215 );
216
217 let num_commits = u32::try_from(self.graph.len()).unwrap();
218 buf.extend(num_commits.to_le_bytes());
219 let num_change_ids = u32::try_from(self.change_lookup.len()).unwrap();
220 buf.extend(num_change_ids.to_le_bytes());
221 let parent_overflow_offset = buf.len();
223 buf.extend(0_u32.to_le_bytes());
224 let change_overflow_offset = buf.len();
225 buf.extend(0_u32.to_le_bytes());
226
227 let change_id_pos_map: HashMap<&ChangeId, u32> = self
229 .change_lookup
230 .keys()
231 .enumerate()
232 .map(|(i, change_id)| (change_id, u32::try_from(i).unwrap()))
233 .collect();
234
235 let mut parent_overflow = vec![];
236 for entry in &self.graph {
237 buf.extend(entry.generation_number.to_le_bytes());
238
239 match entry.parent_positions.as_slice() {
240 [] => {
241 buf.extend((!0_u32).to_le_bytes());
242 buf.extend((!0_u32).to_le_bytes());
243 }
244 [GlobalCommitPosition(pos1)] => {
245 assert!(*pos1 < OVERFLOW_FLAG);
246 buf.extend(pos1.to_le_bytes());
247 buf.extend((!0_u32).to_le_bytes());
248 }
249 [GlobalCommitPosition(pos1), GlobalCommitPosition(pos2)] => {
250 assert!(*pos1 < OVERFLOW_FLAG);
251 assert!(*pos2 < OVERFLOW_FLAG);
252 buf.extend(pos1.to_le_bytes());
253 buf.extend(pos2.to_le_bytes());
254 }
255 positions => {
256 let overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
257 let num_parents = u32::try_from(positions.len()).unwrap();
258 assert!(overflow_pos < OVERFLOW_FLAG);
259 assert!(num_parents < OVERFLOW_FLAG);
260 buf.extend((!overflow_pos).to_le_bytes());
261 buf.extend((!num_parents).to_le_bytes());
262 parent_overflow.extend_from_slice(positions);
263 }
264 }
265
266 buf.extend(change_id_pos_map[&entry.change_id].to_le_bytes());
267
268 assert_eq!(
269 entry.commit_id.as_bytes().len(),
270 self.field_lengths.commit_id
271 );
272 buf.extend_from_slice(entry.commit_id.as_bytes());
273 }
274
275 for LocalCommitPosition(pos) in self.commit_lookup.values() {
276 buf.extend(pos.to_le_bytes());
277 }
278
279 for change_id in self.change_lookup.keys() {
280 assert_eq!(change_id.as_bytes().len(), self.field_lengths.change_id);
281 buf.extend_from_slice(change_id.as_bytes());
282 }
283
284 let mut change_overflow = vec![];
285 for positions in self.change_lookup.values() {
286 match positions.as_slice() {
287 [] => panic!("change id lookup entry must not be empty"),
288 [LocalCommitPosition(pos1)] => {
290 assert!(*pos1 < OVERFLOW_FLAG);
291 buf.extend(pos1.to_le_bytes());
292 }
293 positions => {
294 let overflow_pos = u32::try_from(change_overflow.len()).unwrap();
295 assert!(overflow_pos < OVERFLOW_FLAG);
296 buf.extend((!overflow_pos).to_le_bytes());
297 change_overflow.extend_from_slice(positions);
298 }
299 }
300 }
301
302 let num_parent_overflow = u32::try_from(parent_overflow.len()).unwrap();
303 buf[parent_overflow_offset..][..4].copy_from_slice(&num_parent_overflow.to_le_bytes());
304 for GlobalCommitPosition(pos) in parent_overflow {
305 buf.extend(pos.to_le_bytes());
306 }
307
308 let num_change_overflow = u32::try_from(change_overflow.len()).unwrap();
309 buf[change_overflow_offset..][..4].copy_from_slice(&num_change_overflow.to_le_bytes());
310 for LocalCommitPosition(pos) in change_overflow {
311 buf.extend(pos.to_le_bytes());
312 }
313 }
314
315 pub(super) fn maybe_squash_with_ancestors(self) -> Self {
319 let mut num_new_commits = self.num_local_commits();
320 let mut files_to_squash = vec![];
321 let mut base_parent_file = None;
322 for parent_file in self.as_composite().ancestor_files_without_local() {
323 if 2 * num_new_commits < parent_file.num_local_commits() {
326 base_parent_file = Some(parent_file.clone());
327 break;
328 }
329 num_new_commits += parent_file.num_local_commits();
330 files_to_squash.push(parent_file.clone());
331 }
332
333 if files_to_squash.is_empty() {
334 return self;
335 }
336
337 let mut squashed = if let Some(parent_file) = base_parent_file {
338 Self::incremental(parent_file)
339 } else {
340 Self::full(self.field_lengths)
341 };
342 for parent_file in files_to_squash.iter().rev() {
343 squashed.add_commits_from(parent_file.as_ref());
344 }
345 squashed.add_commits_from(&self);
346 squashed
347 }
348
349 pub(super) fn save_in(
350 mut self,
351 dir: &Path,
352 ) -> Result<Arc<ReadonlyCommitIndexSegment>, PathError> {
353 if self.num_local_commits() == 0
354 && let Some(parent_file) = self.parent_file.take()
355 {
356 return Ok(parent_file);
357 }
358
359 let mut buf = Vec::new();
360 buf.extend(COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
361 self.serialize_parent_filename(&mut buf);
362 let local_entries_offset = buf.len();
363 self.serialize_local_entries(&mut buf);
364 let mut hasher = Blake2b512::new();
365 hasher.update(&buf);
366 let index_file_id = CommitIndexSegmentId::from_bytes(&hasher.finalize());
367 let index_file_path = dir.join(index_file_id.hex());
368
369 let mut temp_file = NamedTempFile::new_in(dir).context(dir)?;
370 let file = temp_file.as_file_mut();
371 file.write_all(&buf).context(temp_file.path())?;
372 persist_content_addressed_temp_file(temp_file, &index_file_path)
373 .context(&index_file_path)?;
374
375 Ok(ReadonlyCommitIndexSegment::load_with_parent_file(
376 &mut &buf[local_entries_offset..],
377 index_file_id,
378 self.parent_file,
379 self.field_lengths,
380 )
381 .expect("in-memory index data should be valid and readable"))
382 }
383}
384
385impl CommitIndexSegment for MutableCommitIndexSegment {
386 fn num_parent_commits(&self) -> u32 {
387 self.num_parent_commits
388 }
389
390 fn num_local_commits(&self) -> u32 {
391 self.graph.len().try_into().unwrap()
392 }
393
394 fn parent_file(&self) -> Option<&Arc<ReadonlyCommitIndexSegment>> {
395 self.parent_file.as_ref()
396 }
397
398 fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalCommitPosition> {
399 self.commit_lookup.get(commit_id).copied()
400 }
401
402 fn resolve_neighbor_commit_ids(
403 &self,
404 commit_id: &CommitId,
405 ) -> (Option<CommitId>, Option<CommitId>) {
406 let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id);
407 (prev_id.cloned(), next_id.cloned())
408 }
409
410 fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
411 let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
412 resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone())
413 }
414
415 fn resolve_neighbor_change_ids(
416 &self,
417 change_id: &ChangeId,
418 ) -> (Option<ChangeId>, Option<ChangeId>) {
419 let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id);
420 (prev_id.cloned(), next_id.cloned())
421 }
422
423 fn resolve_change_id_prefix(
424 &self,
425 prefix: &HexPrefix,
426 ) -> PrefixResolution<(ChangeId, SmallLocalCommitPositionsVec)> {
427 let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes());
428 resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix)
429 .map(|(id, positions)| (id.clone(), positions.clone()))
430 }
431
432 fn generation_number(&self, local_pos: LocalCommitPosition) -> u32 {
433 self.graph[local_pos.0 as usize].generation_number
434 }
435
436 fn commit_id(&self, local_pos: LocalCommitPosition) -> CommitId {
437 self.graph[local_pos.0 as usize].commit_id.clone()
438 }
439
440 fn change_id(&self, local_pos: LocalCommitPosition) -> ChangeId {
441 self.graph[local_pos.0 as usize].change_id.clone()
442 }
443
444 fn num_parents(&self, local_pos: LocalCommitPosition) -> u32 {
445 self.graph[local_pos.0 as usize]
446 .parent_positions
447 .len()
448 .try_into()
449 .unwrap()
450 }
451
452 fn parent_positions(&self, local_pos: LocalCommitPosition) -> SmallGlobalCommitPositionsVec {
453 self.graph[local_pos.0 as usize].parent_positions.clone()
454 }
455}
456
457pub struct DefaultMutableIndex(CompositeIndex);
459
460impl DefaultMutableIndex {
461 pub(super) fn full(lengths: FieldLengths) -> Self {
462 let commits = Box::new(MutableCommitIndexSegment::full(lengths));
463 let mut changed_paths = CompositeChangedPathIndex::null();
465 changed_paths.make_mutable();
466 Self(CompositeIndex::from_mutable(commits, changed_paths))
467 }
468
469 pub(super) fn incremental(parent_index: &DefaultReadonlyIndex) -> Self {
470 let commits = Box::new(MutableCommitIndexSegment::incremental(
471 parent_index.readonly_commits().clone(),
472 ));
473 let mut changed_paths = parent_index.changed_paths().clone();
474 changed_paths.make_mutable();
475 Self(CompositeIndex::from_mutable(commits, changed_paths))
476 }
477
478 pub(super) fn into_segment(
479 self,
480 ) -> (Box<MutableCommitIndexSegment>, CompositeChangedPathIndex) {
481 self.0.into_mutable().expect("must have mutable")
482 }
483
484 fn mutable_commits(&mut self) -> &mut MutableCommitIndexSegment {
485 self.0.mutable_commits().expect("must have mutable")
486 }
487
488 pub fn num_commits(&self) -> u32 {
490 self.0.commits().num_commits()
491 }
492
493 #[tracing::instrument(skip(self))]
494 pub(super) async fn add_commit(&mut self, commit: &Commit) -> BackendResult<()> {
495 let new_commit_pos = GlobalCommitPosition(self.num_commits());
496 self.add_commit_data(
497 commit.id().clone(),
498 commit.change_id().clone(),
499 commit.parent_ids(),
500 );
501 if new_commit_pos == GlobalCommitPosition(self.num_commits()) {
502 return Ok(()); }
504 if self.0.changed_paths().next_mutable_commit_pos() == Some(new_commit_pos) {
505 self.add_commit_changed_paths(commit).await?;
506 }
507 Ok(())
508 }
509
510 pub(super) fn add_commit_data(
511 &mut self,
512 commit_id: CommitId,
513 change_id: ChangeId,
514 parent_ids: &[CommitId],
515 ) {
516 self.mutable_commits()
517 .add_commit_data(commit_id, change_id, parent_ids);
518 }
519
520 async fn add_commit_changed_paths(&mut self, commit: &Commit) -> BackendResult<()> {
523 let paths = collect_changed_paths(self, commit).await?;
524 self.0.changed_paths_mut().add_changed_paths(paths);
525 Ok(())
526 }
527
528 pub(super) fn merge_in(&mut self, other: &DefaultReadonlyIndex) {
529 let start_commit_pos = GlobalCommitPosition(self.num_commits());
530 self.mutable_commits().merge_in(other.readonly_commits());
531 if self.0.changed_paths().next_mutable_commit_pos() == Some(start_commit_pos) {
532 let other_commits = other.as_composite().commits();
533 for self_pos in (start_commit_pos.0..self.num_commits()).map(GlobalCommitPosition) {
534 let entry = self.0.commits().entry_by_pos(self_pos);
535 let other_pos = other_commits.commit_id_to_pos(&entry.commit_id()).unwrap();
536 let Some(paths) = other.changed_paths().changed_paths(other_pos) else {
537 break; };
539 let paths = paths.map(|path| path.to_owned()).collect();
540 self.0.changed_paths_mut().add_changed_paths(paths);
541 }
542 }
543 }
544}
545
546impl AsCompositeIndex for DefaultMutableIndex {
547 fn as_composite(&self) -> &CompositeIndex {
548 &self.0
549 }
550}
551
552impl Index for DefaultMutableIndex {
553 fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> IndexResult<usize> {
554 self.0.shortest_unique_commit_id_prefix_len(commit_id)
555 }
556
557 fn resolve_commit_id_prefix(
558 &self,
559 prefix: &HexPrefix,
560 ) -> IndexResult<PrefixResolution<CommitId>> {
561 self.0.resolve_commit_id_prefix(prefix)
562 }
563
564 fn has_id(&self, commit_id: &CommitId) -> IndexResult<bool> {
565 self.0.has_id(commit_id)
566 }
567
568 fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> IndexResult<bool> {
569 self.0.is_ancestor(ancestor_id, descendant_id)
570 }
571
572 fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> IndexResult<Vec<CommitId>> {
573 self.0.common_ancestors(set1, set2)
574 }
575
576 fn all_heads_for_gc(&self) -> IndexResult<Box<dyn Iterator<Item = CommitId> + '_>> {
577 self.0.all_heads_for_gc()
578 }
579
580 fn heads(&self, candidates: &mut dyn Iterator<Item = &CommitId>) -> IndexResult<Vec<CommitId>> {
581 self.0.heads(candidates)
582 }
583
584 fn changed_paths_in_commit(
585 &self,
586 commit_id: &CommitId,
587 ) -> IndexResult<Option<Box<dyn Iterator<Item = RepoPathBuf> + '_>>> {
588 self.0.changed_paths_in_commit(commit_id)
589 }
590
591 fn evaluate_revset(
592 &self,
593 expression: &ResolvedExpression,
594 store: &Arc<Store>,
595 ) -> Result<Box<dyn Revset + '_>, RevsetEvaluationError> {
596 self.0.evaluate_revset(expression, store)
597 }
598}
599
600impl MutableIndex for DefaultMutableIndex {
601 fn as_index(&self) -> &dyn Index {
602 self
603 }
604
605 fn change_id_index(
606 &self,
607 heads: &mut dyn Iterator<Item = &CommitId>,
608 ) -> Box<dyn ChangeIdIndex + '_> {
609 Box::new(ChangeIdIndexImpl::new(self, heads))
610 }
611
612 fn add_commit(&mut self, commit: &Commit) -> IndexResult<()> {
613 Self::add_commit(self, commit)
614 .block_on()
615 .map_err(|err| IndexError::Other(err.into()))
616 }
617
618 fn merge_in(&mut self, other: &dyn ReadonlyIndex) -> IndexResult<()> {
619 let other: &DefaultReadonlyIndex = other
620 .downcast_ref()
621 .expect("index to merge in must be a DefaultReadonlyIndex");
622 Self::merge_in(self, other);
623 Ok(())
624 }
625}
626
627fn resolve_neighbor_ids<'a, K: Ord, V>(
628 lookup_table: &'a BTreeMap<K, V>,
629 id: &K,
630) -> (Option<&'a K>, Option<&'a K>) {
631 let prev_id = lookup_table
632 .range((Bound::Unbounded, Bound::Excluded(id)))
633 .next_back()
634 .map(|(id, _)| id);
635 let next_id = lookup_table
636 .range((Bound::Excluded(id), Bound::Unbounded))
637 .next()
638 .map(|(id, _)| id);
639 (prev_id, next_id)
640}
641
642fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
643 lookup_table: &'a BTreeMap<K, V>,
644 prefix: &HexPrefix,
645 min_bytes_prefix: &K,
646) -> PrefixResolution<(&'a K, &'a V)> {
647 let mut matches = lookup_table
648 .range((Bound::Included(min_bytes_prefix), Bound::Unbounded))
649 .take_while(|&(id, _)| prefix.matches(id))
650 .fuse();
651 match (matches.next(), matches.next()) {
652 (Some(entry), None) => PrefixResolution::SingleMatch(entry),
653 (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
654 (None, _) => PrefixResolution::NoMatch,
655 }
656}