use std::cmp::max;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::fmt;
use std::fmt::Debug;
use std::io::Write as _;
use std::iter;
use std::ops::Bound;
use std::path::Path;
use std::sync::Arc;

use blake2::Blake2b512;
use digest::Digest as _;
use itertools::Itertools as _;
use pollster::FutureExt as _;
use smallvec::SmallVec;
use smallvec::smallvec;
use tempfile::NamedTempFile;

use super::changed_path::CompositeChangedPathIndex;
use super::changed_path::collect_changed_paths;
use super::composite::AsCompositeIndex;
use super::composite::ChangeIdIndexImpl;
use super::composite::CommitIndexSegment;
use super::composite::CommitIndexSegmentId;
use super::composite::CompositeCommitIndex;
use super::composite::CompositeIndex;
use super::composite::DynCommitIndexSegment;
use super::entry::GlobalCommitPosition;
use super::entry::LocalCommitPosition;
use super::entry::SmallGlobalCommitPositionsVec;
use super::entry::SmallLocalCommitPositionsVec;
use super::readonly::COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION;
use super::readonly::DefaultReadonlyIndex;
use super::readonly::FieldLengths;
use super::readonly::OVERFLOW_FLAG;
use super::readonly::ReadonlyCommitIndexSegment;
use crate::backend::BackendResult;
use crate::backend::ChangeId;
use crate::backend::CommitId;
use crate::commit::Commit;
use crate::file_util::IoResultExt as _;
use crate::file_util::PathError;
use crate::file_util::persist_content_addressed_temp_file;
use crate::index::AllHeadsForGcUnsupported;
use crate::index::ChangeIdIndex;
use crate::index::Index;
use crate::index::IndexError;
use crate::index::MutableIndex;
use crate::index::ReadonlyIndex;
use crate::object_id::HexPrefix;
use crate::object_id::ObjectId;
use crate::object_id::PrefixResolution;
use crate::repo_path::RepoPathBuf;
use crate::revset::ResolvedExpression;
use crate::revset::Revset;
use crate::revset::RevsetEvaluationError;
use crate::store::Store;
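
/// In-memory record for one commit in the mutable segment: its ids, its
/// generation number, and the global index positions of its parents.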
#[derive(Clone, Debug)]
struct MutableGraphEntry {
    commit_id: CommitId,
    change_id: ChangeId,
    generation_number: u32,
    parent_positions: SmallGlobalCommitPositionsVec,
}
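
/// Commit index segment that is accumulated in memory, optionally stacked on
/// top of a readonly parent segment file.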
#[derive(Clone)]
pub(super) struct MutableCommitIndexSegment {
    parent_file: Option<Arc<ReadonlyCommitIndexSegment>>,
    num_parent_commits: u32,
    field_lengths: FieldLengths,
    graph: Vec<MutableGraphEntry>,
    commit_lookup: BTreeMap<CommitId, LocalCommitPosition>,
    change_lookup: BTreeMap<ChangeId, SmallLocalCommitPositionsVec>,
}

impl Debug for MutableCommitIndexSegment {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        f.debug_struct("MutableCommitIndexSegment")
            .field("parent_file", &self.parent_file)
            .finish_non_exhaustive()
    }
}

impl MutableCommitIndexSegment {
    pub(super) fn full(field_lengths: FieldLengths) -> Self {
        Self {
            parent_file: None,
            num_parent_commits: 0,
            field_lengths,
            graph: vec![],
            commit_lookup: BTreeMap::new(),
            change_lookup: BTreeMap::new(),
        }
    }

    pub(super) fn incremental(parent_file: Arc<ReadonlyCommitIndexSegment>) -> Self {
        let num_parent_commits = parent_file.as_composite().num_commits();
        let field_lengths = parent_file.field_lengths();
        Self {
            parent_file: Some(parent_file),
            num_parent_commits,
            field_lengths,
            graph: vec![],
            commit_lookup: BTreeMap::new(),
            change_lookup: BTreeMap::new(),
        }
    }

    pub(super) fn as_composite(&self) -> &CompositeCommitIndex {
        CompositeCommitIndex::new(self)
    }
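
    /// Records a commit with the given parents, which must already be indexed.
    /// A commit id that is already present is silently ignored. The generation
    /// number is computed as one more than the largest parent generation.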
    pub(super) fn add_commit_data(
        &mut self,
        commit_id: CommitId,
        change_id: ChangeId,
        parent_ids: &[CommitId],
    ) {
        if self.as_composite().has_id(&commit_id) {
            return;
        }
        let mut entry = MutableGraphEntry {
            commit_id,
            change_id,
            generation_number: 0,
            parent_positions: SmallVec::new(),
        };
        for parent_id in parent_ids {
            let parent_entry = self
                .as_composite()
                .entry_by_id(parent_id)
                .expect("parent commit is not indexed");
            entry.generation_number = max(
                entry.generation_number,
                parent_entry.generation_number() + 1,
            );
            entry.parent_positions.push(parent_entry.position());
        }
        let local_pos = LocalCommitPosition(u32::try_from(self.graph.len()).unwrap());
        self.commit_lookup
            .insert(entry.commit_id.clone(), local_pos);
        self.change_lookup
            .entry(entry.change_id.clone())
            .and_modify(|positions| positions.push(local_pos))
            .or_insert(smallvec![local_pos]);
        self.graph.push(entry);
    }
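
    /// Copies the local entries of `other_segment` (those not covered by its
    /// own ancestor files) into this segment, in their original order.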
    pub(super) fn add_commits_from(&mut self, other_segment: &DynCommitIndexSegment) {
        let other = CompositeCommitIndex::new(other_segment);
        for pos in other_segment.num_parent_commits()..other.num_commits() {
            let entry = other.entry_by_pos(GlobalCommitPosition(pos));
            let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec();
            self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids);
        }
    }
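
    /// Imports commits from the segment file `other` and its ancestors. The
    /// two ancestor chains are compared largest-first; the files that appear
    /// only on `other`'s side are then re-added on top of this segment,
    /// smallest first (commits that are already indexed are skipped).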
    pub(super) fn merge_in(&mut self, other: &Arc<ReadonlyCommitIndexSegment>) {
        let files_to_add = itertools::merge_join_by(
            self.as_composite().ancestor_files_without_local(),
            iter::once(other).chain(other.as_composite().ancestor_files_without_local()),
            |own, other| {
                let own_num_commits = own.as_composite().num_commits();
                let other_num_commits = other.as_composite().num_commits();
                own_num_commits.cmp(&other_num_commits).reverse()
            },
        )
        .take_while(|own_other| {
            own_other
                .as_ref()
                .both()
                .is_none_or(|(own, other)| own.id() != other.id())
        })
        .filter_map(|own_other| own_other.right())
        .collect_vec();

        for &file in files_to_add.iter().rev() {
            self.add_commits_from(file.as_ref());
        }
    }
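
    /// Writes the parent file name as a length-prefixed hex string, or a zero
    /// length if this segment has no parent file.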
    fn serialize_parent_filename(&self, buf: &mut Vec<u8>) {
        if let Some(parent_file) = &self.parent_file {
            let hex = parent_file.id().hex();
            buf.extend(u32::try_from(hex.len()).unwrap().to_le_bytes());
            buf.extend_from_slice(hex.as_bytes());
        } else {
            buf.extend(0_u32.to_le_bytes());
        }
    }
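
    /// Writes the local entries in the order produced below: the commit and
    /// change-id counts and the two overflow-table lengths, then one
    /// fixed-width record per commit (generation number, two parent slots,
    /// change-id table index, commit id), followed by the commit-id lookup
    /// table, the sorted change ids, the per-change-id positions, and finally
    /// the parent and change overflow tables. All integers are little-endian
    /// u32s.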
    fn serialize_local_entries(&self, buf: &mut Vec<u8>) {
        assert_eq!(self.graph.len(), self.commit_lookup.len());
        debug_assert_eq!(
            self.graph.len(),
            self.change_lookup.values().flatten().count()
        );

        let num_commits = u32::try_from(self.graph.len()).unwrap();
        buf.extend(num_commits.to_le_bytes());
        let num_change_ids = u32::try_from(self.change_lookup.len()).unwrap();
        buf.extend(num_change_ids.to_le_bytes());
        let parent_overflow_offset = buf.len();
        buf.extend(0_u32.to_le_bytes());
        let change_overflow_offset = buf.len();
        buf.extend(0_u32.to_le_bytes());

        let change_id_pos_map: HashMap<&ChangeId, u32> = self
            .change_lookup
            .keys()
            .enumerate()
            .map(|(i, change_id)| (change_id, u32::try_from(i).unwrap()))
            .collect();
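
        // Each record holds up to two parent positions inline; !0 marks an
        // unused slot. Longer parent lists go to the overflow table, and the
        // two slots then hold the bitwise-negated overflow offset and parent
        // count, which stay distinguishable because real positions are
        // asserted to be below OVERFLOW_FLAG.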
        let mut parent_overflow = vec![];
        for entry in &self.graph {
            buf.extend(entry.generation_number.to_le_bytes());

            match entry.parent_positions.as_slice() {
                [] => {
                    buf.extend((!0_u32).to_le_bytes());
                    buf.extend((!0_u32).to_le_bytes());
                }
                [GlobalCommitPosition(pos1)] => {
                    assert!(*pos1 < OVERFLOW_FLAG);
                    buf.extend(pos1.to_le_bytes());
                    buf.extend((!0_u32).to_le_bytes());
                }
                [GlobalCommitPosition(pos1), GlobalCommitPosition(pos2)] => {
                    assert!(*pos1 < OVERFLOW_FLAG);
                    assert!(*pos2 < OVERFLOW_FLAG);
                    buf.extend(pos1.to_le_bytes());
                    buf.extend(pos2.to_le_bytes());
                }
                positions => {
                    let overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
                    let num_parents = u32::try_from(positions.len()).unwrap();
                    assert!(overflow_pos < OVERFLOW_FLAG);
                    assert!(num_parents < OVERFLOW_FLAG);
                    buf.extend((!overflow_pos).to_le_bytes());
                    buf.extend((!num_parents).to_le_bytes());
                    parent_overflow.extend_from_slice(positions);
                }
            }

            buf.extend(change_id_pos_map[&entry.change_id].to_le_bytes());

            assert_eq!(
                entry.commit_id.as_bytes().len(),
                self.field_lengths.commit_id
            );
            buf.extend_from_slice(entry.commit_id.as_bytes());
        }
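
        // Lookup tables: local positions sorted by commit id, then the sorted
        // change ids, then one entry per change id that is either an inline
        // position or a negated offset into the change overflow table.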
        for LocalCommitPosition(pos) in self.commit_lookup.values() {
            buf.extend(pos.to_le_bytes());
        }

        for change_id in self.change_lookup.keys() {
            assert_eq!(change_id.as_bytes().len(), self.field_lengths.change_id);
            buf.extend_from_slice(change_id.as_bytes());
        }

        let mut change_overflow = vec![];
        for positions in self.change_lookup.values() {
            match positions.as_slice() {
                [] => panic!("change id lookup entry must not be empty"),
                [LocalCommitPosition(pos1)] => {
                    assert!(*pos1 < OVERFLOW_FLAG);
                    buf.extend(pos1.to_le_bytes());
                }
                positions => {
                    let overflow_pos = u32::try_from(change_overflow.len()).unwrap();
                    assert!(overflow_pos < OVERFLOW_FLAG);
                    buf.extend((!overflow_pos).to_le_bytes());
                    change_overflow.extend_from_slice(positions);
                }
            }
        }
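
        // Backfill the overflow-table lengths reserved above, then append the
        // overflow tables themselves.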
        let num_parent_overflow = u32::try_from(parent_overflow.len()).unwrap();
        buf[parent_overflow_offset..][..4].copy_from_slice(&num_parent_overflow.to_le_bytes());
        for GlobalCommitPosition(pos) in parent_overflow {
            buf.extend(pos.to_le_bytes());
        }

        let num_change_overflow = u32::try_from(change_overflow.len()).unwrap();
        buf[change_overflow_offset..][..4].copy_from_slice(&num_change_overflow.to_le_bytes());
        for LocalCommitPosition(pos) in change_overflow {
            buf.extend(pos.to_le_bytes());
        }
    }
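
    /// Squashes this segment with ancestor files that aren't much larger than
    /// the newly added commits, so that file sizes keep growing roughly
    /// geometrically and the stack of segment files stays short.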
    pub(super) fn maybe_squash_with_ancestors(self) -> Self {
        let mut num_new_commits = self.num_local_commits();
        let mut files_to_squash = vec![];
        let mut base_parent_file = None;
        for parent_file in self.as_composite().ancestor_files_without_local() {
            if 2 * num_new_commits < parent_file.num_local_commits() {
                base_parent_file = Some(parent_file.clone());
                break;
            }
            num_new_commits += parent_file.num_local_commits();
            files_to_squash.push(parent_file.clone());
        }

        if files_to_squash.is_empty() {
            return self;
        }

        let mut squashed = if let Some(parent_file) = base_parent_file {
            Self::incremental(parent_file)
        } else {
            Self::full(self.field_lengths)
        };
        for parent_file in files_to_squash.iter().rev() {
            squashed.add_commits_from(parent_file.as_ref());
        }
        squashed.add_commits_from(&self);
        squashed
    }
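
    /// Serializes this segment and writes it to a content-addressed file in
    /// `dir`, named after the Blake2b hash of its contents. If no commits were
    /// added on top of the parent file, the parent file is returned unchanged
    /// instead.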
    pub(super) fn save_in(self, dir: &Path) -> Result<Arc<ReadonlyCommitIndexSegment>, PathError> {
        if self.num_local_commits() == 0 && self.parent_file.is_some() {
            return Ok(self.parent_file.unwrap());
        }

        let mut buf = Vec::new();
        buf.extend(COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
        self.serialize_parent_filename(&mut buf);
        let local_entries_offset = buf.len();
        self.serialize_local_entries(&mut buf);
        let mut hasher = Blake2b512::new();
        hasher.update(&buf);
        let index_file_id = CommitIndexSegmentId::from_bytes(&hasher.finalize());
        let index_file_path = dir.join(index_file_id.hex());

        let mut temp_file = NamedTempFile::new_in(dir).context(dir)?;
        let file = temp_file.as_file_mut();
        file.write_all(&buf).context(temp_file.path())?;
        persist_content_addressed_temp_file(temp_file, &index_file_path)
            .context(&index_file_path)?;

        Ok(ReadonlyCommitIndexSegment::load_with_parent_file(
            &mut &buf[local_entries_offset..],
            index_file_id,
            self.parent_file,
            self.field_lengths,
        )
        .expect("in-memory index data should be valid and readable"))
    }
}

impl CommitIndexSegment for MutableCommitIndexSegment {
    fn num_parent_commits(&self) -> u32 {
        self.num_parent_commits
    }

    fn num_local_commits(&self) -> u32 {
        self.graph.len().try_into().unwrap()
    }

    fn parent_file(&self) -> Option<&Arc<ReadonlyCommitIndexSegment>> {
        self.parent_file.as_ref()
    }

    fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalCommitPosition> {
        self.commit_lookup.get(commit_id).copied()
    }

    fn resolve_neighbor_commit_ids(
        &self,
        commit_id: &CommitId,
    ) -> (Option<CommitId>, Option<CommitId>) {
        let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id);
        (prev_id.cloned(), next_id.cloned())
    }

    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
        let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
        resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone())
    }

    fn resolve_neighbor_change_ids(
        &self,
        change_id: &ChangeId,
    ) -> (Option<ChangeId>, Option<ChangeId>) {
        let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id);
        (prev_id.cloned(), next_id.cloned())
    }

    fn resolve_change_id_prefix(
        &self,
        prefix: &HexPrefix,
    ) -> PrefixResolution<(ChangeId, SmallLocalCommitPositionsVec)> {
        let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes());
        resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix)
            .map(|(id, positions)| (id.clone(), positions.clone()))
    }

    fn generation_number(&self, local_pos: LocalCommitPosition) -> u32 {
        self.graph[local_pos.0 as usize].generation_number
    }

    fn commit_id(&self, local_pos: LocalCommitPosition) -> CommitId {
        self.graph[local_pos.0 as usize].commit_id.clone()
    }

    fn change_id(&self, local_pos: LocalCommitPosition) -> ChangeId {
        self.graph[local_pos.0 as usize].change_id.clone()
    }

    fn num_parents(&self, local_pos: LocalCommitPosition) -> u32 {
        self.graph[local_pos.0 as usize]
            .parent_positions
            .len()
            .try_into()
            .unwrap()
    }

    fn parent_positions(&self, local_pos: LocalCommitPosition) -> SmallGlobalCommitPositionsVec {
        self.graph[local_pos.0 as usize].parent_positions.clone()
    }
}
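
/// Mutable variant of the default index: a mutable commit index segment plus
/// a mutable changed-path index, wrapped in a `CompositeIndex`.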
pub struct DefaultMutableIndex(CompositeIndex);

impl DefaultMutableIndex {
    pub(super) fn full(lengths: FieldLengths) -> Self {
        let commits = Box::new(MutableCommitIndexSegment::full(lengths));
        let mut changed_paths = CompositeChangedPathIndex::null();
        changed_paths.make_mutable();
        Self(CompositeIndex::from_mutable(commits, changed_paths))
    }

    pub(super) fn incremental(parent_index: &DefaultReadonlyIndex) -> Self {
        let commits = Box::new(MutableCommitIndexSegment::incremental(
            parent_index.readonly_commits().clone(),
        ));
        let mut changed_paths = parent_index.changed_paths().clone();
        changed_paths.make_mutable();
        Self(CompositeIndex::from_mutable(commits, changed_paths))
    }

    pub(super) fn into_segment(
        self,
    ) -> (Box<MutableCommitIndexSegment>, CompositeChangedPathIndex) {
        self.0.into_mutable().expect("must have mutable")
    }

    fn mutable_commits(&mut self) -> &mut MutableCommitIndexSegment {
        self.0.mutable_commits().expect("must have mutable")
    }

    pub fn num_commits(&self) -> u32 {
        self.0.commits().num_commits()
    }
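
    /// Indexes `commit`, then collects and records its changed paths if this
    /// commit is the next one expected by the changed-path index. A commit
    /// that is already indexed is ignored.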
    #[tracing::instrument(skip(self))]
    pub(super) async fn add_commit(&mut self, commit: &Commit) -> BackendResult<()> {
        let new_commit_pos = GlobalCommitPosition(self.num_commits());
        self.add_commit_data(
            commit.id().clone(),
            commit.change_id().clone(),
            commit.parent_ids(),
        );
        if new_commit_pos == GlobalCommitPosition(self.num_commits()) {
            return Ok(());
        }
        if self.0.changed_paths().next_mutable_commit_pos() == Some(new_commit_pos) {
            self.add_commit_changed_paths(commit).await?;
        }
        Ok(())
    }

    pub(super) fn add_commit_data(
        &mut self,
        commit_id: CommitId,
        change_id: ChangeId,
        parent_ids: &[CommitId],
    ) {
        self.mutable_commits()
            .add_commit_data(commit_id, change_id, parent_ids);
    }

    async fn add_commit_changed_paths(&mut self, commit: &Commit) -> BackendResult<()> {
        let paths = collect_changed_paths(self, commit).await?;
        self.0.changed_paths_mut().add_changed_paths(paths);
        Ok(())
    }
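
    /// Merges the commits of `other` into this index. If the newly added
    /// commits line up with the next position expected by the changed-path
    /// index, their changed paths are copied over from `other` until a commit
    /// without recorded paths is reached.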
    pub(super) fn merge_in(&mut self, other: &DefaultReadonlyIndex) {
        let start_commit_pos = GlobalCommitPosition(self.num_commits());
        self.mutable_commits().merge_in(other.readonly_commits());
        if self.0.changed_paths().next_mutable_commit_pos() == Some(start_commit_pos) {
            let other_commits = other.as_composite().commits();
            for self_pos in (start_commit_pos.0..self.num_commits()).map(GlobalCommitPosition) {
                let entry = self.0.commits().entry_by_pos(self_pos);
                let other_pos = other_commits.commit_id_to_pos(&entry.commit_id()).unwrap();
                let Some(paths) = other.changed_paths().changed_paths(other_pos) else {
                    break;
                };
                let paths = paths.map(|path| path.to_owned()).collect();
                self.0.changed_paths_mut().add_changed_paths(paths);
            }
        }
    }
}

impl AsCompositeIndex for DefaultMutableIndex {
    fn as_composite(&self) -> &CompositeIndex {
        &self.0
    }
}

impl Index for DefaultMutableIndex {
    fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize {
        self.0.shortest_unique_commit_id_prefix_len(commit_id)
    }

    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
        self.0.resolve_commit_id_prefix(prefix)
    }

    fn has_id(&self, commit_id: &CommitId) -> bool {
        self.0.has_id(commit_id)
    }

    fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool {
        self.0.is_ancestor(ancestor_id, descendant_id)
    }

    fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> {
        self.0.common_ancestors(set1, set2)
    }

    fn all_heads_for_gc(
        &self,
    ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
        self.0.all_heads_for_gc()
    }

    fn heads(
        &self,
        candidates: &mut dyn Iterator<Item = &CommitId>,
    ) -> Result<Vec<CommitId>, IndexError> {
        self.0.heads(candidates)
    }

    fn changed_paths_in_commit(
        &self,
        commit_id: &CommitId,
    ) -> Result<Option<Box<dyn Iterator<Item = RepoPathBuf> + '_>>, IndexError> {
        self.0.changed_paths_in_commit(commit_id)
    }

    fn evaluate_revset(
        &self,
        expression: &ResolvedExpression,
        store: &Arc<Store>,
    ) -> Result<Box<dyn Revset + '_>, RevsetEvaluationError> {
        self.0.evaluate_revset(expression, store)
    }
}

impl MutableIndex for DefaultMutableIndex {
    fn as_index(&self) -> &dyn Index {
        self
    }

    fn change_id_index(
        &self,
        heads: &mut dyn Iterator<Item = &CommitId>,
    ) -> Box<dyn ChangeIdIndex + '_> {
        Box::new(ChangeIdIndexImpl::new(self, heads))
    }

    fn add_commit(&mut self, commit: &Commit) -> Result<(), IndexError> {
        Self::add_commit(self, commit)
            .block_on()
            .map_err(|err| IndexError(err.into()))
    }

    fn merge_in(&mut self, other: &dyn ReadonlyIndex) {
        let other: &DefaultReadonlyIndex = other
            .downcast_ref()
            .expect("index to merge in must be a DefaultReadonlyIndex");
        Self::merge_in(self, other);
    }
}
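
/// Returns the keys immediately before and after `id` in the sorted lookup
/// table, excluding `id` itself.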
fn resolve_neighbor_ids<'a, K: Ord, V>(
    lookup_table: &'a BTreeMap<K, V>,
    id: &K,
) -> (Option<&'a K>, Option<&'a K>) {
    let prev_id = lookup_table
        .range((Bound::Unbounded, Bound::Excluded(id)))
        .next_back()
        .map(|(id, _)| id);
    let next_id = lookup_table
        .range((Bound::Excluded(id), Bound::Unbounded))
        .next()
        .map(|(id, _)| id);
    (prev_id, next_id)
}
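
/// Scans the lookup table from the smallest key that could match `prefix` and
/// reports whether there is no match, a unique match, or an ambiguous one.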
fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
    lookup_table: &'a BTreeMap<K, V>,
    prefix: &HexPrefix,
    min_bytes_prefix: &K,
) -> PrefixResolution<(&'a K, &'a V)> {
    let mut matches = lookup_table
        .range((Bound::Included(min_bytes_prefix), Bound::Unbounded))
        .take_while(|&(id, _)| prefix.matches(id))
        .fuse();
    match (matches.next(), matches.next()) {
        (Some(entry), None) => PrefixResolution::SingleMatch(entry),
        (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
        (None, _) => PrefixResolution::NoMatch,
    }
}