1#![allow(missing_docs)]
16
17use std::any::Any;
18use std::cmp::max;
19use std::collections::BTreeMap;
20use std::collections::HashMap;
21use std::fmt;
22use std::fmt::Debug;
23use std::io::Write as _;
24use std::iter;
25use std::ops::Bound;
26use std::path::Path;
27use std::sync::Arc;
28
29use blake2::Blake2b512;
30use digest::Digest as _;
31use itertools::Itertools as _;
32use pollster::FutureExt as _;
33use smallvec::SmallVec;
34use smallvec::smallvec;
35use tempfile::NamedTempFile;
36
37use super::changed_path::CompositeChangedPathIndex;
38use super::changed_path::collect_changed_paths;
39use super::composite::AsCompositeIndex;
40use super::composite::ChangeIdIndexImpl;
41use super::composite::CommitIndexSegment;
42use super::composite::CommitIndexSegmentId;
43use super::composite::CompositeCommitIndex;
44use super::composite::CompositeIndex;
45use super::composite::DynCommitIndexSegment;
46use super::entry::GlobalCommitPosition;
47use super::entry::LocalCommitPosition;
48use super::entry::SmallGlobalCommitPositionsVec;
49use super::entry::SmallLocalCommitPositionsVec;
50use super::readonly::COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION;
51use super::readonly::DefaultReadonlyIndex;
52use super::readonly::FieldLengths;
53use super::readonly::OVERFLOW_FLAG;
54use super::readonly::ReadonlyCommitIndexSegment;
55use crate::backend::BackendResult;
56use crate::backend::ChangeId;
57use crate::backend::CommitId;
58use crate::commit::Commit;
59use crate::file_util::IoResultExt as _;
60use crate::file_util::PathError;
61use crate::file_util::persist_content_addressed_temp_file;
62use crate::index::AllHeadsForGcUnsupported;
63use crate::index::ChangeIdIndex;
64use crate::index::Index;
65use crate::index::IndexError;
66use crate::index::MutableIndex;
67use crate::index::ReadonlyIndex;
68use crate::object_id::HexPrefix;
69use crate::object_id::ObjectId;
70use crate::object_id::PrefixResolution;
71use crate::repo_path::RepoPathBuf;
72use crate::revset::ResolvedExpression;
73use crate::revset::Revset;
74use crate::revset::RevsetEvaluationError;
75use crate::store::Store;
76
77#[derive(Clone, Debug)]
78struct MutableGraphEntry {
79 commit_id: CommitId,
80 change_id: ChangeId,
81 generation_number: u32,
82 parent_positions: SmallGlobalCommitPositionsVec,
83}
84
85#[derive(Clone)]
86pub(super) struct MutableCommitIndexSegment {
87 parent_file: Option<Arc<ReadonlyCommitIndexSegment>>,
88 num_parent_commits: u32,
89 field_lengths: FieldLengths,
90 graph: Vec<MutableGraphEntry>,
91 commit_lookup: BTreeMap<CommitId, LocalCommitPosition>,
92 change_lookup: BTreeMap<ChangeId, SmallLocalCommitPositionsVec>,
93}
94
95impl Debug for MutableCommitIndexSegment {
96 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
97 f.debug_struct("MutableCommitIndexSegment")
98 .field("parent_file", &self.parent_file)
99 .finish_non_exhaustive()
100 }
101}
102
103impl MutableCommitIndexSegment {
104 pub(super) fn full(field_lengths: FieldLengths) -> Self {
105 Self {
106 parent_file: None,
107 num_parent_commits: 0,
108 field_lengths,
109 graph: vec![],
110 commit_lookup: BTreeMap::new(),
111 change_lookup: BTreeMap::new(),
112 }
113 }
114
115 pub(super) fn incremental(parent_file: Arc<ReadonlyCommitIndexSegment>) -> Self {
116 let num_parent_commits = parent_file.as_composite().num_commits();
117 let field_lengths = parent_file.field_lengths();
118 Self {
119 parent_file: Some(parent_file),
120 num_parent_commits,
121 field_lengths,
122 graph: vec![],
123 commit_lookup: BTreeMap::new(),
124 change_lookup: BTreeMap::new(),
125 }
126 }
127
128 pub(super) fn as_composite(&self) -> &CompositeCommitIndex {
129 CompositeCommitIndex::new(self)
130 }
131
132 pub(super) fn add_commit_data(
133 &mut self,
134 commit_id: CommitId,
135 change_id: ChangeId,
136 parent_ids: &[CommitId],
137 ) {
138 if self.as_composite().has_id(&commit_id) {
139 return;
140 }
141 let mut entry = MutableGraphEntry {
142 commit_id,
143 change_id,
144 generation_number: 0,
145 parent_positions: SmallVec::new(),
146 };
147 for parent_id in parent_ids {
148 let parent_entry = self
149 .as_composite()
150 .entry_by_id(parent_id)
151 .expect("parent commit is not indexed");
152 entry.generation_number = max(
153 entry.generation_number,
154 parent_entry.generation_number() + 1,
155 );
156 entry.parent_positions.push(parent_entry.position());
157 }
158 let local_pos = LocalCommitPosition(u32::try_from(self.graph.len()).unwrap());
159 self.commit_lookup
160 .insert(entry.commit_id.clone(), local_pos);
161 self.change_lookup
162 .entry(entry.change_id.clone())
163 .and_modify(|positions| positions.push(local_pos))
165 .or_insert(smallvec![local_pos]);
166 self.graph.push(entry);
167 }
168
169 pub(super) fn add_commits_from(&mut self, other_segment: &DynCommitIndexSegment) {
170 let other = CompositeCommitIndex::new(other_segment);
171 for pos in other_segment.num_parent_commits()..other.num_commits() {
172 let entry = other.entry_by_pos(GlobalCommitPosition(pos));
173 let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec();
174 self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids);
175 }
176 }
177
178 pub(super) fn merge_in(&mut self, other: &Arc<ReadonlyCommitIndexSegment>) {
179 let files_to_add = itertools::merge_join_by(
181 self.as_composite().ancestor_files_without_local(),
182 iter::once(other).chain(other.as_composite().ancestor_files_without_local()),
183 |own, other| {
184 let own_num_commits = own.as_composite().num_commits();
185 let other_num_commits = other.as_composite().num_commits();
186 own_num_commits.cmp(&other_num_commits).reverse()
187 },
188 )
189 .take_while(|own_other| {
190 own_other
191 .as_ref()
192 .both()
193 .is_none_or(|(own, other)| own.id() != other.id())
194 })
195 .filter_map(|own_other| own_other.right())
196 .collect_vec();
197
198 for &file in files_to_add.iter().rev() {
199 self.add_commits_from(file.as_ref());
200 }
201 }
202
203 fn serialize_parent_filename(&self, buf: &mut Vec<u8>) {
204 if let Some(parent_file) = &self.parent_file {
205 let hex = parent_file.id().hex();
206 buf.extend(u32::try_from(hex.len()).unwrap().to_le_bytes());
207 buf.extend_from_slice(hex.as_bytes());
208 } else {
209 buf.extend(0_u32.to_le_bytes());
210 }
211 }
212
213 fn serialize_local_entries(&self, buf: &mut Vec<u8>) {
214 assert_eq!(self.graph.len(), self.commit_lookup.len());
215 debug_assert_eq!(
216 self.graph.len(),
217 self.change_lookup.values().flatten().count()
218 );
219
220 let num_commits = u32::try_from(self.graph.len()).unwrap();
221 buf.extend(num_commits.to_le_bytes());
222 let num_change_ids = u32::try_from(self.change_lookup.len()).unwrap();
223 buf.extend(num_change_ids.to_le_bytes());
224 let parent_overflow_offset = buf.len();
226 buf.extend(0_u32.to_le_bytes());
227 let change_overflow_offset = buf.len();
228 buf.extend(0_u32.to_le_bytes());
229
230 let change_id_pos_map: HashMap<&ChangeId, u32> = self
232 .change_lookup
233 .keys()
234 .enumerate()
235 .map(|(i, change_id)| (change_id, u32::try_from(i).unwrap()))
236 .collect();
237
238 let mut parent_overflow = vec![];
239 for entry in &self.graph {
240 buf.extend(entry.generation_number.to_le_bytes());
241
242 match entry.parent_positions.as_slice() {
243 [] => {
244 buf.extend((!0_u32).to_le_bytes());
245 buf.extend((!0_u32).to_le_bytes());
246 }
247 [GlobalCommitPosition(pos1)] => {
248 assert!(*pos1 < OVERFLOW_FLAG);
249 buf.extend(pos1.to_le_bytes());
250 buf.extend((!0_u32).to_le_bytes());
251 }
252 [GlobalCommitPosition(pos1), GlobalCommitPosition(pos2)] => {
253 assert!(*pos1 < OVERFLOW_FLAG);
254 assert!(*pos2 < OVERFLOW_FLAG);
255 buf.extend(pos1.to_le_bytes());
256 buf.extend(pos2.to_le_bytes());
257 }
258 positions => {
259 let overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
260 let num_parents = u32::try_from(positions.len()).unwrap();
261 assert!(overflow_pos < OVERFLOW_FLAG);
262 assert!(num_parents < OVERFLOW_FLAG);
263 buf.extend((!overflow_pos).to_le_bytes());
264 buf.extend((!num_parents).to_le_bytes());
265 parent_overflow.extend_from_slice(positions);
266 }
267 }
268
269 buf.extend(change_id_pos_map[&entry.change_id].to_le_bytes());
270
271 assert_eq!(
272 entry.commit_id.as_bytes().len(),
273 self.field_lengths.commit_id
274 );
275 buf.extend_from_slice(entry.commit_id.as_bytes());
276 }
277
278 for LocalCommitPosition(pos) in self.commit_lookup.values() {
279 buf.extend(pos.to_le_bytes());
280 }
281
282 for change_id in self.change_lookup.keys() {
283 assert_eq!(change_id.as_bytes().len(), self.field_lengths.change_id);
284 buf.extend_from_slice(change_id.as_bytes());
285 }
286
287 let mut change_overflow = vec![];
288 for positions in self.change_lookup.values() {
289 match positions.as_slice() {
290 [] => panic!("change id lookup entry must not be empty"),
291 [LocalCommitPosition(pos1)] => {
293 assert!(*pos1 < OVERFLOW_FLAG);
294 buf.extend(pos1.to_le_bytes());
295 }
296 positions => {
297 let overflow_pos = u32::try_from(change_overflow.len()).unwrap();
298 assert!(overflow_pos < OVERFLOW_FLAG);
299 buf.extend((!overflow_pos).to_le_bytes());
300 change_overflow.extend_from_slice(positions);
301 }
302 }
303 }
304
305 let num_parent_overflow = u32::try_from(parent_overflow.len()).unwrap();
306 buf[parent_overflow_offset..][..4].copy_from_slice(&num_parent_overflow.to_le_bytes());
307 for GlobalCommitPosition(pos) in parent_overflow {
308 buf.extend(pos.to_le_bytes());
309 }
310
311 let num_change_overflow = u32::try_from(change_overflow.len()).unwrap();
312 buf[change_overflow_offset..][..4].copy_from_slice(&num_change_overflow.to_le_bytes());
313 for LocalCommitPosition(pos) in change_overflow {
314 buf.extend(pos.to_le_bytes());
315 }
316 }
317
318 pub(super) fn maybe_squash_with_ancestors(self) -> Self {
322 let mut num_new_commits = self.num_local_commits();
323 let mut files_to_squash = vec![];
324 let mut base_parent_file = None;
325 for parent_file in self.as_composite().ancestor_files_without_local() {
326 if 2 * num_new_commits < parent_file.num_local_commits() {
329 base_parent_file = Some(parent_file.clone());
330 break;
331 }
332 num_new_commits += parent_file.num_local_commits();
333 files_to_squash.push(parent_file.clone());
334 }
335
336 if files_to_squash.is_empty() {
337 return self;
338 }
339
340 let mut squashed = if let Some(parent_file) = base_parent_file {
341 Self::incremental(parent_file)
342 } else {
343 Self::full(self.field_lengths)
344 };
345 for parent_file in files_to_squash.iter().rev() {
346 squashed.add_commits_from(parent_file.as_ref());
347 }
348 squashed.add_commits_from(&self);
349 squashed
350 }
351
352 pub(super) fn save_in(self, dir: &Path) -> Result<Arc<ReadonlyCommitIndexSegment>, PathError> {
353 if self.num_local_commits() == 0 && self.parent_file.is_some() {
354 return Ok(self.parent_file.unwrap());
355 }
356
357 let mut buf = Vec::new();
358 buf.extend(COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
359 self.serialize_parent_filename(&mut buf);
360 let local_entries_offset = buf.len();
361 self.serialize_local_entries(&mut buf);
362 let mut hasher = Blake2b512::new();
363 hasher.update(&buf);
364 let index_file_id = CommitIndexSegmentId::from_bytes(&hasher.finalize());
365 let index_file_path = dir.join(index_file_id.hex());
366
367 let mut temp_file = NamedTempFile::new_in(dir).context(dir)?;
368 let file = temp_file.as_file_mut();
369 file.write_all(&buf).context(temp_file.path())?;
370 persist_content_addressed_temp_file(temp_file, &index_file_path)
371 .context(&index_file_path)?;
372
373 Ok(ReadonlyCommitIndexSegment::load_with_parent_file(
374 &mut &buf[local_entries_offset..],
375 index_file_id,
376 self.parent_file,
377 self.field_lengths,
378 )
379 .expect("in-memory index data should be valid and readable"))
380 }
381}
382
383impl CommitIndexSegment for MutableCommitIndexSegment {
384 fn num_parent_commits(&self) -> u32 {
385 self.num_parent_commits
386 }
387
388 fn num_local_commits(&self) -> u32 {
389 self.graph.len().try_into().unwrap()
390 }
391
392 fn parent_file(&self) -> Option<&Arc<ReadonlyCommitIndexSegment>> {
393 self.parent_file.as_ref()
394 }
395
396 fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalCommitPosition> {
397 self.commit_lookup.get(commit_id).copied()
398 }
399
400 fn resolve_neighbor_commit_ids(
401 &self,
402 commit_id: &CommitId,
403 ) -> (Option<CommitId>, Option<CommitId>) {
404 let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id);
405 (prev_id.cloned(), next_id.cloned())
406 }
407
408 fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
409 let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
410 resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone())
411 }
412
413 fn resolve_neighbor_change_ids(
414 &self,
415 change_id: &ChangeId,
416 ) -> (Option<ChangeId>, Option<ChangeId>) {
417 let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id);
418 (prev_id.cloned(), next_id.cloned())
419 }
420
421 fn resolve_change_id_prefix(
422 &self,
423 prefix: &HexPrefix,
424 ) -> PrefixResolution<(ChangeId, SmallLocalCommitPositionsVec)> {
425 let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes());
426 resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix)
427 .map(|(id, positions)| (id.clone(), positions.clone()))
428 }
429
430 fn generation_number(&self, local_pos: LocalCommitPosition) -> u32 {
431 self.graph[local_pos.0 as usize].generation_number
432 }
433
434 fn commit_id(&self, local_pos: LocalCommitPosition) -> CommitId {
435 self.graph[local_pos.0 as usize].commit_id.clone()
436 }
437
438 fn change_id(&self, local_pos: LocalCommitPosition) -> ChangeId {
439 self.graph[local_pos.0 as usize].change_id.clone()
440 }
441
442 fn num_parents(&self, local_pos: LocalCommitPosition) -> u32 {
443 self.graph[local_pos.0 as usize]
444 .parent_positions
445 .len()
446 .try_into()
447 .unwrap()
448 }
449
450 fn parent_positions(&self, local_pos: LocalCommitPosition) -> SmallGlobalCommitPositionsVec {
451 self.graph[local_pos.0 as usize].parent_positions.clone()
452 }
453}
454
455pub struct DefaultMutableIndex(CompositeIndex);
457
458impl DefaultMutableIndex {
459 pub(super) fn full(lengths: FieldLengths) -> Self {
460 let commits = Box::new(MutableCommitIndexSegment::full(lengths));
461 let mut changed_paths = CompositeChangedPathIndex::null();
463 changed_paths.make_mutable();
464 Self(CompositeIndex::from_mutable(commits, changed_paths))
465 }
466
467 pub(super) fn incremental(parent_index: &DefaultReadonlyIndex) -> Self {
468 let commits = Box::new(MutableCommitIndexSegment::incremental(
469 parent_index.readonly_commits().clone(),
470 ));
471 let mut changed_paths = parent_index.changed_paths().clone();
472 changed_paths.make_mutable();
473 Self(CompositeIndex::from_mutable(commits, changed_paths))
474 }
475
476 pub(super) fn into_segment(
477 self,
478 ) -> (Box<MutableCommitIndexSegment>, CompositeChangedPathIndex) {
479 self.0.into_mutable().expect("must have mutable")
480 }
481
482 fn mutable_commits(&mut self) -> &mut MutableCommitIndexSegment {
483 self.0.mutable_commits().expect("must have mutable")
484 }
485
486 pub fn num_commits(&self) -> u32 {
488 self.0.commits().num_commits()
489 }
490
491 #[tracing::instrument(skip(self))]
492 pub(super) async fn add_commit(&mut self, commit: &Commit) -> BackendResult<()> {
493 let new_commit_pos = GlobalCommitPosition(self.num_commits());
494 self.add_commit_data(
495 commit.id().clone(),
496 commit.change_id().clone(),
497 commit.parent_ids(),
498 );
499 if new_commit_pos == GlobalCommitPosition(self.num_commits()) {
500 return Ok(()); }
502 if self.0.changed_paths().next_mutable_commit_pos() == Some(new_commit_pos) {
503 self.add_commit_changed_paths(commit).await?;
504 }
505 Ok(())
506 }
507
508 pub(super) fn add_commit_data(
509 &mut self,
510 commit_id: CommitId,
511 change_id: ChangeId,
512 parent_ids: &[CommitId],
513 ) {
514 self.mutable_commits()
515 .add_commit_data(commit_id, change_id, parent_ids);
516 }
517
518 async fn add_commit_changed_paths(&mut self, commit: &Commit) -> BackendResult<()> {
521 let paths = collect_changed_paths(self, commit).await?;
522 self.0.changed_paths_mut().add_changed_paths(paths);
523 Ok(())
524 }
525
526 pub(super) fn merge_in(&mut self, other: &DefaultReadonlyIndex) {
527 let start_commit_pos = GlobalCommitPosition(self.num_commits());
528 self.mutable_commits().merge_in(other.readonly_commits());
529 if self.0.changed_paths().next_mutable_commit_pos() == Some(start_commit_pos) {
530 let other_commits = other.as_composite().commits();
531 for self_pos in (start_commit_pos.0..self.num_commits()).map(GlobalCommitPosition) {
532 let entry = self.0.commits().entry_by_pos(self_pos);
533 let other_pos = other_commits.commit_id_to_pos(&entry.commit_id()).unwrap();
534 let Some(paths) = other.changed_paths().changed_paths(other_pos) else {
535 break; };
537 let paths = paths.map(|path| path.to_owned()).collect();
538 self.0.changed_paths_mut().add_changed_paths(paths);
539 }
540 }
541 }
542}
543
544impl AsCompositeIndex for DefaultMutableIndex {
545 fn as_composite(&self) -> &CompositeIndex {
546 &self.0
547 }
548}
549
550impl Index for DefaultMutableIndex {
551 fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize {
552 self.0.shortest_unique_commit_id_prefix_len(commit_id)
553 }
554
555 fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
556 self.0.resolve_commit_id_prefix(prefix)
557 }
558
559 fn has_id(&self, commit_id: &CommitId) -> bool {
560 self.0.has_id(commit_id)
561 }
562
563 fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool {
564 self.0.is_ancestor(ancestor_id, descendant_id)
565 }
566
567 fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> {
568 self.0.common_ancestors(set1, set2)
569 }
570
571 fn all_heads_for_gc(
572 &self,
573 ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
574 self.0.all_heads_for_gc()
575 }
576
577 fn heads(
578 &self,
579 candidates: &mut dyn Iterator<Item = &CommitId>,
580 ) -> Result<Vec<CommitId>, IndexError> {
581 self.0.heads(candidates)
582 }
583
584 fn changed_paths_in_commit(
585 &self,
586 commit_id: &CommitId,
587 ) -> Result<Option<Box<dyn Iterator<Item = RepoPathBuf> + '_>>, IndexError> {
588 self.0.changed_paths_in_commit(commit_id)
589 }
590
591 fn evaluate_revset(
592 &self,
593 expression: &ResolvedExpression,
594 store: &Arc<Store>,
595 ) -> Result<Box<dyn Revset + '_>, RevsetEvaluationError> {
596 self.0.evaluate_revset(expression, store)
597 }
598}
599
600impl MutableIndex for DefaultMutableIndex {
601 fn as_any(&self) -> &dyn Any {
602 self
603 }
604
605 fn into_any(self: Box<Self>) -> Box<dyn Any> {
606 Box::new(*self)
607 }
608
609 fn as_index(&self) -> &dyn Index {
610 self
611 }
612
613 fn change_id_index(
614 &self,
615 heads: &mut dyn Iterator<Item = &CommitId>,
616 ) -> Box<dyn ChangeIdIndex + '_> {
617 Box::new(ChangeIdIndexImpl::new(self, heads))
618 }
619
620 fn add_commit(&mut self, commit: &Commit) -> Result<(), IndexError> {
621 Self::add_commit(self, commit)
622 .block_on()
623 .map_err(|err| IndexError(err.into()))
624 }
625
626 fn merge_in(&mut self, other: &dyn ReadonlyIndex) {
627 let other = other
628 .as_any()
629 .downcast_ref::<DefaultReadonlyIndex>()
630 .expect("index to merge in must be a DefaultReadonlyIndex");
631 Self::merge_in(self, other);
632 }
633}
634
635fn resolve_neighbor_ids<'a, K: Ord, V>(
636 lookup_table: &'a BTreeMap<K, V>,
637 id: &K,
638) -> (Option<&'a K>, Option<&'a K>) {
639 let prev_id = lookup_table
640 .range((Bound::Unbounded, Bound::Excluded(id)))
641 .next_back()
642 .map(|(id, _)| id);
643 let next_id = lookup_table
644 .range((Bound::Excluded(id), Bound::Unbounded))
645 .next()
646 .map(|(id, _)| id);
647 (prev_id, next_id)
648}
649
650fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
651 lookup_table: &'a BTreeMap<K, V>,
652 prefix: &HexPrefix,
653 min_bytes_prefix: &K,
654) -> PrefixResolution<(&'a K, &'a V)> {
655 let mut matches = lookup_table
656 .range((Bound::Included(min_bytes_prefix), Bound::Unbounded))
657 .take_while(|&(id, _)| prefix.matches(id))
658 .fuse();
659 match (matches.next(), matches.next()) {
660 (Some(entry), None) => PrefixResolution::SingleMatch(entry),
661 (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
662 (None, _) => PrefixResolution::NoMatch,
663 }
664}