1#![allow(missing_docs)]
16
17use std::any::Any;
18use std::cmp::max;
19use std::collections::BTreeMap;
20use std::collections::HashMap;
21use std::io;
22use std::io::Write as _;
23use std::ops::Bound;
24use std::path::Path;
25use std::sync::Arc;
26
27use blake2::Blake2b512;
28use digest::Digest as _;
29use itertools::Itertools as _;
30use smallvec::smallvec;
31use smallvec::SmallVec;
32use tempfile::NamedTempFile;
33
34use super::composite::AsCompositeIndex;
35use super::composite::ChangeIdIndexImpl;
36use super::composite::CompositeIndex;
37use super::composite::DynIndexSegment;
38use super::composite::IndexSegment;
39use super::entry::IndexPosition;
40use super::entry::LocalPosition;
41use super::entry::SmallIndexPositionsVec;
42use super::entry::SmallLocalPositionsVec;
43use super::readonly::DefaultReadonlyIndex;
44use super::readonly::ReadonlyIndexSegment;
45use super::readonly::INDEX_SEGMENT_FILE_FORMAT_VERSION;
46use super::readonly::OVERFLOW_FLAG;
47use crate::backend::ChangeId;
48use crate::backend::CommitId;
49use crate::commit::Commit;
50use crate::file_util::persist_content_addressed_temp_file;
51use crate::index::AllHeadsForGcUnsupported;
52use crate::index::ChangeIdIndex;
53use crate::index::Index;
54use crate::index::IndexError;
55use crate::index::MutableIndex;
56use crate::index::ReadonlyIndex;
57use crate::object_id::HexPrefix;
58use crate::object_id::ObjectId;
59use crate::object_id::PrefixResolution;
60use crate::revset::ResolvedExpression;
61use crate::revset::Revset;
62use crate::revset::RevsetEvaluationError;
63use crate::store::Store;
64
65#[derive(Debug)]
66struct MutableGraphEntry {
67 commit_id: CommitId,
68 change_id: ChangeId,
69 generation_number: u32,
70 parent_positions: SmallIndexPositionsVec,
71}
72
73pub(super) struct MutableIndexSegment {
74 parent_file: Option<Arc<ReadonlyIndexSegment>>,
75 num_parent_commits: u32,
76 commit_id_length: usize,
77 change_id_length: usize,
78 graph: Vec<MutableGraphEntry>,
79 commit_lookup: BTreeMap<CommitId, LocalPosition>,
80 change_lookup: BTreeMap<ChangeId, SmallLocalPositionsVec>,
81}
82
83impl MutableIndexSegment {
84 pub(super) fn full(commit_id_length: usize, change_id_length: usize) -> Self {
85 Self {
86 parent_file: None,
87 num_parent_commits: 0,
88 commit_id_length,
89 change_id_length,
90 graph: vec![],
91 commit_lookup: BTreeMap::new(),
92 change_lookup: BTreeMap::new(),
93 }
94 }
95
96 pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self {
97 let num_parent_commits = parent_file.as_composite().num_commits();
98 let commit_id_length = parent_file.commit_id_length();
99 let change_id_length = parent_file.change_id_length();
100 Self {
101 parent_file: Some(parent_file),
102 num_parent_commits,
103 commit_id_length,
104 change_id_length,
105 graph: vec![],
106 commit_lookup: BTreeMap::new(),
107 change_lookup: BTreeMap::new(),
108 }
109 }
110
111 pub(super) fn as_composite(&self) -> &CompositeIndex {
112 CompositeIndex::new(self)
113 }
114
115 pub(super) fn add_commit(&mut self, commit: &Commit) {
116 self.add_commit_data(
117 commit.id().clone(),
118 commit.change_id().clone(),
119 commit.parent_ids(),
120 );
121 }
122
123 pub(super) fn add_commit_data(
124 &mut self,
125 commit_id: CommitId,
126 change_id: ChangeId,
127 parent_ids: &[CommitId],
128 ) {
129 if self.as_composite().has_id(&commit_id) {
130 return;
131 }
132 let mut entry = MutableGraphEntry {
133 commit_id,
134 change_id,
135 generation_number: 0,
136 parent_positions: SmallVec::new(),
137 };
138 for parent_id in parent_ids {
139 let parent_entry = self
140 .as_composite()
141 .entry_by_id(parent_id)
142 .expect("parent commit is not indexed");
143 entry.generation_number = max(
144 entry.generation_number,
145 parent_entry.generation_number() + 1,
146 );
147 entry.parent_positions.push(parent_entry.position());
148 }
149 let local_pos = LocalPosition(u32::try_from(self.graph.len()).unwrap());
150 self.commit_lookup
151 .insert(entry.commit_id.clone(), local_pos);
152 self.change_lookup
153 .entry(entry.change_id.clone())
154 .and_modify(|positions| positions.push(local_pos))
156 .or_insert(smallvec![local_pos]);
157 self.graph.push(entry);
158 }
159
160 pub(super) fn add_commits_from(&mut self, other_segment: &DynIndexSegment) {
161 let other = CompositeIndex::new(other_segment);
162 for pos in other_segment.num_parent_commits()..other.num_commits() {
163 let entry = other.entry_by_pos(IndexPosition(pos));
164 let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec();
165 self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids);
166 }
167 }
168
169 pub(super) fn merge_in(&mut self, other: Arc<ReadonlyIndexSegment>) {
170 let mut maybe_own_ancestor = self.parent_file.clone();
171 let mut maybe_other_ancestor = Some(other);
172 let mut files_to_add = vec![];
173 loop {
174 if maybe_other_ancestor.is_none() {
175 break;
176 }
177 let other_ancestor = maybe_other_ancestor.as_ref().unwrap();
178 if maybe_own_ancestor.is_none() {
179 files_to_add.push(other_ancestor.clone());
180 maybe_other_ancestor = other_ancestor.parent_file().cloned();
181 continue;
182 }
183 let own_ancestor = maybe_own_ancestor.as_ref().unwrap();
184 if own_ancestor.name() == other_ancestor.name() {
185 break;
186 }
187 if own_ancestor.as_composite().num_commits()
188 < other_ancestor.as_composite().num_commits()
189 {
190 files_to_add.push(other_ancestor.clone());
191 maybe_other_ancestor = other_ancestor.parent_file().cloned();
192 } else {
193 maybe_own_ancestor = own_ancestor.parent_file().cloned();
194 }
195 }
196
197 for file in files_to_add.iter().rev() {
198 self.add_commits_from(file.as_ref());
199 }
200 }
201
202 fn serialize_parent_filename(&self, buf: &mut Vec<u8>) {
203 if let Some(parent_file) = &self.parent_file {
204 buf.extend(
205 u32::try_from(parent_file.name().len())
206 .unwrap()
207 .to_le_bytes(),
208 );
209 buf.extend_from_slice(parent_file.name().as_bytes());
210 } else {
211 buf.extend(0_u32.to_le_bytes());
212 }
213 }
214
215 fn serialize_local_entries(&self, buf: &mut Vec<u8>) {
216 assert_eq!(self.graph.len(), self.commit_lookup.len());
217 debug_assert_eq!(
218 self.graph.len(),
219 self.change_lookup.values().flatten().count()
220 );
221
222 let num_commits = u32::try_from(self.graph.len()).unwrap();
223 buf.extend(num_commits.to_le_bytes());
224 let num_change_ids = u32::try_from(self.change_lookup.len()).unwrap();
225 buf.extend(num_change_ids.to_le_bytes());
226 let parent_overflow_offset = buf.len();
228 buf.extend(0_u32.to_le_bytes());
229 let change_overflow_offset = buf.len();
230 buf.extend(0_u32.to_le_bytes());
231
232 let change_id_pos_map: HashMap<&ChangeId, u32> = self
234 .change_lookup
235 .keys()
236 .enumerate()
237 .map(|(i, change_id)| (change_id, u32::try_from(i).unwrap()))
238 .collect();
239
240 let mut parent_overflow = vec![];
241 for entry in &self.graph {
242 buf.extend(entry.generation_number.to_le_bytes());
243
244 match entry.parent_positions.as_slice() {
245 [] => {
246 buf.extend((!0_u32).to_le_bytes());
247 buf.extend((!0_u32).to_le_bytes());
248 }
249 [IndexPosition(pos1)] => {
250 assert!(*pos1 < OVERFLOW_FLAG);
251 buf.extend(pos1.to_le_bytes());
252 buf.extend((!0_u32).to_le_bytes());
253 }
254 [IndexPosition(pos1), IndexPosition(pos2)] => {
255 assert!(*pos1 < OVERFLOW_FLAG);
256 assert!(*pos2 < OVERFLOW_FLAG);
257 buf.extend(pos1.to_le_bytes());
258 buf.extend(pos2.to_le_bytes());
259 }
260 positions => {
261 let overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
262 let num_parents = u32::try_from(positions.len()).unwrap();
263 assert!(overflow_pos < OVERFLOW_FLAG);
264 assert!(num_parents < OVERFLOW_FLAG);
265 buf.extend((!overflow_pos).to_le_bytes());
266 buf.extend((!num_parents).to_le_bytes());
267 parent_overflow.extend_from_slice(positions);
268 }
269 }
270
271 buf.extend(change_id_pos_map[&entry.change_id].to_le_bytes());
272
273 assert_eq!(entry.commit_id.as_bytes().len(), self.commit_id_length);
274 buf.extend_from_slice(entry.commit_id.as_bytes());
275 }
276
277 for LocalPosition(pos) in self.commit_lookup.values() {
278 buf.extend(pos.to_le_bytes());
279 }
280
281 for change_id in self.change_lookup.keys() {
282 assert_eq!(change_id.as_bytes().len(), self.change_id_length);
283 buf.extend_from_slice(change_id.as_bytes());
284 }
285
286 let mut change_overflow = vec![];
287 for positions in self.change_lookup.values() {
288 match positions.as_slice() {
289 [] => panic!("change id lookup entry must not be empty"),
290 [LocalPosition(pos1)] => {
292 assert!(*pos1 < OVERFLOW_FLAG);
293 buf.extend(pos1.to_le_bytes());
294 }
295 positions => {
296 let overflow_pos = u32::try_from(change_overflow.len()).unwrap();
297 assert!(overflow_pos < OVERFLOW_FLAG);
298 buf.extend((!overflow_pos).to_le_bytes());
299 change_overflow.extend_from_slice(positions);
300 }
301 }
302 }
303
304 let num_parent_overflow = u32::try_from(parent_overflow.len()).unwrap();
305 buf[parent_overflow_offset..][..4].copy_from_slice(&num_parent_overflow.to_le_bytes());
306 for IndexPosition(pos) in parent_overflow {
307 buf.extend(pos.to_le_bytes());
308 }
309
310 let num_change_overflow = u32::try_from(change_overflow.len()).unwrap();
311 buf[change_overflow_offset..][..4].copy_from_slice(&num_change_overflow.to_le_bytes());
312 for LocalPosition(pos) in change_overflow {
313 buf.extend(pos.to_le_bytes());
314 }
315 }
316
317 fn maybe_squash_with_ancestors(self) -> MutableIndexSegment {
321 let mut num_new_commits = self.num_local_commits();
322 let mut files_to_squash = vec![];
323 let mut base_parent_file = None;
324 for parent_file in self.as_composite().ancestor_files_without_local() {
325 if 2 * num_new_commits < parent_file.num_local_commits() {
328 base_parent_file = Some(parent_file.clone());
329 break;
330 }
331 num_new_commits += parent_file.num_local_commits();
332 files_to_squash.push(parent_file.clone());
333 }
334
335 if files_to_squash.is_empty() {
336 return self;
337 }
338
339 let mut squashed = if let Some(parent_file) = base_parent_file {
340 MutableIndexSegment::incremental(parent_file)
341 } else {
342 MutableIndexSegment::full(self.commit_id_length, self.change_id_length)
343 };
344 for parent_file in files_to_squash.iter().rev() {
345 squashed.add_commits_from(parent_file.as_ref());
346 }
347 squashed.add_commits_from(&self);
348 squashed
349 }
350
351 pub(super) fn save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> {
352 if self.num_local_commits() == 0 && self.parent_file.is_some() {
353 return Ok(self.parent_file.unwrap());
354 }
355
356 let mut buf = Vec::new();
357 buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
358 self.serialize_parent_filename(&mut buf);
359 let local_entries_offset = buf.len();
360 self.serialize_local_entries(&mut buf);
361 let mut hasher = Blake2b512::new();
362 hasher.update(&buf);
363 let index_file_id_hex = hex::encode(hasher.finalize());
364 let index_file_path = dir.join(&index_file_id_hex);
365
366 let mut temp_file = NamedTempFile::new_in(dir)?;
367 let file = temp_file.as_file_mut();
368 file.write_all(&buf)?;
369 persist_content_addressed_temp_file(temp_file, index_file_path)?;
370
371 Ok(ReadonlyIndexSegment::load_with_parent_file(
372 &mut &buf[local_entries_offset..],
373 index_file_id_hex,
374 self.parent_file,
375 self.commit_id_length,
376 self.change_id_length,
377 )
378 .expect("in-memory index data should be valid and readable"))
379 }
380}
381
382impl IndexSegment for MutableIndexSegment {
383 fn num_parent_commits(&self) -> u32 {
384 self.num_parent_commits
385 }
386
387 fn num_local_commits(&self) -> u32 {
388 self.graph.len().try_into().unwrap()
389 }
390
391 fn parent_file(&self) -> Option<&Arc<ReadonlyIndexSegment>> {
392 self.parent_file.as_ref()
393 }
394
395 fn name(&self) -> Option<String> {
396 None
397 }
398
399 fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalPosition> {
400 self.commit_lookup.get(commit_id).copied()
401 }
402
403 fn resolve_neighbor_commit_ids(
404 &self,
405 commit_id: &CommitId,
406 ) -> (Option<CommitId>, Option<CommitId>) {
407 let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id);
408 (prev_id.cloned(), next_id.cloned())
409 }
410
411 fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
412 let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
413 resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone())
414 }
415
416 fn resolve_neighbor_change_ids(
417 &self,
418 change_id: &ChangeId,
419 ) -> (Option<ChangeId>, Option<ChangeId>) {
420 let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id);
421 (prev_id.cloned(), next_id.cloned())
422 }
423
424 fn resolve_change_id_prefix(
425 &self,
426 prefix: &HexPrefix,
427 ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> {
428 let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes());
429 resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix)
430 .map(|(id, positions)| (id.clone(), positions.clone()))
431 }
432
433 fn generation_number(&self, local_pos: LocalPosition) -> u32 {
434 self.graph[local_pos.0 as usize].generation_number
435 }
436
437 fn commit_id(&self, local_pos: LocalPosition) -> CommitId {
438 self.graph[local_pos.0 as usize].commit_id.clone()
439 }
440
441 fn change_id(&self, local_pos: LocalPosition) -> ChangeId {
442 self.graph[local_pos.0 as usize].change_id.clone()
443 }
444
445 fn num_parents(&self, local_pos: LocalPosition) -> u32 {
446 self.graph[local_pos.0 as usize]
447 .parent_positions
448 .len()
449 .try_into()
450 .unwrap()
451 }
452
453 fn parent_positions(&self, local_pos: LocalPosition) -> SmallIndexPositionsVec {
454 self.graph[local_pos.0 as usize].parent_positions.clone()
455 }
456}
457
458pub struct DefaultMutableIndex(MutableIndexSegment);
460
461impl DefaultMutableIndex {
462 pub(crate) fn full(commit_id_length: usize, change_id_length: usize) -> Self {
463 let mutable_segment = MutableIndexSegment::full(commit_id_length, change_id_length);
464 DefaultMutableIndex(mutable_segment)
465 }
466
467 pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self {
468 let mutable_segment = MutableIndexSegment::incremental(parent_file);
469 DefaultMutableIndex(mutable_segment)
470 }
471
472 #[cfg(test)]
473 pub(crate) fn add_commit_data(
474 &mut self,
475 commit_id: CommitId,
476 change_id: ChangeId,
477 parent_ids: &[CommitId],
478 ) {
479 self.0.add_commit_data(commit_id, change_id, parent_ids);
480 }
481
482 pub(super) fn squash_and_save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> {
483 self.0.maybe_squash_with_ancestors().save_in(dir)
484 }
485}
486
487impl AsCompositeIndex for DefaultMutableIndex {
488 fn as_composite(&self) -> &CompositeIndex {
489 self.0.as_composite()
490 }
491}
492
493impl Index for DefaultMutableIndex {
494 fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize {
495 self.as_composite()
496 .shortest_unique_commit_id_prefix_len(commit_id)
497 }
498
499 fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
500 self.as_composite().resolve_commit_id_prefix(prefix)
501 }
502
503 fn has_id(&self, commit_id: &CommitId) -> bool {
504 self.as_composite().has_id(commit_id)
505 }
506
507 fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool {
508 self.as_composite().is_ancestor(ancestor_id, descendant_id)
509 }
510
511 fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> {
512 self.as_composite().common_ancestors(set1, set2)
513 }
514
515 fn all_heads_for_gc(
516 &self,
517 ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
518 Ok(Box::new(self.as_composite().all_heads()))
519 }
520
521 fn heads(
522 &self,
523 candidates: &mut dyn Iterator<Item = &CommitId>,
524 ) -> Result<Vec<CommitId>, IndexError> {
525 self.as_composite().heads(candidates)
526 }
527
528 fn evaluate_revset<'index>(
529 &'index self,
530 expression: &ResolvedExpression,
531 store: &Arc<Store>,
532 ) -> Result<Box<dyn Revset + 'index>, RevsetEvaluationError> {
533 self.as_composite().evaluate_revset(expression, store)
534 }
535}
536
537impl MutableIndex for DefaultMutableIndex {
538 fn as_any(&self) -> &dyn Any {
539 self
540 }
541
542 fn into_any(self: Box<Self>) -> Box<dyn Any> {
543 Box::new(*self)
544 }
545
546 fn as_index(&self) -> &dyn Index {
547 self
548 }
549
550 fn change_id_index(
551 &self,
552 heads: &mut dyn Iterator<Item = &CommitId>,
553 ) -> Box<dyn ChangeIdIndex + '_> {
554 Box::new(ChangeIdIndexImpl::new(self, heads))
555 }
556
557 fn add_commit(&mut self, commit: &Commit) {
558 self.0.add_commit(commit);
559 }
560
561 fn merge_in(&mut self, other: &dyn ReadonlyIndex) {
562 let other = other
563 .as_any()
564 .downcast_ref::<DefaultReadonlyIndex>()
565 .expect("index to merge in must be a DefaultReadonlyIndex");
566 self.0.merge_in(other.as_segment().clone());
567 }
568}
569
570fn resolve_neighbor_ids<'a, K: Ord, V>(
571 lookup_table: &'a BTreeMap<K, V>,
572 id: &K,
573) -> (Option<&'a K>, Option<&'a K>) {
574 let prev_id = lookup_table
575 .range((Bound::Unbounded, Bound::Excluded(id)))
576 .next_back()
577 .map(|(id, _)| id);
578 let next_id = lookup_table
579 .range((Bound::Excluded(id), Bound::Unbounded))
580 .next()
581 .map(|(id, _)| id);
582 (prev_id, next_id)
583}
584
585fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
586 lookup_table: &'a BTreeMap<K, V>,
587 prefix: &HexPrefix,
588 min_bytes_prefix: &K,
589) -> PrefixResolution<(&'a K, &'a V)> {
590 let mut matches = lookup_table
591 .range((Bound::Included(min_bytes_prefix), Bound::Unbounded))
592 .take_while(|&(id, _)| prefix.matches(id))
593 .fuse();
594 match (matches.next(), matches.next()) {
595 (Some(entry), None) => PrefixResolution::SingleMatch(entry),
596 (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
597 (None, _) => PrefixResolution::NoMatch,
598 }
599}