jj_lib/default_index/
composite.rs

1// Copyright 2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::cmp::max;
18use std::cmp::min;
19use std::cmp::Ordering;
20use std::collections::BTreeSet;
21use std::collections::BinaryHeap;
22use std::collections::HashSet;
23use std::iter;
24use std::sync::Arc;
25use std::sync::Mutex;
26
27use itertools::Itertools as _;
28use ref_cast::ref_cast_custom;
29use ref_cast::RefCastCustom;
30
31use super::entry::IndexEntry;
32use super::entry::IndexPosition;
33use super::entry::IndexPositionByGeneration;
34use super::entry::LocalPosition;
35use super::entry::SmallIndexPositionsVec;
36use super::entry::SmallLocalPositionsVec;
37use super::readonly::ReadonlyIndexSegment;
38use super::rev_walk::AncestorsBitSet;
39use super::revset_engine;
40use crate::backend::ChangeId;
41use crate::backend::CommitId;
42use crate::hex_util;
43use crate::index::AllHeadsForGcUnsupported;
44use crate::index::ChangeIdIndex;
45use crate::index::Index;
46use crate::index::IndexError;
47use crate::object_id::HexPrefix;
48use crate::object_id::ObjectId as _;
49use crate::object_id::PrefixResolution;
50use crate::revset::ResolvedExpression;
51use crate::revset::Revset;
52use crate::revset::RevsetEvaluationError;
53use crate::store::Store;
54
55pub(super) trait IndexSegment: Send + Sync {
56    fn num_parent_commits(&self) -> u32;
57
58    fn num_local_commits(&self) -> u32;
59
60    fn parent_file(&self) -> Option<&Arc<ReadonlyIndexSegment>>;
61
62    fn name(&self) -> Option<String>;
63
64    fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalPosition>;
65
66    /// Suppose the given `commit_id` exists, returns the previous and next
67    /// commit ids in lexicographical order.
68    fn resolve_neighbor_commit_ids(
69        &self,
70        commit_id: &CommitId,
71    ) -> (Option<CommitId>, Option<CommitId>);
72
73    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId>;
74
75    fn resolve_neighbor_change_ids(
76        &self,
77        change_id: &ChangeId,
78    ) -> (Option<ChangeId>, Option<ChangeId>);
79
80    fn resolve_change_id_prefix(
81        &self,
82        prefix: &HexPrefix,
83    ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)>;
84
85    fn generation_number(&self, local_pos: LocalPosition) -> u32;
86
87    fn commit_id(&self, local_pos: LocalPosition) -> CommitId;
88
89    fn change_id(&self, local_pos: LocalPosition) -> ChangeId;
90
91    fn num_parents(&self, local_pos: LocalPosition) -> u32;
92
93    fn parent_positions(&self, local_pos: LocalPosition) -> SmallIndexPositionsVec;
94}
95
96pub(super) type DynIndexSegment = dyn IndexSegment;
97
98/// Abstraction over owned and borrowed types that can be cheaply converted to
99/// a `CompositeIndex` reference.
100pub trait AsCompositeIndex {
101    /// Returns reference wrapper that provides global access to this index.
102    fn as_composite(&self) -> &CompositeIndex;
103}
104
105impl<T: AsCompositeIndex + ?Sized> AsCompositeIndex for &T {
106    fn as_composite(&self) -> &CompositeIndex {
107        <T as AsCompositeIndex>::as_composite(self)
108    }
109}
110
111impl<T: AsCompositeIndex + ?Sized> AsCompositeIndex for &mut T {
112    fn as_composite(&self) -> &CompositeIndex {
113        <T as AsCompositeIndex>::as_composite(self)
114    }
115}
116
117/// `CompositeIndex` provides an index of both commit IDs and change IDs.
118///
119/// We refer to this as a composite index because it's a composite of multiple
120/// nested index segments where each parent segment is roughly twice as large
121/// its child. segment. This provides a good balance between read and write
122/// performance.
123// Reference wrapper that provides global access to nested index segments.
124#[derive(RefCastCustom)]
125#[repr(transparent)]
126pub struct CompositeIndex(DynIndexSegment);
127
128impl CompositeIndex {
129    #[ref_cast_custom]
130    pub(super) const fn new(segment: &DynIndexSegment) -> &Self;
131
132    /// Iterates parent and its ancestor readonly index segments.
133    pub(super) fn ancestor_files_without_local(
134        &self,
135    ) -> impl Iterator<Item = &Arc<ReadonlyIndexSegment>> {
136        let parent_file = self.0.parent_file();
137        iter::successors(parent_file, |file| file.parent_file())
138    }
139
140    /// Iterates self and its ancestor index segments.
141    pub(super) fn ancestor_index_segments(&self) -> impl Iterator<Item = &DynIndexSegment> {
142        iter::once(&self.0).chain(
143            self.ancestor_files_without_local()
144                .map(|file| file.as_ref() as &DynIndexSegment),
145        )
146    }
147
148    pub fn num_commits(&self) -> u32 {
149        self.0.num_parent_commits() + self.0.num_local_commits()
150    }
151
152    pub fn stats(&self) -> IndexStats {
153        let num_commits = self.num_commits();
154        let mut num_merges = 0;
155        let mut max_generation_number = 0;
156        let mut change_ids = HashSet::new();
157        for pos in 0..num_commits {
158            let entry = self.entry_by_pos(IndexPosition(pos));
159            max_generation_number = max(max_generation_number, entry.generation_number());
160            if entry.num_parents() > 1 {
161                num_merges += 1;
162            }
163            change_ids.insert(entry.change_id());
164        }
165        let num_heads = u32::try_from(self.all_heads_pos().count()).unwrap();
166
167        let mut levels = self
168            .ancestor_index_segments()
169            .map(|segment| IndexLevelStats {
170                num_commits: segment.num_local_commits(),
171                name: segment.name(),
172            })
173            .collect_vec();
174        levels.reverse();
175
176        IndexStats {
177            num_commits,
178            num_merges,
179            max_generation_number,
180            num_heads,
181            num_changes: change_ids.len().try_into().unwrap(),
182            levels,
183        }
184    }
185
186    pub fn entry_by_pos(&self, pos: IndexPosition) -> IndexEntry<'_> {
187        self.ancestor_index_segments()
188            .find_map(|segment| {
189                u32::checked_sub(pos.0, segment.num_parent_commits())
190                    .map(|local_pos| IndexEntry::new(segment, pos, LocalPosition(local_pos)))
191            })
192            .unwrap()
193    }
194
195    pub fn entry_by_id(&self, commit_id: &CommitId) -> Option<IndexEntry<'_>> {
196        self.ancestor_index_segments().find_map(|segment| {
197            let local_pos = segment.commit_id_to_pos(commit_id)?;
198            let pos = IndexPosition(local_pos.0 + segment.num_parent_commits());
199            Some(IndexEntry::new(segment, pos, local_pos))
200        })
201    }
202
203    pub fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<IndexPosition> {
204        self.ancestor_index_segments().find_map(|segment| {
205            let LocalPosition(local_pos) = segment.commit_id_to_pos(commit_id)?;
206            Some(IndexPosition(local_pos + segment.num_parent_commits()))
207        })
208    }
209
210    /// Suppose the given `commit_id` exists, returns the previous and next
211    /// commit ids in lexicographical order.
212    pub(super) fn resolve_neighbor_commit_ids(
213        &self,
214        commit_id: &CommitId,
215    ) -> (Option<CommitId>, Option<CommitId>) {
216        self.ancestor_index_segments()
217            .map(|segment| segment.resolve_neighbor_commit_ids(commit_id))
218            .reduce(|(acc_prev_id, acc_next_id), (prev_id, next_id)| {
219                (
220                    acc_prev_id.into_iter().chain(prev_id).max(),
221                    acc_next_id.into_iter().chain(next_id).min(),
222                )
223            })
224            .unwrap()
225    }
226
227    /// Suppose the given `change_id` exists, returns the minimum prefix length
228    /// to disambiguate it within all the indexed ids including hidden ones.
229    pub(super) fn shortest_unique_change_id_prefix_len(&self, change_id: &ChangeId) -> usize {
230        let (prev_id, next_id) = self.resolve_neighbor_change_ids(change_id);
231        itertools::chain(prev_id, next_id)
232            .map(|id| hex_util::common_hex_len(change_id.as_bytes(), id.as_bytes()) + 1)
233            .max()
234            .unwrap_or(0)
235    }
236
237    /// Suppose the given `change_id` exists, returns the previous and next
238    /// change ids in lexicographical order. The returned change ids may be
239    /// hidden.
240    pub(super) fn resolve_neighbor_change_ids(
241        &self,
242        change_id: &ChangeId,
243    ) -> (Option<ChangeId>, Option<ChangeId>) {
244        self.ancestor_index_segments()
245            .map(|segment| segment.resolve_neighbor_change_ids(change_id))
246            .reduce(|(acc_prev_id, acc_next_id), (prev_id, next_id)| {
247                (
248                    acc_prev_id.into_iter().chain(prev_id).max(),
249                    acc_next_id.into_iter().chain(next_id).min(),
250                )
251            })
252            .unwrap()
253    }
254
255    /// Resolves the given change id `prefix` to the associated entries. The
256    /// returned entries may be hidden.
257    ///
258    /// The returned index positions are sorted in ascending order.
259    pub(super) fn resolve_change_id_prefix(
260        &self,
261        prefix: &HexPrefix,
262    ) -> PrefixResolution<(ChangeId, SmallIndexPositionsVec)> {
263        use PrefixResolution::*;
264        self.ancestor_index_segments()
265            .fold(NoMatch, |acc_match, segment| {
266                if acc_match == AmbiguousMatch {
267                    return acc_match; // avoid checking the parent file(s)
268                }
269                let to_global_pos = {
270                    let num_parent_commits = segment.num_parent_commits();
271                    move |LocalPosition(pos)| IndexPosition(pos + num_parent_commits)
272                };
273                // Similar to PrefixResolution::plus(), but merges matches of the same id.
274                match (acc_match, segment.resolve_change_id_prefix(prefix)) {
275                    (NoMatch, local_match) => local_match.map(|(id, positions)| {
276                        (id, positions.into_iter().map(to_global_pos).collect())
277                    }),
278                    (acc_match, NoMatch) => acc_match,
279                    (AmbiguousMatch, _) => AmbiguousMatch,
280                    (_, AmbiguousMatch) => AmbiguousMatch,
281                    (SingleMatch((id1, _)), SingleMatch((id2, _))) if id1 != id2 => AmbiguousMatch,
282                    (SingleMatch((id, mut acc_positions)), SingleMatch((_, local_positions))) => {
283                        acc_positions
284                            .insert_many(0, local_positions.into_iter().map(to_global_pos));
285                        SingleMatch((id, acc_positions))
286                    }
287                }
288            })
289    }
290
291    pub(super) fn is_ancestor_pos(
292        &self,
293        ancestor_pos: IndexPosition,
294        descendant_pos: IndexPosition,
295    ) -> bool {
296        let ancestor_generation = self.entry_by_pos(ancestor_pos).generation_number();
297        let mut work = vec![descendant_pos];
298        let mut visited = HashSet::new();
299        while let Some(descendant_pos) = work.pop() {
300            let descendant_entry = self.entry_by_pos(descendant_pos);
301            if descendant_pos == ancestor_pos {
302                return true;
303            }
304            if !visited.insert(descendant_entry.position()) {
305                continue;
306            }
307            if descendant_entry.generation_number() <= ancestor_generation {
308                continue;
309            }
310            work.extend(descendant_entry.parent_positions());
311        }
312        false
313    }
314
315    pub(super) fn common_ancestors_pos(
316        &self,
317        set1: &[IndexPosition],
318        set2: &[IndexPosition],
319    ) -> BTreeSet<IndexPosition> {
320        let mut items1: BinaryHeap<_> = set1
321            .iter()
322            .map(|pos| IndexPositionByGeneration::from(&self.entry_by_pos(*pos)))
323            .collect();
324        let mut items2: BinaryHeap<_> = set2
325            .iter()
326            .map(|pos| IndexPositionByGeneration::from(&self.entry_by_pos(*pos)))
327            .collect();
328
329        let mut result = BTreeSet::new();
330        while let (Some(item1), Some(item2)) = (items1.peek(), items2.peek()) {
331            match item1.cmp(item2) {
332                Ordering::Greater => {
333                    let item1 = dedup_pop(&mut items1).unwrap();
334                    let entry1 = self.entry_by_pos(item1.pos);
335                    for parent_entry in entry1.parents() {
336                        assert!(parent_entry.position() < entry1.position());
337                        items1.push(IndexPositionByGeneration::from(&parent_entry));
338                    }
339                }
340                Ordering::Less => {
341                    let item2 = dedup_pop(&mut items2).unwrap();
342                    let entry2 = self.entry_by_pos(item2.pos);
343                    for parent_entry in entry2.parents() {
344                        assert!(parent_entry.position() < entry2.position());
345                        items2.push(IndexPositionByGeneration::from(&parent_entry));
346                    }
347                }
348                Ordering::Equal => {
349                    result.insert(item1.pos);
350                    dedup_pop(&mut items1).unwrap();
351                    dedup_pop(&mut items2).unwrap();
352                }
353            }
354        }
355        self.heads_pos(result)
356    }
357
358    pub(super) fn all_heads(&self) -> impl Iterator<Item = CommitId> + use<'_> {
359        self.all_heads_pos()
360            .map(move |pos| self.entry_by_pos(pos).commit_id())
361    }
362
363    pub(super) fn all_heads_pos(&self) -> impl Iterator<Item = IndexPosition> + use<> {
364        // TODO: can be optimized to use bit vec and leading/trailing_ones()
365        let num_commits = self.num_commits();
366        let mut not_head: Vec<bool> = vec![false; num_commits as usize];
367        for pos in 0..num_commits {
368            let entry = self.entry_by_pos(IndexPosition(pos));
369            for IndexPosition(parent_pos) in entry.parent_positions() {
370                not_head[parent_pos as usize] = true;
371            }
372        }
373        not_head
374            .into_iter()
375            .enumerate()
376            .filter(|&(_, b)| !b)
377            .map(|(i, _)| IndexPosition(u32::try_from(i).unwrap()))
378    }
379
380    /// Returns the subset of positions in `candidate_positions` which refer to
381    /// entries that are heads in the repository.
382    pub fn heads_pos(
383        &self,
384        mut candidate_positions: BTreeSet<IndexPosition>,
385    ) -> BTreeSet<IndexPosition> {
386        // Add all parents of the candidates to the work queue. The parents and their
387        // ancestors are not heads.
388        // Also find the smallest generation number among the candidates.
389        let mut work = BinaryHeap::new();
390        let mut min_generation = u32::MAX;
391        for pos in &candidate_positions {
392            let entry = self.entry_by_pos(*pos);
393            min_generation = min(min_generation, entry.generation_number());
394            for parent_entry in entry.parents() {
395                work.push(IndexPositionByGeneration::from(&parent_entry));
396            }
397        }
398
399        // Walk ancestors of the parents of the candidates. Remove visited commits from
400        // set of candidates. Stop walking when we have gone past the minimum
401        // candidate generation.
402        while let Some(item) = dedup_pop(&mut work) {
403            if item.generation < min_generation {
404                break;
405            }
406            candidate_positions.remove(&item.pos);
407            let entry = self.entry_by_pos(item.pos);
408            for parent_entry in entry.parents() {
409                assert!(parent_entry.position() < entry.position());
410                work.push(IndexPositionByGeneration::from(&parent_entry));
411            }
412        }
413        candidate_positions
414    }
415
416    pub(super) fn evaluate_revset(
417        &self,
418        expression: &ResolvedExpression,
419        store: &Arc<Store>,
420    ) -> Result<Box<dyn Revset + '_>, RevsetEvaluationError> {
421        let revset_impl = revset_engine::evaluate(expression, store, self)?;
422        Ok(Box::new(revset_impl))
423    }
424}
425
426impl AsCompositeIndex for CompositeIndex {
427    fn as_composite(&self) -> &CompositeIndex {
428        self
429    }
430}
431
432// In revset engine, we need to convert &CompositeIndex to &dyn Index.
433impl Index for &CompositeIndex {
434    /// Suppose the given `commit_id` exists, returns the minimum prefix length
435    /// to disambiguate it. The length to be returned is a number of hexadecimal
436    /// digits.
437    ///
438    /// If the given `commit_id` doesn't exist, this will return the prefix
439    /// length that never matches with any commit ids.
440    fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize {
441        let (prev_id, next_id) = self.resolve_neighbor_commit_ids(commit_id);
442        itertools::chain(prev_id, next_id)
443            .map(|id| hex_util::common_hex_len(commit_id.as_bytes(), id.as_bytes()) + 1)
444            .max()
445            .unwrap_or(0)
446    }
447
448    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
449        self.ancestor_index_segments()
450            .fold(PrefixResolution::NoMatch, |acc_match, segment| {
451                if acc_match == PrefixResolution::AmbiguousMatch {
452                    acc_match // avoid checking the parent file(s)
453                } else {
454                    let local_match = segment.resolve_commit_id_prefix(prefix);
455                    acc_match.plus(&local_match)
456                }
457            })
458    }
459
460    fn has_id(&self, commit_id: &CommitId) -> bool {
461        self.commit_id_to_pos(commit_id).is_some()
462    }
463
464    fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool {
465        let ancestor_pos = self.commit_id_to_pos(ancestor_id).unwrap();
466        let descendant_pos = self.commit_id_to_pos(descendant_id).unwrap();
467        self.is_ancestor_pos(ancestor_pos, descendant_pos)
468    }
469
470    fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> {
471        let pos1 = set1
472            .iter()
473            .map(|id| self.commit_id_to_pos(id).unwrap())
474            .collect_vec();
475        let pos2 = set2
476            .iter()
477            .map(|id| self.commit_id_to_pos(id).unwrap())
478            .collect_vec();
479        self.common_ancestors_pos(&pos1, &pos2)
480            .iter()
481            .map(|pos| self.entry_by_pos(*pos).commit_id())
482            .collect()
483    }
484
485    fn all_heads_for_gc(
486        &self,
487    ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
488        Ok(Box::new(self.all_heads()))
489    }
490
491    fn heads(
492        &self,
493        candidate_ids: &mut dyn Iterator<Item = &CommitId>,
494    ) -> Result<Vec<CommitId>, IndexError> {
495        let candidate_positions: BTreeSet<_> = candidate_ids
496            .map(|id| self.commit_id_to_pos(id).unwrap())
497            .collect();
498
499        Ok(self
500            .heads_pos(candidate_positions)
501            .iter()
502            .map(|pos| self.entry_by_pos(*pos).commit_id())
503            .collect())
504    }
505
506    fn evaluate_revset<'index>(
507        &'index self,
508        expression: &ResolvedExpression,
509        store: &Arc<Store>,
510    ) -> Result<Box<dyn Revset + 'index>, RevsetEvaluationError> {
511        CompositeIndex::evaluate_revset(self, expression, store)
512    }
513}
514
515pub(super) struct ChangeIdIndexImpl<I> {
516    index: I,
517    reachable_set: Mutex<AncestorsBitSet>,
518}
519
520impl<I: AsCompositeIndex> ChangeIdIndexImpl<I> {
521    pub fn new(index: I, heads: &mut dyn Iterator<Item = &CommitId>) -> ChangeIdIndexImpl<I> {
522        let composite = index.as_composite();
523        let mut reachable_set = AncestorsBitSet::with_capacity(composite.num_commits());
524        for id in heads {
525            reachable_set.add_head(composite.commit_id_to_pos(id).unwrap());
526        }
527        ChangeIdIndexImpl {
528            index,
529            reachable_set: Mutex::new(reachable_set),
530        }
531    }
532}
533
534impl<I: AsCompositeIndex + Send + Sync> ChangeIdIndex for ChangeIdIndexImpl<I> {
535    // Resolves change id prefix among all ids, then filters out hidden
536    // entries.
537    //
538    // If `SingleMatch` is returned, the commits including in the set are all
539    // visible. `AmbiguousMatch` may be returned even if the prefix is unique
540    // within the visible entries.
541    fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<CommitId>> {
542        let index = self.index.as_composite();
543        match index.resolve_change_id_prefix(prefix) {
544            PrefixResolution::NoMatch => PrefixResolution::NoMatch,
545            PrefixResolution::SingleMatch((_change_id, positions)) => {
546                debug_assert!(positions.iter().tuple_windows().all(|(a, b)| a < b));
547                let mut reachable_set = self.reachable_set.lock().unwrap();
548                reachable_set.visit_until(index, *positions.first().unwrap());
549                let reachable_commit_ids = positions
550                    .iter()
551                    .filter(|&&pos| reachable_set.contains(pos))
552                    .map(|&pos| index.entry_by_pos(pos).commit_id())
553                    .collect_vec();
554                if reachable_commit_ids.is_empty() {
555                    PrefixResolution::NoMatch
556                } else {
557                    PrefixResolution::SingleMatch(reachable_commit_ids)
558                }
559            }
560            PrefixResolution::AmbiguousMatch => PrefixResolution::AmbiguousMatch,
561        }
562    }
563
564    // Calculates the shortest prefix length of the given `change_id` among all
565    // IDs, including hidden entries.
566    //
567    // The returned length is usually a few digits longer than the minimum
568    // length necessary to disambiguate within the visible entries since hidden
569    // entries are also considered when determining the prefix length.
570    fn shortest_unique_prefix_len(&self, change_id: &ChangeId) -> usize {
571        self.index
572            .as_composite()
573            .shortest_unique_change_id_prefix_len(change_id)
574    }
575}
576
577pub struct IndexLevelStats {
578    pub num_commits: u32,
579    pub name: Option<String>,
580}
581
582pub struct IndexStats {
583    pub num_commits: u32,
584    pub num_merges: u32,
585    pub max_generation_number: u32,
586    pub num_heads: u32,
587    pub num_changes: u32,
588    pub levels: Vec<IndexLevelStats>,
589}
590
591/// Removes the greatest items (including duplicates) from the heap, returns
592/// one.
593fn dedup_pop<T: Ord>(heap: &mut BinaryHeap<T>) -> Option<T> {
594    let item = heap.pop()?;
595    while heap.peek() == Some(&item) {
596        heap.pop().unwrap();
597    }
598    Some(item)
599}