jj_lib/default_index/
mutable.rs

1// Copyright 2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::cmp::max;
19use std::collections::BTreeMap;
20use std::collections::HashMap;
21use std::io;
22use std::io::Write as _;
23use std::ops::Bound;
24use std::path::Path;
25use std::sync::Arc;
26
27use blake2::Blake2b512;
28use digest::Digest as _;
29use itertools::Itertools as _;
30use smallvec::smallvec;
31use smallvec::SmallVec;
32use tempfile::NamedTempFile;
33
34use super::composite::AsCompositeIndex;
35use super::composite::ChangeIdIndexImpl;
36use super::composite::CompositeIndex;
37use super::composite::DynIndexSegment;
38use super::composite::IndexSegment;
39use super::entry::IndexPosition;
40use super::entry::LocalPosition;
41use super::entry::SmallIndexPositionsVec;
42use super::entry::SmallLocalPositionsVec;
43use super::readonly::DefaultReadonlyIndex;
44use super::readonly::ReadonlyIndexSegment;
45use super::readonly::INDEX_SEGMENT_FILE_FORMAT_VERSION;
46use super::readonly::OVERFLOW_FLAG;
47use crate::backend::ChangeId;
48use crate::backend::CommitId;
49use crate::commit::Commit;
50use crate::file_util::persist_content_addressed_temp_file;
51use crate::hex_util;
52use crate::index::AllHeadsForGcUnsupported;
53use crate::index::ChangeIdIndex;
54use crate::index::Index;
55use crate::index::IndexError;
56use crate::index::MutableIndex;
57use crate::index::ReadonlyIndex;
58use crate::object_id::HexPrefix;
59use crate::object_id::ObjectId;
60use crate::object_id::PrefixResolution;
61use crate::revset::ResolvedExpression;
62use crate::revset::Revset;
63use crate::revset::RevsetEvaluationError;
64use crate::store::Store;
65
66#[derive(Debug)]
67struct MutableGraphEntry {
68    commit_id: CommitId,
69    change_id: ChangeId,
70    generation_number: u32,
71    parent_positions: SmallIndexPositionsVec,
72}
73
74pub(super) struct MutableIndexSegment {
75    parent_file: Option<Arc<ReadonlyIndexSegment>>,
76    num_parent_commits: u32,
77    commit_id_length: usize,
78    change_id_length: usize,
79    graph: Vec<MutableGraphEntry>,
80    commit_lookup: BTreeMap<CommitId, LocalPosition>,
81    change_lookup: BTreeMap<ChangeId, SmallLocalPositionsVec>,
82}
83
84impl MutableIndexSegment {
85    pub(super) fn full(commit_id_length: usize, change_id_length: usize) -> Self {
86        Self {
87            parent_file: None,
88            num_parent_commits: 0,
89            commit_id_length,
90            change_id_length,
91            graph: vec![],
92            commit_lookup: BTreeMap::new(),
93            change_lookup: BTreeMap::new(),
94        }
95    }
96
97    pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self {
98        let num_parent_commits = parent_file.as_composite().num_commits();
99        let commit_id_length = parent_file.commit_id_length();
100        let change_id_length = parent_file.change_id_length();
101        Self {
102            parent_file: Some(parent_file),
103            num_parent_commits,
104            commit_id_length,
105            change_id_length,
106            graph: vec![],
107            commit_lookup: BTreeMap::new(),
108            change_lookup: BTreeMap::new(),
109        }
110    }
111
112    pub(super) fn as_composite(&self) -> &CompositeIndex {
113        CompositeIndex::new(self)
114    }
115
116    pub(super) fn add_commit(&mut self, commit: &Commit) {
117        self.add_commit_data(
118            commit.id().clone(),
119            commit.change_id().clone(),
120            commit.parent_ids(),
121        );
122    }
123
124    pub(super) fn add_commit_data(
125        &mut self,
126        commit_id: CommitId,
127        change_id: ChangeId,
128        parent_ids: &[CommitId],
129    ) {
130        if self.as_composite().has_id(&commit_id) {
131            return;
132        }
133        let mut entry = MutableGraphEntry {
134            commit_id,
135            change_id,
136            generation_number: 0,
137            parent_positions: SmallVec::new(),
138        };
139        for parent_id in parent_ids {
140            let parent_entry = self
141                .as_composite()
142                .entry_by_id(parent_id)
143                .expect("parent commit is not indexed");
144            entry.generation_number = max(
145                entry.generation_number,
146                parent_entry.generation_number() + 1,
147            );
148            entry.parent_positions.push(parent_entry.position());
149        }
150        let local_pos = LocalPosition(u32::try_from(self.graph.len()).unwrap());
151        self.commit_lookup
152            .insert(entry.commit_id.clone(), local_pos);
153        self.change_lookup
154            .entry(entry.change_id.clone())
155            // positions are inherently sorted
156            .and_modify(|positions| positions.push(local_pos))
157            .or_insert(smallvec![local_pos]);
158        self.graph.push(entry);
159    }
160
161    pub(super) fn add_commits_from(&mut self, other_segment: &DynIndexSegment) {
162        let other = CompositeIndex::new(other_segment);
163        for pos in other_segment.num_parent_commits()..other.num_commits() {
164            let entry = other.entry_by_pos(IndexPosition(pos));
165            let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec();
166            self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids);
167        }
168    }
169
170    pub(super) fn merge_in(&mut self, other: Arc<ReadonlyIndexSegment>) {
171        let mut maybe_own_ancestor = self.parent_file.clone();
172        let mut maybe_other_ancestor = Some(other);
173        let mut files_to_add = vec![];
174        loop {
175            if maybe_other_ancestor.is_none() {
176                break;
177            }
178            let other_ancestor = maybe_other_ancestor.as_ref().unwrap();
179            if maybe_own_ancestor.is_none() {
180                files_to_add.push(other_ancestor.clone());
181                maybe_other_ancestor = other_ancestor.parent_file().cloned();
182                continue;
183            }
184            let own_ancestor = maybe_own_ancestor.as_ref().unwrap();
185            if own_ancestor.name() == other_ancestor.name() {
186                break;
187            }
188            if own_ancestor.as_composite().num_commits()
189                < other_ancestor.as_composite().num_commits()
190            {
191                files_to_add.push(other_ancestor.clone());
192                maybe_other_ancestor = other_ancestor.parent_file().cloned();
193            } else {
194                maybe_own_ancestor = own_ancestor.parent_file().cloned();
195            }
196        }
197
198        for file in files_to_add.iter().rev() {
199            self.add_commits_from(file.as_ref());
200        }
201    }
202
203    fn serialize_parent_filename(&self, buf: &mut Vec<u8>) {
204        if let Some(parent_file) = &self.parent_file {
205            buf.extend(
206                u32::try_from(parent_file.name().len())
207                    .unwrap()
208                    .to_le_bytes(),
209            );
210            buf.extend_from_slice(parent_file.name().as_bytes());
211        } else {
212            buf.extend(0_u32.to_le_bytes());
213        }
214    }
215
216    fn serialize_local_entries(&self, buf: &mut Vec<u8>) {
217        assert_eq!(self.graph.len(), self.commit_lookup.len());
218        debug_assert_eq!(
219            self.graph.len(),
220            self.change_lookup.values().flatten().count()
221        );
222
223        let num_commits = u32::try_from(self.graph.len()).unwrap();
224        buf.extend(num_commits.to_le_bytes());
225        let num_change_ids = u32::try_from(self.change_lookup.len()).unwrap();
226        buf.extend(num_change_ids.to_le_bytes());
227        // We'll write the actual values later
228        let parent_overflow_offset = buf.len();
229        buf.extend(0_u32.to_le_bytes());
230        let change_overflow_offset = buf.len();
231        buf.extend(0_u32.to_le_bytes());
232
233        // Positions of change ids in the sorted table
234        let change_id_pos_map: HashMap<&ChangeId, u32> = self
235            .change_lookup
236            .keys()
237            .enumerate()
238            .map(|(i, change_id)| (change_id, u32::try_from(i).unwrap()))
239            .collect();
240
241        let mut parent_overflow = vec![];
242        for entry in &self.graph {
243            buf.extend(entry.generation_number.to_le_bytes());
244
245            match entry.parent_positions.as_slice() {
246                [] => {
247                    buf.extend((!0_u32).to_le_bytes());
248                    buf.extend((!0_u32).to_le_bytes());
249                }
250                [IndexPosition(pos1)] => {
251                    assert!(*pos1 < OVERFLOW_FLAG);
252                    buf.extend(pos1.to_le_bytes());
253                    buf.extend((!0_u32).to_le_bytes());
254                }
255                [IndexPosition(pos1), IndexPosition(pos2)] => {
256                    assert!(*pos1 < OVERFLOW_FLAG);
257                    assert!(*pos2 < OVERFLOW_FLAG);
258                    buf.extend(pos1.to_le_bytes());
259                    buf.extend(pos2.to_le_bytes());
260                }
261                positions => {
262                    let overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
263                    let num_parents = u32::try_from(positions.len()).unwrap();
264                    assert!(overflow_pos < OVERFLOW_FLAG);
265                    assert!(num_parents < OVERFLOW_FLAG);
266                    buf.extend((!overflow_pos).to_le_bytes());
267                    buf.extend((!num_parents).to_le_bytes());
268                    parent_overflow.extend_from_slice(positions);
269                }
270            }
271
272            buf.extend(change_id_pos_map[&entry.change_id].to_le_bytes());
273
274            assert_eq!(entry.commit_id.as_bytes().len(), self.commit_id_length);
275            buf.extend_from_slice(entry.commit_id.as_bytes());
276        }
277
278        for LocalPosition(pos) in self.commit_lookup.values() {
279            buf.extend(pos.to_le_bytes());
280        }
281
282        for change_id in self.change_lookup.keys() {
283            assert_eq!(change_id.as_bytes().len(), self.change_id_length);
284            buf.extend_from_slice(change_id.as_bytes());
285        }
286
287        let mut change_overflow = vec![];
288        for positions in self.change_lookup.values() {
289            match positions.as_slice() {
290                [] => panic!("change id lookup entry must not be empty"),
291                // Optimize for imported commits
292                [LocalPosition(pos1)] => {
293                    assert!(*pos1 < OVERFLOW_FLAG);
294                    buf.extend(pos1.to_le_bytes());
295                }
296                positions => {
297                    let overflow_pos = u32::try_from(change_overflow.len()).unwrap();
298                    assert!(overflow_pos < OVERFLOW_FLAG);
299                    buf.extend((!overflow_pos).to_le_bytes());
300                    change_overflow.extend_from_slice(positions);
301                }
302            }
303        }
304
305        let num_parent_overflow = u32::try_from(parent_overflow.len()).unwrap();
306        buf[parent_overflow_offset..][..4].copy_from_slice(&num_parent_overflow.to_le_bytes());
307        for IndexPosition(pos) in parent_overflow {
308            buf.extend(pos.to_le_bytes());
309        }
310
311        let num_change_overflow = u32::try_from(change_overflow.len()).unwrap();
312        buf[change_overflow_offset..][..4].copy_from_slice(&num_change_overflow.to_le_bytes());
313        for LocalPosition(pos) in change_overflow {
314            buf.extend(pos.to_le_bytes());
315        }
316    }
317
318    /// If the MutableIndex has more than half the commits of its parent
319    /// ReadonlyIndex, return MutableIndex with the commits from both. This
320    /// is done recursively, so the stack of index files has O(log n) files.
321    fn maybe_squash_with_ancestors(self) -> MutableIndexSegment {
322        let mut num_new_commits = self.num_local_commits();
323        let mut files_to_squash = vec![];
324        let mut base_parent_file = None;
325        for parent_file in self.as_composite().ancestor_files_without_local() {
326            // TODO: We should probably also squash if the parent file has less than N
327            // commits, regardless of how many (few) are in `self`.
328            if 2 * num_new_commits < parent_file.num_local_commits() {
329                base_parent_file = Some(parent_file.clone());
330                break;
331            }
332            num_new_commits += parent_file.num_local_commits();
333            files_to_squash.push(parent_file.clone());
334        }
335
336        if files_to_squash.is_empty() {
337            return self;
338        }
339
340        let mut squashed = if let Some(parent_file) = base_parent_file {
341            MutableIndexSegment::incremental(parent_file)
342        } else {
343            MutableIndexSegment::full(self.commit_id_length, self.change_id_length)
344        };
345        for parent_file in files_to_squash.iter().rev() {
346            squashed.add_commits_from(parent_file.as_ref());
347        }
348        squashed.add_commits_from(&self);
349        squashed
350    }
351
352    pub(super) fn save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> {
353        if self.num_local_commits() == 0 && self.parent_file.is_some() {
354            return Ok(self.parent_file.unwrap());
355        }
356
357        let mut buf = Vec::new();
358        buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
359        self.serialize_parent_filename(&mut buf);
360        let local_entries_offset = buf.len();
361        self.serialize_local_entries(&mut buf);
362        let mut hasher = Blake2b512::new();
363        hasher.update(&buf);
364        let index_file_id_hex = hex_util::encode_hex(&hasher.finalize());
365        let index_file_path = dir.join(&index_file_id_hex);
366
367        let mut temp_file = NamedTempFile::new_in(dir)?;
368        let file = temp_file.as_file_mut();
369        file.write_all(&buf)?;
370        persist_content_addressed_temp_file(temp_file, index_file_path)?;
371
372        Ok(ReadonlyIndexSegment::load_with_parent_file(
373            &mut &buf[local_entries_offset..],
374            index_file_id_hex,
375            self.parent_file,
376            self.commit_id_length,
377            self.change_id_length,
378        )
379        .expect("in-memory index data should be valid and readable"))
380    }
381}
382
383impl IndexSegment for MutableIndexSegment {
384    fn num_parent_commits(&self) -> u32 {
385        self.num_parent_commits
386    }
387
388    fn num_local_commits(&self) -> u32 {
389        self.graph.len().try_into().unwrap()
390    }
391
392    fn parent_file(&self) -> Option<&Arc<ReadonlyIndexSegment>> {
393        self.parent_file.as_ref()
394    }
395
396    fn name(&self) -> Option<String> {
397        None
398    }
399
400    fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalPosition> {
401        self.commit_lookup.get(commit_id).copied()
402    }
403
404    fn resolve_neighbor_commit_ids(
405        &self,
406        commit_id: &CommitId,
407    ) -> (Option<CommitId>, Option<CommitId>) {
408        let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id);
409        (prev_id.cloned(), next_id.cloned())
410    }
411
412    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
413        let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
414        resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone())
415    }
416
417    fn resolve_neighbor_change_ids(
418        &self,
419        change_id: &ChangeId,
420    ) -> (Option<ChangeId>, Option<ChangeId>) {
421        let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id);
422        (prev_id.cloned(), next_id.cloned())
423    }
424
425    fn resolve_change_id_prefix(
426        &self,
427        prefix: &HexPrefix,
428    ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> {
429        let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes());
430        resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix)
431            .map(|(id, positions)| (id.clone(), positions.clone()))
432    }
433
434    fn generation_number(&self, local_pos: LocalPosition) -> u32 {
435        self.graph[local_pos.0 as usize].generation_number
436    }
437
438    fn commit_id(&self, local_pos: LocalPosition) -> CommitId {
439        self.graph[local_pos.0 as usize].commit_id.clone()
440    }
441
442    fn change_id(&self, local_pos: LocalPosition) -> ChangeId {
443        self.graph[local_pos.0 as usize].change_id.clone()
444    }
445
446    fn num_parents(&self, local_pos: LocalPosition) -> u32 {
447        self.graph[local_pos.0 as usize]
448            .parent_positions
449            .len()
450            .try_into()
451            .unwrap()
452    }
453
454    fn parent_positions(&self, local_pos: LocalPosition) -> SmallIndexPositionsVec {
455        self.graph[local_pos.0 as usize].parent_positions.clone()
456    }
457}
458
459/// In-memory mutable records for the on-disk commit index backend.
460pub struct DefaultMutableIndex(MutableIndexSegment);
461
462impl DefaultMutableIndex {
463    pub(crate) fn full(commit_id_length: usize, change_id_length: usize) -> Self {
464        let mutable_segment = MutableIndexSegment::full(commit_id_length, change_id_length);
465        DefaultMutableIndex(mutable_segment)
466    }
467
468    pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self {
469        let mutable_segment = MutableIndexSegment::incremental(parent_file);
470        DefaultMutableIndex(mutable_segment)
471    }
472
473    #[cfg(test)]
474    pub(crate) fn add_commit_data(
475        &mut self,
476        commit_id: CommitId,
477        change_id: ChangeId,
478        parent_ids: &[CommitId],
479    ) {
480        self.0.add_commit_data(commit_id, change_id, parent_ids);
481    }
482
483    pub(super) fn squash_and_save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> {
484        self.0.maybe_squash_with_ancestors().save_in(dir)
485    }
486}
487
488impl AsCompositeIndex for DefaultMutableIndex {
489    fn as_composite(&self) -> &CompositeIndex {
490        self.0.as_composite()
491    }
492}
493
494impl Index for DefaultMutableIndex {
495    fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize {
496        self.as_composite()
497            .shortest_unique_commit_id_prefix_len(commit_id)
498    }
499
500    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
501        self.as_composite().resolve_commit_id_prefix(prefix)
502    }
503
504    fn has_id(&self, commit_id: &CommitId) -> bool {
505        self.as_composite().has_id(commit_id)
506    }
507
508    fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool {
509        self.as_composite().is_ancestor(ancestor_id, descendant_id)
510    }
511
512    fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> {
513        self.as_composite().common_ancestors(set1, set2)
514    }
515
516    fn all_heads_for_gc(
517        &self,
518    ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
519        Ok(Box::new(self.as_composite().all_heads()))
520    }
521
522    fn heads(
523        &self,
524        candidates: &mut dyn Iterator<Item = &CommitId>,
525    ) -> Result<Vec<CommitId>, IndexError> {
526        self.as_composite().heads(candidates)
527    }
528
529    fn evaluate_revset<'index>(
530        &'index self,
531        expression: &ResolvedExpression,
532        store: &Arc<Store>,
533    ) -> Result<Box<dyn Revset + 'index>, RevsetEvaluationError> {
534        self.as_composite().evaluate_revset(expression, store)
535    }
536}
537
538impl MutableIndex for DefaultMutableIndex {
539    fn as_any(&self) -> &dyn Any {
540        self
541    }
542
543    fn into_any(self: Box<Self>) -> Box<dyn Any> {
544        Box::new(*self)
545    }
546
547    fn as_index(&self) -> &dyn Index {
548        self
549    }
550
551    fn change_id_index(
552        &self,
553        heads: &mut dyn Iterator<Item = &CommitId>,
554    ) -> Box<dyn ChangeIdIndex + '_> {
555        Box::new(ChangeIdIndexImpl::new(self, heads))
556    }
557
558    fn add_commit(&mut self, commit: &Commit) {
559        self.0.add_commit(commit);
560    }
561
562    fn merge_in(&mut self, other: &dyn ReadonlyIndex) {
563        let other = other
564            .as_any()
565            .downcast_ref::<DefaultReadonlyIndex>()
566            .expect("index to merge in must be a DefaultReadonlyIndex");
567        self.0.merge_in(other.as_segment().clone());
568    }
569}
570
571fn resolve_neighbor_ids<'a, K: Ord, V>(
572    lookup_table: &'a BTreeMap<K, V>,
573    id: &K,
574) -> (Option<&'a K>, Option<&'a K>) {
575    let prev_id = lookup_table
576        .range((Bound::Unbounded, Bound::Excluded(id)))
577        .next_back()
578        .map(|(id, _)| id);
579    let next_id = lookup_table
580        .range((Bound::Excluded(id), Bound::Unbounded))
581        .next()
582        .map(|(id, _)| id);
583    (prev_id, next_id)
584}
585
586fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
587    lookup_table: &'a BTreeMap<K, V>,
588    prefix: &HexPrefix,
589    min_bytes_prefix: &K,
590) -> PrefixResolution<(&'a K, &'a V)> {
591    let mut matches = lookup_table
592        .range((Bound::Included(min_bytes_prefix), Bound::Unbounded))
593        .take_while(|&(id, _)| prefix.matches(id))
594        .fuse();
595    match (matches.next(), matches.next()) {
596        (Some(entry), None) => PrefixResolution::SingleMatch(entry),
597        (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
598        (None, _) => PrefixResolution::NoMatch,
599    }
600}