jj_lib/default_index/
mutable.rs

1// Copyright 2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::cmp::max;
19use std::collections::BTreeMap;
20use std::collections::HashMap;
21use std::io;
22use std::io::Write as _;
23use std::ops::Bound;
24use std::path::Path;
25use std::sync::Arc;
26
27use blake2::Blake2b512;
28use digest::Digest as _;
29use itertools::Itertools as _;
30use smallvec::smallvec;
31use smallvec::SmallVec;
32use tempfile::NamedTempFile;
33
34use super::composite::AsCompositeIndex;
35use super::composite::ChangeIdIndexImpl;
36use super::composite::CompositeIndex;
37use super::composite::DynIndexSegment;
38use super::composite::IndexSegment;
39use super::entry::IndexPosition;
40use super::entry::LocalPosition;
41use super::entry::SmallIndexPositionsVec;
42use super::entry::SmallLocalPositionsVec;
43use super::readonly::DefaultReadonlyIndex;
44use super::readonly::ReadonlyIndexSegment;
45use super::readonly::INDEX_SEGMENT_FILE_FORMAT_VERSION;
46use super::readonly::OVERFLOW_FLAG;
47use crate::backend::ChangeId;
48use crate::backend::CommitId;
49use crate::commit::Commit;
50use crate::file_util::persist_content_addressed_temp_file;
51use crate::index::AllHeadsForGcUnsupported;
52use crate::index::ChangeIdIndex;
53use crate::index::Index;
54use crate::index::IndexError;
55use crate::index::MutableIndex;
56use crate::index::ReadonlyIndex;
57use crate::object_id::HexPrefix;
58use crate::object_id::ObjectId;
59use crate::object_id::PrefixResolution;
60use crate::revset::ResolvedExpression;
61use crate::revset::Revset;
62use crate::revset::RevsetEvaluationError;
63use crate::store::Store;
64
65#[derive(Debug)]
66struct MutableGraphEntry {
67    commit_id: CommitId,
68    change_id: ChangeId,
69    generation_number: u32,
70    parent_positions: SmallIndexPositionsVec,
71}
72
73pub(super) struct MutableIndexSegment {
74    parent_file: Option<Arc<ReadonlyIndexSegment>>,
75    num_parent_commits: u32,
76    commit_id_length: usize,
77    change_id_length: usize,
78    graph: Vec<MutableGraphEntry>,
79    commit_lookup: BTreeMap<CommitId, LocalPosition>,
80    change_lookup: BTreeMap<ChangeId, SmallLocalPositionsVec>,
81}
82
83impl MutableIndexSegment {
84    pub(super) fn full(commit_id_length: usize, change_id_length: usize) -> Self {
85        Self {
86            parent_file: None,
87            num_parent_commits: 0,
88            commit_id_length,
89            change_id_length,
90            graph: vec![],
91            commit_lookup: BTreeMap::new(),
92            change_lookup: BTreeMap::new(),
93        }
94    }
95
96    pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self {
97        let num_parent_commits = parent_file.as_composite().num_commits();
98        let commit_id_length = parent_file.commit_id_length();
99        let change_id_length = parent_file.change_id_length();
100        Self {
101            parent_file: Some(parent_file),
102            num_parent_commits,
103            commit_id_length,
104            change_id_length,
105            graph: vec![],
106            commit_lookup: BTreeMap::new(),
107            change_lookup: BTreeMap::new(),
108        }
109    }
110
111    pub(super) fn as_composite(&self) -> &CompositeIndex {
112        CompositeIndex::new(self)
113    }
114
115    pub(super) fn add_commit(&mut self, commit: &Commit) {
116        self.add_commit_data(
117            commit.id().clone(),
118            commit.change_id().clone(),
119            commit.parent_ids(),
120        );
121    }
122
123    pub(super) fn add_commit_data(
124        &mut self,
125        commit_id: CommitId,
126        change_id: ChangeId,
127        parent_ids: &[CommitId],
128    ) {
129        if self.as_composite().has_id(&commit_id) {
130            return;
131        }
132        let mut entry = MutableGraphEntry {
133            commit_id,
134            change_id,
135            generation_number: 0,
136            parent_positions: SmallVec::new(),
137        };
138        for parent_id in parent_ids {
139            let parent_entry = self
140                .as_composite()
141                .entry_by_id(parent_id)
142                .expect("parent commit is not indexed");
143            entry.generation_number = max(
144                entry.generation_number,
145                parent_entry.generation_number() + 1,
146            );
147            entry.parent_positions.push(parent_entry.position());
148        }
149        let local_pos = LocalPosition(u32::try_from(self.graph.len()).unwrap());
150        self.commit_lookup
151            .insert(entry.commit_id.clone(), local_pos);
152        self.change_lookup
153            .entry(entry.change_id.clone())
154            // positions are inherently sorted
155            .and_modify(|positions| positions.push(local_pos))
156            .or_insert(smallvec![local_pos]);
157        self.graph.push(entry);
158    }
159
160    pub(super) fn add_commits_from(&mut self, other_segment: &DynIndexSegment) {
161        let other = CompositeIndex::new(other_segment);
162        for pos in other_segment.num_parent_commits()..other.num_commits() {
163            let entry = other.entry_by_pos(IndexPosition(pos));
164            let parent_ids = entry.parents().map(|entry| entry.commit_id()).collect_vec();
165            self.add_commit_data(entry.commit_id(), entry.change_id(), &parent_ids);
166        }
167    }
168
169    pub(super) fn merge_in(&mut self, other: Arc<ReadonlyIndexSegment>) {
170        let mut maybe_own_ancestor = self.parent_file.clone();
171        let mut maybe_other_ancestor = Some(other);
172        let mut files_to_add = vec![];
173        loop {
174            if maybe_other_ancestor.is_none() {
175                break;
176            }
177            let other_ancestor = maybe_other_ancestor.as_ref().unwrap();
178            if maybe_own_ancestor.is_none() {
179                files_to_add.push(other_ancestor.clone());
180                maybe_other_ancestor = other_ancestor.parent_file().cloned();
181                continue;
182            }
183            let own_ancestor = maybe_own_ancestor.as_ref().unwrap();
184            if own_ancestor.name() == other_ancestor.name() {
185                break;
186            }
187            if own_ancestor.as_composite().num_commits()
188                < other_ancestor.as_composite().num_commits()
189            {
190                files_to_add.push(other_ancestor.clone());
191                maybe_other_ancestor = other_ancestor.parent_file().cloned();
192            } else {
193                maybe_own_ancestor = own_ancestor.parent_file().cloned();
194            }
195        }
196
197        for file in files_to_add.iter().rev() {
198            self.add_commits_from(file.as_ref());
199        }
200    }
201
202    fn serialize_parent_filename(&self, buf: &mut Vec<u8>) {
203        if let Some(parent_file) = &self.parent_file {
204            buf.extend(
205                u32::try_from(parent_file.name().len())
206                    .unwrap()
207                    .to_le_bytes(),
208            );
209            buf.extend_from_slice(parent_file.name().as_bytes());
210        } else {
211            buf.extend(0_u32.to_le_bytes());
212        }
213    }
214
215    fn serialize_local_entries(&self, buf: &mut Vec<u8>) {
216        assert_eq!(self.graph.len(), self.commit_lookup.len());
217        debug_assert_eq!(
218            self.graph.len(),
219            self.change_lookup.values().flatten().count()
220        );
221
222        let num_commits = u32::try_from(self.graph.len()).unwrap();
223        buf.extend(num_commits.to_le_bytes());
224        let num_change_ids = u32::try_from(self.change_lookup.len()).unwrap();
225        buf.extend(num_change_ids.to_le_bytes());
226        // We'll write the actual values later
227        let parent_overflow_offset = buf.len();
228        buf.extend(0_u32.to_le_bytes());
229        let change_overflow_offset = buf.len();
230        buf.extend(0_u32.to_le_bytes());
231
232        // Positions of change ids in the sorted table
233        let change_id_pos_map: HashMap<&ChangeId, u32> = self
234            .change_lookup
235            .keys()
236            .enumerate()
237            .map(|(i, change_id)| (change_id, u32::try_from(i).unwrap()))
238            .collect();
239
240        let mut parent_overflow = vec![];
241        for entry in &self.graph {
242            buf.extend(entry.generation_number.to_le_bytes());
243
244            match entry.parent_positions.as_slice() {
245                [] => {
246                    buf.extend((!0_u32).to_le_bytes());
247                    buf.extend((!0_u32).to_le_bytes());
248                }
249                [IndexPosition(pos1)] => {
250                    assert!(*pos1 < OVERFLOW_FLAG);
251                    buf.extend(pos1.to_le_bytes());
252                    buf.extend((!0_u32).to_le_bytes());
253                }
254                [IndexPosition(pos1), IndexPosition(pos2)] => {
255                    assert!(*pos1 < OVERFLOW_FLAG);
256                    assert!(*pos2 < OVERFLOW_FLAG);
257                    buf.extend(pos1.to_le_bytes());
258                    buf.extend(pos2.to_le_bytes());
259                }
260                positions => {
261                    let overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
262                    let num_parents = u32::try_from(positions.len()).unwrap();
263                    assert!(overflow_pos < OVERFLOW_FLAG);
264                    assert!(num_parents < OVERFLOW_FLAG);
265                    buf.extend((!overflow_pos).to_le_bytes());
266                    buf.extend((!num_parents).to_le_bytes());
267                    parent_overflow.extend_from_slice(positions);
268                }
269            }
270
271            buf.extend(change_id_pos_map[&entry.change_id].to_le_bytes());
272
273            assert_eq!(entry.commit_id.as_bytes().len(), self.commit_id_length);
274            buf.extend_from_slice(entry.commit_id.as_bytes());
275        }
276
277        for LocalPosition(pos) in self.commit_lookup.values() {
278            buf.extend(pos.to_le_bytes());
279        }
280
281        for change_id in self.change_lookup.keys() {
282            assert_eq!(change_id.as_bytes().len(), self.change_id_length);
283            buf.extend_from_slice(change_id.as_bytes());
284        }
285
286        let mut change_overflow = vec![];
287        for positions in self.change_lookup.values() {
288            match positions.as_slice() {
289                [] => panic!("change id lookup entry must not be empty"),
290                // Optimize for imported commits
291                [LocalPosition(pos1)] => {
292                    assert!(*pos1 < OVERFLOW_FLAG);
293                    buf.extend(pos1.to_le_bytes());
294                }
295                positions => {
296                    let overflow_pos = u32::try_from(change_overflow.len()).unwrap();
297                    assert!(overflow_pos < OVERFLOW_FLAG);
298                    buf.extend((!overflow_pos).to_le_bytes());
299                    change_overflow.extend_from_slice(positions);
300                }
301            }
302        }
303
304        let num_parent_overflow = u32::try_from(parent_overflow.len()).unwrap();
305        buf[parent_overflow_offset..][..4].copy_from_slice(&num_parent_overflow.to_le_bytes());
306        for IndexPosition(pos) in parent_overflow {
307            buf.extend(pos.to_le_bytes());
308        }
309
310        let num_change_overflow = u32::try_from(change_overflow.len()).unwrap();
311        buf[change_overflow_offset..][..4].copy_from_slice(&num_change_overflow.to_le_bytes());
312        for LocalPosition(pos) in change_overflow {
313            buf.extend(pos.to_le_bytes());
314        }
315    }
316
317    /// If the MutableIndex has more than half the commits of its parent
318    /// ReadonlyIndex, return MutableIndex with the commits from both. This
319    /// is done recursively, so the stack of index files has O(log n) files.
320    fn maybe_squash_with_ancestors(self) -> MutableIndexSegment {
321        let mut num_new_commits = self.num_local_commits();
322        let mut files_to_squash = vec![];
323        let mut base_parent_file = None;
324        for parent_file in self.as_composite().ancestor_files_without_local() {
325            // TODO: We should probably also squash if the parent file has less than N
326            // commits, regardless of how many (few) are in `self`.
327            if 2 * num_new_commits < parent_file.num_local_commits() {
328                base_parent_file = Some(parent_file.clone());
329                break;
330            }
331            num_new_commits += parent_file.num_local_commits();
332            files_to_squash.push(parent_file.clone());
333        }
334
335        if files_to_squash.is_empty() {
336            return self;
337        }
338
339        let mut squashed = if let Some(parent_file) = base_parent_file {
340            MutableIndexSegment::incremental(parent_file)
341        } else {
342            MutableIndexSegment::full(self.commit_id_length, self.change_id_length)
343        };
344        for parent_file in files_to_squash.iter().rev() {
345            squashed.add_commits_from(parent_file.as_ref());
346        }
347        squashed.add_commits_from(&self);
348        squashed
349    }
350
351    pub(super) fn save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> {
352        if self.num_local_commits() == 0 && self.parent_file.is_some() {
353            return Ok(self.parent_file.unwrap());
354        }
355
356        let mut buf = Vec::new();
357        buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
358        self.serialize_parent_filename(&mut buf);
359        let local_entries_offset = buf.len();
360        self.serialize_local_entries(&mut buf);
361        let mut hasher = Blake2b512::new();
362        hasher.update(&buf);
363        let index_file_id_hex = hex::encode(hasher.finalize());
364        let index_file_path = dir.join(&index_file_id_hex);
365
366        let mut temp_file = NamedTempFile::new_in(dir)?;
367        let file = temp_file.as_file_mut();
368        file.write_all(&buf)?;
369        persist_content_addressed_temp_file(temp_file, index_file_path)?;
370
371        Ok(ReadonlyIndexSegment::load_with_parent_file(
372            &mut &buf[local_entries_offset..],
373            index_file_id_hex,
374            self.parent_file,
375            self.commit_id_length,
376            self.change_id_length,
377        )
378        .expect("in-memory index data should be valid and readable"))
379    }
380}
381
382impl IndexSegment for MutableIndexSegment {
383    fn num_parent_commits(&self) -> u32 {
384        self.num_parent_commits
385    }
386
387    fn num_local_commits(&self) -> u32 {
388        self.graph.len().try_into().unwrap()
389    }
390
391    fn parent_file(&self) -> Option<&Arc<ReadonlyIndexSegment>> {
392        self.parent_file.as_ref()
393    }
394
395    fn name(&self) -> Option<String> {
396        None
397    }
398
399    fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalPosition> {
400        self.commit_lookup.get(commit_id).copied()
401    }
402
403    fn resolve_neighbor_commit_ids(
404        &self,
405        commit_id: &CommitId,
406    ) -> (Option<CommitId>, Option<CommitId>) {
407        let (prev_id, next_id) = resolve_neighbor_ids(&self.commit_lookup, commit_id);
408        (prev_id.cloned(), next_id.cloned())
409    }
410
411    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
412        let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
413        resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone())
414    }
415
416    fn resolve_neighbor_change_ids(
417        &self,
418        change_id: &ChangeId,
419    ) -> (Option<ChangeId>, Option<ChangeId>) {
420        let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id);
421        (prev_id.cloned(), next_id.cloned())
422    }
423
424    fn resolve_change_id_prefix(
425        &self,
426        prefix: &HexPrefix,
427    ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> {
428        let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes());
429        resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix)
430            .map(|(id, positions)| (id.clone(), positions.clone()))
431    }
432
433    fn generation_number(&self, local_pos: LocalPosition) -> u32 {
434        self.graph[local_pos.0 as usize].generation_number
435    }
436
437    fn commit_id(&self, local_pos: LocalPosition) -> CommitId {
438        self.graph[local_pos.0 as usize].commit_id.clone()
439    }
440
441    fn change_id(&self, local_pos: LocalPosition) -> ChangeId {
442        self.graph[local_pos.0 as usize].change_id.clone()
443    }
444
445    fn num_parents(&self, local_pos: LocalPosition) -> u32 {
446        self.graph[local_pos.0 as usize]
447            .parent_positions
448            .len()
449            .try_into()
450            .unwrap()
451    }
452
453    fn parent_positions(&self, local_pos: LocalPosition) -> SmallIndexPositionsVec {
454        self.graph[local_pos.0 as usize].parent_positions.clone()
455    }
456}
457
458/// In-memory mutable records for the on-disk commit index backend.
459pub struct DefaultMutableIndex(MutableIndexSegment);
460
461impl DefaultMutableIndex {
462    pub(crate) fn full(commit_id_length: usize, change_id_length: usize) -> Self {
463        let mutable_segment = MutableIndexSegment::full(commit_id_length, change_id_length);
464        DefaultMutableIndex(mutable_segment)
465    }
466
467    pub(super) fn incremental(parent_file: Arc<ReadonlyIndexSegment>) -> Self {
468        let mutable_segment = MutableIndexSegment::incremental(parent_file);
469        DefaultMutableIndex(mutable_segment)
470    }
471
472    #[cfg(test)]
473    pub(crate) fn add_commit_data(
474        &mut self,
475        commit_id: CommitId,
476        change_id: ChangeId,
477        parent_ids: &[CommitId],
478    ) {
479        self.0.add_commit_data(commit_id, change_id, parent_ids);
480    }
481
482    pub(super) fn squash_and_save_in(self, dir: &Path) -> io::Result<Arc<ReadonlyIndexSegment>> {
483        self.0.maybe_squash_with_ancestors().save_in(dir)
484    }
485}
486
487impl AsCompositeIndex for DefaultMutableIndex {
488    fn as_composite(&self) -> &CompositeIndex {
489        self.0.as_composite()
490    }
491}
492
493impl Index for DefaultMutableIndex {
494    fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize {
495        self.as_composite()
496            .shortest_unique_commit_id_prefix_len(commit_id)
497    }
498
499    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
500        self.as_composite().resolve_commit_id_prefix(prefix)
501    }
502
503    fn has_id(&self, commit_id: &CommitId) -> bool {
504        self.as_composite().has_id(commit_id)
505    }
506
507    fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool {
508        self.as_composite().is_ancestor(ancestor_id, descendant_id)
509    }
510
511    fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> {
512        self.as_composite().common_ancestors(set1, set2)
513    }
514
515    fn all_heads_for_gc(
516        &self,
517    ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
518        Ok(Box::new(self.as_composite().all_heads()))
519    }
520
521    fn heads(
522        &self,
523        candidates: &mut dyn Iterator<Item = &CommitId>,
524    ) -> Result<Vec<CommitId>, IndexError> {
525        self.as_composite().heads(candidates)
526    }
527
528    fn evaluate_revset<'index>(
529        &'index self,
530        expression: &ResolvedExpression,
531        store: &Arc<Store>,
532    ) -> Result<Box<dyn Revset + 'index>, RevsetEvaluationError> {
533        self.as_composite().evaluate_revset(expression, store)
534    }
535}
536
537impl MutableIndex for DefaultMutableIndex {
538    fn as_any(&self) -> &dyn Any {
539        self
540    }
541
542    fn into_any(self: Box<Self>) -> Box<dyn Any> {
543        Box::new(*self)
544    }
545
546    fn as_index(&self) -> &dyn Index {
547        self
548    }
549
550    fn change_id_index(
551        &self,
552        heads: &mut dyn Iterator<Item = &CommitId>,
553    ) -> Box<dyn ChangeIdIndex + '_> {
554        Box::new(ChangeIdIndexImpl::new(self, heads))
555    }
556
557    fn add_commit(&mut self, commit: &Commit) {
558        self.0.add_commit(commit);
559    }
560
561    fn merge_in(&mut self, other: &dyn ReadonlyIndex) {
562        let other = other
563            .as_any()
564            .downcast_ref::<DefaultReadonlyIndex>()
565            .expect("index to merge in must be a DefaultReadonlyIndex");
566        self.0.merge_in(other.as_segment().clone());
567    }
568}
569
570fn resolve_neighbor_ids<'a, K: Ord, V>(
571    lookup_table: &'a BTreeMap<K, V>,
572    id: &K,
573) -> (Option<&'a K>, Option<&'a K>) {
574    let prev_id = lookup_table
575        .range((Bound::Unbounded, Bound::Excluded(id)))
576        .next_back()
577        .map(|(id, _)| id);
578    let next_id = lookup_table
579        .range((Bound::Excluded(id), Bound::Unbounded))
580        .next()
581        .map(|(id, _)| id);
582    (prev_id, next_id)
583}
584
585fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
586    lookup_table: &'a BTreeMap<K, V>,
587    prefix: &HexPrefix,
588    min_bytes_prefix: &K,
589) -> PrefixResolution<(&'a K, &'a V)> {
590    let mut matches = lookup_table
591        .range((Bound::Included(min_bytes_prefix), Bound::Unbounded))
592        .take_while(|&(id, _)| prefix.matches(id))
593        .fuse();
594    match (matches.next(), matches.next()) {
595        (Some(entry), None) => PrefixResolution::SingleMatch(entry),
596        (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
597        (None, _) => PrefixResolution::NoMatch,
598    }
599}