jj_lib/default_index/
readonly.rs

1// Copyright 2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::cmp::Ordering;
18use std::collections::HashSet;
19use std::fmt;
20use std::fmt::Debug;
21use std::fs::File;
22use std::io;
23use std::io::Read;
24use std::iter;
25use std::ops::Range;
26use std::path::Path;
27use std::sync::Arc;
28
29use itertools::Itertools as _;
30use smallvec::smallvec;
31use thiserror::Error;
32
33use super::changed_path::CompositeChangedPathIndex;
34use super::composite::AsCompositeIndex;
35use super::composite::ChangeIdIndexImpl;
36use super::composite::CommitIndexSegment;
37use super::composite::CommitIndexSegmentId;
38use super::composite::CompositeCommitIndex;
39use super::composite::CompositeIndex;
40use super::entry::GlobalCommitPosition;
41use super::entry::LocalCommitPosition;
42use super::entry::SmallGlobalCommitPositionsVec;
43use super::entry::SmallLocalCommitPositionsVec;
44use super::mutable::DefaultMutableIndex;
45use super::revset_engine;
46use super::revset_engine::RevsetImpl;
47use crate::backend::ChangeId;
48use crate::backend::CommitId;
49use crate::graph::GraphNode;
50use crate::index::AllHeadsForGcUnsupported;
51use crate::index::ChangeIdIndex;
52use crate::index::Index;
53use crate::index::IndexError;
54use crate::index::MutableIndex;
55use crate::index::ReadonlyIndex;
56use crate::object_id::HexPrefix;
57use crate::object_id::ObjectId;
58use crate::object_id::PrefixResolution;
59use crate::repo_path::RepoPathBuf;
60use crate::revset::ResolvedExpression;
61use crate::revset::Revset;
62use crate::revset::RevsetEvaluationError;
63use crate::store::Store;
64
65/// Error while loading index segment file.
66#[derive(Debug, Error)]
67pub enum ReadonlyIndexLoadError {
68    #[error("Unexpected {kind} index version")]
69    UnexpectedVersion {
70        /// Index type.
71        kind: &'static str,
72        found_version: u32,
73        expected_version: u32,
74    },
75    #[error("Failed to load {kind} index file '{name}'")]
76    Other {
77        /// Index type.
78        kind: &'static str,
79        /// Index file name.
80        name: String,
81        /// Underlying error.
82        #[source]
83        error: io::Error,
84    },
85}
86
87impl ReadonlyIndexLoadError {
88    pub(super) fn invalid_data(
89        kind: &'static str,
90        name: impl Into<String>,
91        error: impl Into<Box<dyn std::error::Error + Send + Sync>>,
92    ) -> Self {
93        Self::from_io_err(
94            kind,
95            name,
96            io::Error::new(io::ErrorKind::InvalidData, error),
97        )
98    }
99
100    pub(super) fn from_io_err(
101        kind: &'static str,
102        name: impl Into<String>,
103        error: io::Error,
104    ) -> Self {
105        Self::Other {
106            kind,
107            name: name.into(),
108            error,
109        }
110    }
111
112    /// Returns true if the underlying error suggests data corruption.
113    pub(super) fn is_corrupt_or_not_found(&self) -> bool {
114        match self {
115            Self::UnexpectedVersion { .. } => true,
116            Self::Other { error, .. } => {
117                // If the parent file name field is corrupt, the file wouldn't be found.
118                // And there's no need to distinguish it from an empty file.
119                matches!(
120                    error.kind(),
121                    io::ErrorKind::NotFound
122                        | io::ErrorKind::InvalidData
123                        | io::ErrorKind::UnexpectedEof
124                )
125            }
126        }
127    }
128}
129
130/// Current format version of the commit index segment file.
131pub(super) const COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 6;
132
133/// If set, the value is stored in the overflow table.
134pub(super) const OVERFLOW_FLAG: u32 = 0x8000_0000;
135
136/// Global index position of parent entry, or overflow pointer.
137#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
138struct ParentIndexPosition(u32);
139
140impl ParentIndexPosition {
141    fn as_inlined(self) -> Option<GlobalCommitPosition> {
142        (self.0 & OVERFLOW_FLAG == 0).then_some(GlobalCommitPosition(self.0))
143    }
144
145    fn as_overflow(self) -> Option<u32> {
146        (self.0 & OVERFLOW_FLAG != 0).then_some(!self.0)
147    }
148}
149
150/// Local position of entry pointed by change id, or overflow pointer.
151#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
152struct ChangeLocalPosition(u32);
153
154impl ChangeLocalPosition {
155    fn as_inlined(self) -> Option<LocalCommitPosition> {
156        (self.0 & OVERFLOW_FLAG == 0).then_some(LocalCommitPosition(self.0))
157    }
158
159    fn as_overflow(self) -> Option<u32> {
160        (self.0 & OVERFLOW_FLAG != 0).then_some(!self.0)
161    }
162}
163
164/// Lengths of fields to be serialized.
165#[derive(Clone, Copy, Debug)]
166pub(super) struct FieldLengths {
167    pub commit_id: usize,
168    pub change_id: usize,
169}
170
171struct CommitGraphEntry<'a> {
172    data: &'a [u8],
173}
174
175// TODO: Add pointers to ancestors further back, like a skip list. Clear the
176// lowest set bit to determine which generation number the pointers point to.
177impl CommitGraphEntry<'_> {
178    fn size(commit_id_length: usize) -> usize {
179        16 + commit_id_length
180    }
181
182    fn generation_number(&self) -> u32 {
183        u32::from_le_bytes(self.data[0..4].try_into().unwrap())
184    }
185
186    fn parent1_pos_or_overflow_pos(&self) -> ParentIndexPosition {
187        ParentIndexPosition(u32::from_le_bytes(self.data[4..8].try_into().unwrap()))
188    }
189
190    fn parent2_pos_or_overflow_len(&self) -> ParentIndexPosition {
191        ParentIndexPosition(u32::from_le_bytes(self.data[8..12].try_into().unwrap()))
192    }
193
194    fn change_id_lookup_pos(&self) -> u32 {
195        u32::from_le_bytes(self.data[12..16].try_into().unwrap())
196    }
197
198    fn commit_id(&self) -> CommitId {
199        CommitId::from_bytes(self.commit_id_bytes())
200    }
201
202    // might be better to add borrowed version of CommitId
203    fn commit_id_bytes(&self) -> &[u8] {
204        &self.data[16..]
205    }
206}
207
208/// Commit index segment backed by immutable file.
209///
210/// File format:
211/// ```text
212/// u32: file format version
213/// u32: parent segment file name length (0 means root)
214/// <length number of bytes>: parent segment file name
215///
216/// u32: number of local commit entries
217/// u32: number of local change ids
218/// u32: number of overflow parent entries
219/// u32: number of overflow change id positions
220/// for each entry, in some topological order with parents first:
221///   u32: generation number
222///   if number of parents <= 2:
223///     u32: (< 0x8000_0000) global index position for parent 1
224///          (==0xffff_ffff) no parent 1
225///     u32: (< 0x8000_0000) global index position for parent 2
226///          (==0xffff_ffff) no parent 2
227///   else:
228///     u32: (>=0x8000_0000) position in the overflow table, bit-negated
229///     u32: (>=0x8000_0000) number of parents (in the overflow table), bit-negated
230///   u32: change id position in the sorted change ids table
231///   <commit id length number of bytes>: commit id
232/// for each entry, sorted by commit id:
233///   u32: local position in the graph entries table
234/// for each entry, sorted by change id:
235///   <change id length number of bytes>: change id
236/// for each entry, sorted by change id:
237///   if number of associated commits == 1:
238///     u32: (< 0x8000_0000) local position in the graph entries table
239///   else:
240///     u32: (>=0x8000_0000) position in the overflow table, bit-negated
241/// for each overflow parent:
242///   u32: global index position
243/// for each overflow change id entry:
244///   u32: local position in the graph entries table
245/// ```
246///
247/// Note that u32 fields are 4-byte aligned so long as the parent file name
248/// (which is hexadecimal hash) and commit/change ids aren't of exotic length.
249// TODO: replace the table by a trie so we don't have to repeat the full commit
250//       ids
251// TODO: add a fanout table like git's commit graph has?
252pub(super) struct ReadonlyCommitIndexSegment {
253    parent_file: Option<Arc<ReadonlyCommitIndexSegment>>,
254    num_parent_commits: u32,
255    id: CommitIndexSegmentId,
256    field_lengths: FieldLengths,
257    // Number of commits not counting the parent file
258    num_local_commits: u32,
259    num_local_change_ids: u32,
260    num_change_overflow_entries: u32,
261    // Base data offsets in bytes:
262    commit_lookup_base: usize,
263    change_id_table_base: usize,
264    change_pos_table_base: usize,
265    parent_overflow_base: usize,
266    change_overflow_base: usize,
267    data: Vec<u8>,
268}
269
270impl Debug for ReadonlyCommitIndexSegment {
271    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
272        f.debug_struct("ReadonlyCommitIndexSegment")
273            .field("id", &self.id)
274            .field("parent_file", &self.parent_file)
275            .finish_non_exhaustive()
276    }
277}
278
279impl ReadonlyCommitIndexSegment {
280    /// Loads both parent segments and local entries from the given file `name`.
281    pub(super) fn load(
282        dir: &Path,
283        id: CommitIndexSegmentId,
284        lengths: FieldLengths,
285    ) -> Result<Arc<Self>, ReadonlyIndexLoadError> {
286        let mut file = File::open(dir.join(id.hex()))
287            .map_err(|err| ReadonlyIndexLoadError::from_io_err("commit", id.hex(), err))?;
288        Self::load_from(&mut file, dir, id, lengths)
289    }
290
291    /// Loads both parent segments and local entries from the given `file`.
292    pub(super) fn load_from(
293        file: &mut dyn Read,
294        dir: &Path,
295        id: CommitIndexSegmentId,
296        lengths: FieldLengths,
297    ) -> Result<Arc<Self>, ReadonlyIndexLoadError> {
298        let from_io_err = |err| ReadonlyIndexLoadError::from_io_err("commit", id.hex(), err);
299        let read_u32 = |file: &mut dyn Read| {
300            let mut buf = [0; 4];
301            file.read_exact(&mut buf).map_err(from_io_err)?;
302            Ok(u32::from_le_bytes(buf))
303        };
304        let format_version = read_u32(file)?;
305        if format_version != COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION {
306            return Err(ReadonlyIndexLoadError::UnexpectedVersion {
307                kind: "commit",
308                found_version: format_version,
309                expected_version: COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION,
310            });
311        }
312        let parent_filename_len = read_u32(file)?;
313        let maybe_parent_file = if parent_filename_len > 0 {
314            let mut parent_filename_bytes = vec![0; parent_filename_len as usize];
315            file.read_exact(&mut parent_filename_bytes)
316                .map_err(from_io_err)?;
317            let parent_file_id = CommitIndexSegmentId::try_from_hex(parent_filename_bytes)
318                .ok_or_else(|| {
319                    ReadonlyIndexLoadError::invalid_data(
320                        "commit",
321                        id.hex(),
322                        "parent file name is not valid hex",
323                    )
324                })?;
325            let parent_file = Self::load(dir, parent_file_id, lengths)?;
326            Some(parent_file)
327        } else {
328            None
329        };
330        Self::load_with_parent_file(file, id, maybe_parent_file, lengths)
331    }
332
333    /// Loads local entries from the given `file`, returns new segment linked to
334    /// the given `parent_file`.
335    pub(super) fn load_with_parent_file(
336        file: &mut dyn Read,
337        id: CommitIndexSegmentId,
338        parent_file: Option<Arc<Self>>,
339        lengths: FieldLengths,
340    ) -> Result<Arc<Self>, ReadonlyIndexLoadError> {
341        let from_io_err = |err| ReadonlyIndexLoadError::from_io_err("commit", id.hex(), err);
342        let read_u32 = |file: &mut dyn Read| {
343            let mut buf = [0; 4];
344            file.read_exact(&mut buf).map_err(from_io_err)?;
345            Ok(u32::from_le_bytes(buf))
346        };
347        let num_parent_commits = parent_file
348            .as_ref()
349            .map_or(0, |segment| segment.as_composite().num_commits());
350        let num_local_commits = read_u32(file)?;
351        let num_local_change_ids = read_u32(file)?;
352        let num_parent_overflow_entries = read_u32(file)?;
353        let num_change_overflow_entries = read_u32(file)?;
354        let mut data = vec![];
355        file.read_to_end(&mut data).map_err(from_io_err)?;
356
357        let commit_graph_entry_size = CommitGraphEntry::size(lengths.commit_id);
358        let graph_size = (num_local_commits as usize) * commit_graph_entry_size;
359        let commit_lookup_size = (num_local_commits as usize) * 4;
360        let change_id_table_size = (num_local_change_ids as usize) * lengths.change_id;
361        let change_pos_table_size = (num_local_change_ids as usize) * 4;
362        let parent_overflow_size = (num_parent_overflow_entries as usize) * 4;
363        let change_overflow_size = (num_change_overflow_entries as usize) * 4;
364
365        let graph_base = 0;
366        let commit_lookup_base = graph_base + graph_size;
367        let change_id_table_base = commit_lookup_base + commit_lookup_size;
368        let change_pos_table_base = change_id_table_base + change_id_table_size;
369        let parent_overflow_base = change_pos_table_base + change_pos_table_size;
370        let change_overflow_base = parent_overflow_base + parent_overflow_size;
371        let expected_size = change_overflow_base + change_overflow_size;
372
373        if data.len() != expected_size {
374            return Err(ReadonlyIndexLoadError::invalid_data(
375                "commit",
376                id.hex(),
377                "unexpected data length",
378            ));
379        }
380
381        Ok(Arc::new(Self {
382            parent_file,
383            num_parent_commits,
384            id,
385            field_lengths: lengths,
386            num_local_commits,
387            num_local_change_ids,
388            num_change_overflow_entries,
389            commit_lookup_base,
390            change_id_table_base,
391            change_pos_table_base,
392            parent_overflow_base,
393            change_overflow_base,
394            data,
395        }))
396    }
397
398    pub(super) fn as_composite(&self) -> &CompositeCommitIndex {
399        CompositeCommitIndex::new(self)
400    }
401
402    pub(super) fn id(&self) -> &CommitIndexSegmentId {
403        &self.id
404    }
405
406    pub(super) fn field_lengths(&self) -> FieldLengths {
407        self.field_lengths
408    }
409
410    fn graph_entry(&self, local_pos: LocalCommitPosition) -> CommitGraphEntry<'_> {
411        let table = &self.data[..self.commit_lookup_base];
412        let entry_size = CommitGraphEntry::size(self.field_lengths.commit_id);
413        let offset = (local_pos.0 as usize) * entry_size;
414        CommitGraphEntry {
415            data: &table[offset..][..entry_size],
416        }
417    }
418
419    fn commit_lookup_pos(&self, lookup_pos: u32) -> LocalCommitPosition {
420        let table = &self.data[self.commit_lookup_base..self.change_id_table_base];
421        let offset = (lookup_pos as usize) * 4;
422        LocalCommitPosition(u32::from_le_bytes(table[offset..][..4].try_into().unwrap()))
423    }
424
425    fn change_lookup_id(&self, lookup_pos: u32) -> ChangeId {
426        ChangeId::from_bytes(self.change_lookup_id_bytes(lookup_pos))
427    }
428
429    // might be better to add borrowed version of ChangeId
430    fn change_lookup_id_bytes(&self, lookup_pos: u32) -> &[u8] {
431        let table = &self.data[self.change_id_table_base..self.change_pos_table_base];
432        let offset = (lookup_pos as usize) * self.field_lengths.change_id;
433        &table[offset..][..self.field_lengths.change_id]
434    }
435
436    fn change_lookup_pos(&self, lookup_pos: u32) -> ChangeLocalPosition {
437        let table = &self.data[self.change_pos_table_base..self.parent_overflow_base];
438        let offset = (lookup_pos as usize) * 4;
439        ChangeLocalPosition(u32::from_le_bytes(table[offset..][..4].try_into().unwrap()))
440    }
441
442    fn overflow_parents(
443        &self,
444        overflow_pos: u32,
445        num_parents: u32,
446    ) -> SmallGlobalCommitPositionsVec {
447        let table = &self.data[self.parent_overflow_base..self.change_overflow_base];
448        let offset = (overflow_pos as usize) * 4;
449        let size = (num_parents as usize) * 4;
450        let (chunks, _remainder) = table[offset..][..size].as_chunks();
451        chunks
452            .iter()
453            .map(|&chunk: &[u8; 4]| GlobalCommitPosition(u32::from_le_bytes(chunk)))
454            .collect()
455    }
456
457    /// Scans graph entry positions stored in the overflow change ids table.
458    fn overflow_changes_from(
459        &self,
460        overflow_pos: u32,
461    ) -> impl Iterator<Item = LocalCommitPosition> + use<'_> {
462        let table = &self.data[self.change_overflow_base..];
463        let offset = (overflow_pos as usize) * 4;
464        let (chunks, _remainder) = table[offset..].as_chunks();
465        chunks
466            .iter()
467            .map(|&chunk: &[u8; 4]| LocalCommitPosition(u32::from_le_bytes(chunk)))
468    }
469
470    /// Binary searches commit id by `prefix`. Returns the lookup position.
471    fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
472        binary_search_pos_by(self.num_local_commits, |pos| {
473            let local_pos = self.commit_lookup_pos(pos);
474            let entry = self.graph_entry(local_pos);
475            entry.commit_id_bytes().cmp(prefix)
476        })
477    }
478
479    /// Binary searches change id by `prefix`. Returns the lookup position.
480    fn change_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
481        binary_search_pos_by(self.num_local_change_ids, |pos| {
482            let change_id_bytes = self.change_lookup_id_bytes(pos);
483            change_id_bytes.cmp(prefix)
484        })
485    }
486}
487
488impl CommitIndexSegment for ReadonlyCommitIndexSegment {
489    fn num_parent_commits(&self) -> u32 {
490        self.num_parent_commits
491    }
492
493    fn num_local_commits(&self) -> u32 {
494        self.num_local_commits
495    }
496
497    fn parent_file(&self) -> Option<&Arc<ReadonlyCommitIndexSegment>> {
498        self.parent_file.as_ref()
499    }
500
501    fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalCommitPosition> {
502        self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
503            .ok()
504            .map(|pos| self.commit_lookup_pos(pos))
505    }
506
507    fn resolve_neighbor_commit_ids(
508        &self,
509        commit_id: &CommitId,
510    ) -> (Option<CommitId>, Option<CommitId>) {
511        self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
512            .map_neighbors(|pos| {
513                let local_pos = self.commit_lookup_pos(pos);
514                let entry = self.graph_entry(local_pos);
515                entry.commit_id()
516            })
517    }
518
519    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
520        self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
521            .prefix_matches(prefix, |pos| {
522                let local_pos = self.commit_lookup_pos(pos);
523                let entry = self.graph_entry(local_pos);
524                entry.commit_id()
525            })
526            .map(|(id, _)| id)
527    }
528
529    fn resolve_neighbor_change_ids(
530        &self,
531        change_id: &ChangeId,
532    ) -> (Option<ChangeId>, Option<ChangeId>) {
533        self.change_id_byte_prefix_to_lookup_pos(change_id.as_bytes())
534            .map_neighbors(|pos| self.change_lookup_id(pos))
535    }
536
537    fn resolve_change_id_prefix(
538        &self,
539        prefix: &HexPrefix,
540    ) -> PrefixResolution<(ChangeId, SmallLocalCommitPositionsVec)> {
541        self.change_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
542            .prefix_matches(prefix, |pos| self.change_lookup_id(pos))
543            .map(|(id, lookup_pos)| {
544                let change_pos = self.change_lookup_pos(lookup_pos);
545                if let Some(local_pos) = change_pos.as_inlined() {
546                    (id, smallvec![local_pos])
547                } else {
548                    let overflow_pos = change_pos.as_overflow().unwrap();
549                    // Collect commits having the same change id. For cache
550                    // locality, it might be better to look for the next few
551                    // change id positions to determine the size.
552                    let positions: SmallLocalCommitPositionsVec = self
553                        .overflow_changes_from(overflow_pos)
554                        .take_while(|&local_pos| {
555                            let entry = self.graph_entry(local_pos);
556                            entry.change_id_lookup_pos() == lookup_pos
557                        })
558                        .collect();
559                    debug_assert_eq!(
560                        overflow_pos + u32::try_from(positions.len()).unwrap(),
561                        (lookup_pos + 1..self.num_local_change_ids)
562                            .find_map(|lookup_pos| self.change_lookup_pos(lookup_pos).as_overflow())
563                            .unwrap_or(self.num_change_overflow_entries),
564                        "all overflow positions to the next change id should be collected"
565                    );
566                    (id, positions)
567                }
568            })
569    }
570
571    fn generation_number(&self, local_pos: LocalCommitPosition) -> u32 {
572        self.graph_entry(local_pos).generation_number()
573    }
574
575    fn commit_id(&self, local_pos: LocalCommitPosition) -> CommitId {
576        self.graph_entry(local_pos).commit_id()
577    }
578
579    fn change_id(&self, local_pos: LocalCommitPosition) -> ChangeId {
580        let entry = self.graph_entry(local_pos);
581        self.change_lookup_id(entry.change_id_lookup_pos())
582    }
583
584    fn num_parents(&self, local_pos: LocalCommitPosition) -> u32 {
585        let graph_entry = self.graph_entry(local_pos);
586        let pos1_or_overflow_pos = graph_entry.parent1_pos_or_overflow_pos();
587        let pos2_or_overflow_len = graph_entry.parent2_pos_or_overflow_len();
588        let inlined_len1 = pos1_or_overflow_pos.as_inlined().is_some() as u32;
589        let inlined_len2 = pos2_or_overflow_len.as_inlined().is_some() as u32;
590        let overflow_len = pos2_or_overflow_len.as_overflow().unwrap_or(0);
591        inlined_len1 + inlined_len2 + overflow_len
592    }
593
594    fn parent_positions(&self, local_pos: LocalCommitPosition) -> SmallGlobalCommitPositionsVec {
595        let graph_entry = self.graph_entry(local_pos);
596        let pos1_or_overflow_pos = graph_entry.parent1_pos_or_overflow_pos();
597        let pos2_or_overflow_len = graph_entry.parent2_pos_or_overflow_len();
598        if let Some(pos1) = pos1_or_overflow_pos.as_inlined() {
599            if let Some(pos2) = pos2_or_overflow_len.as_inlined() {
600                smallvec![pos1, pos2]
601            } else {
602                smallvec![pos1]
603            }
604        } else {
605            let overflow_pos = pos1_or_overflow_pos.as_overflow().unwrap();
606            let num_parents = pos2_or_overflow_len.as_overflow().unwrap();
607            self.overflow_parents(overflow_pos, num_parents)
608        }
609    }
610}
611
612/// Commit index backend which stores data on local disk.
613#[derive(Clone, Debug)]
614pub struct DefaultReadonlyIndex(CompositeIndex);
615
616impl DefaultReadonlyIndex {
617    pub(super) fn from_segment(
618        commits: Arc<ReadonlyCommitIndexSegment>,
619        changed_paths: CompositeChangedPathIndex,
620    ) -> Self {
621        Self(CompositeIndex::from_readonly(commits, changed_paths))
622    }
623
624    pub(super) fn readonly_commits(&self) -> &Arc<ReadonlyCommitIndexSegment> {
625        self.0.readonly_commits().expect("must have readonly")
626    }
627
628    pub(super) fn changed_paths(&self) -> &CompositeChangedPathIndex {
629        self.0.changed_paths()
630    }
631
632    /// Returns the number of all indexed commits.
633    pub fn num_commits(&self) -> u32 {
634        self.0.commits().num_commits()
635    }
636
637    /// Collects statistics of indexed commits and segments.
638    pub fn stats(&self) -> IndexStats {
639        let commits = self.readonly_commits();
640        let num_commits = commits.as_composite().num_commits();
641        let mut num_merges = 0;
642        let mut max_generation_number = 0;
643        let mut change_ids = HashSet::new();
644        for pos in (0..num_commits).map(GlobalCommitPosition) {
645            let entry = commits.as_composite().entry_by_pos(pos);
646            max_generation_number = max_generation_number.max(entry.generation_number());
647            if entry.num_parents() > 1 {
648                num_merges += 1;
649            }
650            change_ids.insert(entry.change_id());
651        }
652        let num_heads = u32::try_from(commits.as_composite().all_heads_pos().count()).unwrap();
653        let mut commit_levels = iter::successors(Some(commits), |segment| segment.parent_file())
654            .map(|segment| CommitIndexLevelStats {
655                num_commits: segment.num_local_commits(),
656                name: segment.id().hex(),
657            })
658            .collect_vec();
659        commit_levels.reverse();
660
661        let changed_paths = self.changed_paths();
662        let changed_path_commits_range = changed_paths
663            .start_commit_pos()
664            .map(|GlobalCommitPosition(start)| start..(start + changed_paths.num_commits()));
665        let changed_path_levels = changed_paths
666            .readonly_segments()
667            .iter()
668            .map(|segment| ChangedPathIndexLevelStats {
669                num_commits: segment.num_local_commits(),
670                num_changed_paths: segment.num_changed_paths(),
671                num_paths: segment.num_paths(),
672                name: segment.id().hex(),
673            })
674            .collect_vec();
675
676        IndexStats {
677            num_commits,
678            num_merges,
679            max_generation_number,
680            num_heads,
681            num_changes: change_ids.len().try_into().unwrap(),
682            commit_levels,
683            changed_path_commits_range,
684            changed_path_levels,
685        }
686    }
687
688    /// Looks up generation of the specified commit.
689    pub fn generation_number(&self, commit_id: &CommitId) -> Option<u32> {
690        let entry = self.0.commits().entry_by_id(commit_id)?;
691        Some(entry.generation_number())
692    }
693
694    #[doc(hidden)] // for tests
695    pub fn evaluate_revset_impl(
696        &self,
697        expression: &ResolvedExpression,
698        store: &Arc<Store>,
699    ) -> Result<DefaultReadonlyIndexRevset, RevsetEvaluationError> {
700        let inner = revset_engine::evaluate(expression, store, self.clone())?;
701        Ok(DefaultReadonlyIndexRevset { inner })
702    }
703
704    pub(super) fn start_modification(&self) -> DefaultMutableIndex {
705        DefaultMutableIndex::incremental(self)
706    }
707}
708
709impl AsCompositeIndex for DefaultReadonlyIndex {
710    fn as_composite(&self) -> &CompositeIndex {
711        &self.0
712    }
713}
714
715impl Index for DefaultReadonlyIndex {
716    fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> usize {
717        self.0.shortest_unique_commit_id_prefix_len(commit_id)
718    }
719
720    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
721        self.0.resolve_commit_id_prefix(prefix)
722    }
723
724    fn has_id(&self, commit_id: &CommitId) -> bool {
725        self.0.has_id(commit_id)
726    }
727
728    fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> bool {
729        self.0.is_ancestor(ancestor_id, descendant_id)
730    }
731
732    fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId> {
733        self.0.common_ancestors(set1, set2)
734    }
735
736    fn all_heads_for_gc(
737        &self,
738    ) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
739        self.0.all_heads_for_gc()
740    }
741
742    fn heads(
743        &self,
744        candidates: &mut dyn Iterator<Item = &CommitId>,
745    ) -> Result<Vec<CommitId>, IndexError> {
746        self.0.heads(candidates)
747    }
748
749    fn changed_paths_in_commit(
750        &self,
751        commit_id: &CommitId,
752    ) -> Result<Option<Box<dyn Iterator<Item = RepoPathBuf> + '_>>, IndexError> {
753        self.0.changed_paths_in_commit(commit_id)
754    }
755
756    fn evaluate_revset(
757        &self,
758        expression: &ResolvedExpression,
759        store: &Arc<Store>,
760    ) -> Result<Box<dyn Revset + '_>, RevsetEvaluationError> {
761        self.0.evaluate_revset(expression, store)
762    }
763}
764
765impl ReadonlyIndex for DefaultReadonlyIndex {
766    fn as_index(&self) -> &dyn Index {
767        self
768    }
769
770    fn change_id_index(
771        &self,
772        heads: &mut dyn Iterator<Item = &CommitId>,
773    ) -> Box<dyn ChangeIdIndex> {
774        Box::new(ChangeIdIndexImpl::new(self.clone(), heads))
775    }
776
777    fn start_modification(&self) -> Box<dyn MutableIndex> {
778        Box::new(Self::start_modification(self))
779    }
780}
781
782#[derive(Debug)]
783#[doc(hidden)] // for tests
784pub struct DefaultReadonlyIndexRevset {
785    inner: RevsetImpl<DefaultReadonlyIndex>,
786}
787
788impl DefaultReadonlyIndexRevset {
789    pub fn iter_graph_impl(
790        &self,
791        skip_transitive_edges: bool,
792    ) -> impl Iterator<Item = Result<GraphNode<CommitId>, RevsetEvaluationError>> {
793        self.inner.iter_graph_impl(skip_transitive_edges)
794    }
795
796    pub fn into_inner(self) -> Box<dyn Revset> {
797        Box::new(self.inner)
798    }
799}
800
801#[derive(Clone, Debug)]
802pub struct IndexStats {
803    pub num_commits: u32,
804    pub num_merges: u32,
805    pub max_generation_number: u32,
806    pub num_heads: u32,
807    pub num_changes: u32,
808    pub commit_levels: Vec<CommitIndexLevelStats>,
809    pub changed_path_commits_range: Option<Range<u32>>,
810    pub changed_path_levels: Vec<ChangedPathIndexLevelStats>,
811}
812
813#[derive(Clone, Debug)]
814pub struct CommitIndexLevelStats {
815    pub num_commits: u32,
816    pub name: String,
817}
818
819#[derive(Clone, Debug)]
820pub struct ChangedPathIndexLevelStats {
821    /// Number of commits.
822    pub num_commits: u32,
823    /// Sum of number of per-commit changed paths.
824    pub num_changed_paths: u32,
825    /// Number of unique paths.
826    pub num_paths: u32,
827    /// Index file name.
828    pub name: String,
829}
830
831/// Binary search result in a sorted lookup table.
832#[derive(Clone, Copy, Debug)]
833struct PositionLookupResult {
834    /// `Ok` means the element is found at the position. `Err` contains the
835    /// position where the element could be inserted.
836    result: Result<u32, u32>,
837    size: u32,
838}
839
840impl PositionLookupResult {
841    /// Returns position of the element if exactly matched.
842    fn ok(self) -> Option<u32> {
843        self.result.ok()
844    }
845
846    /// Returns `(previous, next)` positions of the matching element or
847    /// boundary.
848    fn neighbors(self) -> (Option<u32>, Option<u32>) {
849        match self.result {
850            Ok(pos) => (pos.checked_sub(1), (pos + 1..self.size).next()),
851            Err(pos) => (pos.checked_sub(1), (pos..self.size).next()),
852        }
853    }
854
855    /// Looks up `(previous, next)` elements by the given function.
856    fn map_neighbors<T>(self, mut lookup: impl FnMut(u32) -> T) -> (Option<T>, Option<T>) {
857        let (prev_pos, next_pos) = self.neighbors();
858        (prev_pos.map(&mut lookup), next_pos.map(&mut lookup))
859    }
860
861    /// Looks up matching elements from the current position, returns one if
862    /// the given `prefix` unambiguously matches.
863    fn prefix_matches<T: ObjectId>(
864        self,
865        prefix: &HexPrefix,
866        lookup: impl FnMut(u32) -> T,
867    ) -> PrefixResolution<(T, u32)> {
868        let lookup_pos = self.result.unwrap_or_else(|pos| pos);
869        let mut matches = (lookup_pos..self.size)
870            .map(lookup)
871            .take_while(|id| prefix.matches(id))
872            .fuse();
873        match (matches.next(), matches.next()) {
874            (Some(id), None) => PrefixResolution::SingleMatch((id, lookup_pos)),
875            (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
876            (None, _) => PrefixResolution::NoMatch,
877        }
878    }
879}
880
881/// Binary searches u32 position with the given comparison function.
882fn binary_search_pos_by(size: u32, mut f: impl FnMut(u32) -> Ordering) -> PositionLookupResult {
883    let mut low = 0;
884    let mut high = size;
885    while low < high {
886        let mid = (low + high) / 2;
887        let cmp = f(mid);
888        // According to Rust std lib, this produces cmov instructions.
889        // https://github.com/rust-lang/rust/blob/1.76.0/library/core/src/slice/mod.rs#L2845-L2855
890        low = if cmp == Ordering::Less { mid + 1 } else { low };
891        high = if cmp == Ordering::Greater { mid } else { high };
892        if cmp == Ordering::Equal {
893            let result = Ok(mid);
894            return PositionLookupResult { result, size };
895        }
896    }
897    let result = Err(low);
898    PositionLookupResult { result, size }
899}