jj_lib/default_index/
readonly.rs

1// Copyright 2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::cmp::Ordering;
18use std::collections::HashSet;
19use std::fmt;
20use std::fmt::Debug;
21use std::fs::File;
22use std::io;
23use std::io::Read;
24use std::iter;
25use std::ops::Range;
26use std::path::Path;
27use std::sync::Arc;
28
29use itertools::Itertools as _;
30use smallvec::smallvec;
31use thiserror::Error;
32
33use super::changed_path::CompositeChangedPathIndex;
34use super::composite::AsCompositeIndex;
35use super::composite::ChangeIdIndexImpl;
36use super::composite::CommitIndexSegment;
37use super::composite::CommitIndexSegmentId;
38use super::composite::CompositeCommitIndex;
39use super::composite::CompositeIndex;
40use super::entry::GlobalCommitPosition;
41use super::entry::LocalCommitPosition;
42use super::entry::SmallGlobalCommitPositionsVec;
43use super::entry::SmallLocalCommitPositionsVec;
44use super::mutable::DefaultMutableIndex;
45use super::revset_engine;
46use super::revset_engine::RevsetImpl;
47use crate::backend::ChangeId;
48use crate::backend::CommitId;
49use crate::graph::GraphNode;
50use crate::index::ChangeIdIndex;
51use crate::index::Index;
52use crate::index::IndexResult;
53use crate::index::MutableIndex;
54use crate::index::ReadonlyIndex;
55use crate::object_id::HexPrefix;
56use crate::object_id::ObjectId;
57use crate::object_id::PrefixResolution;
58use crate::repo_path::RepoPathBuf;
59use crate::revset::ResolvedExpression;
60use crate::revset::Revset;
61use crate::revset::RevsetEvaluationError;
62use crate::store::Store;
63
64/// Error while loading index segment file.
65#[derive(Debug, Error)]
66pub enum ReadonlyIndexLoadError {
67    #[error("Unexpected {kind} index version")]
68    UnexpectedVersion {
69        /// Index type.
70        kind: &'static str,
71        found_version: u32,
72        expected_version: u32,
73    },
74    #[error("Failed to load {kind} index file '{name}'")]
75    Other {
76        /// Index type.
77        kind: &'static str,
78        /// Index file name.
79        name: String,
80        /// Underlying error.
81        #[source]
82        error: io::Error,
83    },
84}
85
86impl ReadonlyIndexLoadError {
87    pub(super) fn invalid_data(
88        kind: &'static str,
89        name: impl Into<String>,
90        error: impl Into<Box<dyn std::error::Error + Send + Sync>>,
91    ) -> Self {
92        Self::from_io_err(
93            kind,
94            name,
95            io::Error::new(io::ErrorKind::InvalidData, error),
96        )
97    }
98
99    pub(super) fn from_io_err(
100        kind: &'static str,
101        name: impl Into<String>,
102        error: io::Error,
103    ) -> Self {
104        Self::Other {
105            kind,
106            name: name.into(),
107            error,
108        }
109    }
110
111    /// Returns true if the underlying error suggests data corruption.
112    pub(super) fn is_corrupt_or_not_found(&self) -> bool {
113        match self {
114            Self::UnexpectedVersion { .. } => true,
115            Self::Other { error, .. } => {
116                // If the parent file name field is corrupt, the file wouldn't be found.
117                // And there's no need to distinguish it from an empty file.
118                matches!(
119                    error.kind(),
120                    io::ErrorKind::NotFound
121                        | io::ErrorKind::InvalidData
122                        | io::ErrorKind::UnexpectedEof
123                )
124            }
125        }
126    }
127}
128
129/// Current format version of the commit index segment file.
130pub(super) const COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 6;
131
132/// If set, the value is stored in the overflow table.
133pub(super) const OVERFLOW_FLAG: u32 = 0x8000_0000;
134
135/// Global index position of parent entry, or overflow pointer.
136#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
137struct ParentIndexPosition(u32);
138
139impl ParentIndexPosition {
140    fn as_inlined(self) -> Option<GlobalCommitPosition> {
141        (self.0 & OVERFLOW_FLAG == 0).then_some(GlobalCommitPosition(self.0))
142    }
143
144    fn as_overflow(self) -> Option<u32> {
145        (self.0 & OVERFLOW_FLAG != 0).then_some(!self.0)
146    }
147}
148
149/// Local position of entry pointed by change id, or overflow pointer.
150#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
151struct ChangeLocalPosition(u32);
152
153impl ChangeLocalPosition {
154    fn as_inlined(self) -> Option<LocalCommitPosition> {
155        (self.0 & OVERFLOW_FLAG == 0).then_some(LocalCommitPosition(self.0))
156    }
157
158    fn as_overflow(self) -> Option<u32> {
159        (self.0 & OVERFLOW_FLAG != 0).then_some(!self.0)
160    }
161}
162
163/// Lengths of fields to be serialized.
164#[derive(Clone, Copy, Debug)]
165pub(super) struct FieldLengths {
166    pub commit_id: usize,
167    pub change_id: usize,
168}
169
170struct CommitGraphEntry<'a> {
171    data: &'a [u8],
172}
173
174// TODO: Add pointers to ancestors further back, like a skip list. Clear the
175// lowest set bit to determine which generation number the pointers point to.
176impl CommitGraphEntry<'_> {
177    fn size(commit_id_length: usize) -> usize {
178        16 + commit_id_length
179    }
180
181    fn generation_number(&self) -> u32 {
182        u32::from_le_bytes(self.data[0..4].try_into().unwrap())
183    }
184
185    fn parent1_pos_or_overflow_pos(&self) -> ParentIndexPosition {
186        ParentIndexPosition(u32::from_le_bytes(self.data[4..8].try_into().unwrap()))
187    }
188
189    fn parent2_pos_or_overflow_len(&self) -> ParentIndexPosition {
190        ParentIndexPosition(u32::from_le_bytes(self.data[8..12].try_into().unwrap()))
191    }
192
193    fn change_id_lookup_pos(&self) -> u32 {
194        u32::from_le_bytes(self.data[12..16].try_into().unwrap())
195    }
196
197    fn commit_id(&self) -> CommitId {
198        CommitId::from_bytes(self.commit_id_bytes())
199    }
200
201    // might be better to add borrowed version of CommitId
202    fn commit_id_bytes(&self) -> &[u8] {
203        &self.data[16..]
204    }
205}
206
207/// Commit index segment backed by immutable file.
208///
209/// File format:
210/// ```text
211/// u32: file format version
212/// u32: parent segment file name length (0 means root)
213/// <length number of bytes>: parent segment file name
214///
215/// u32: number of local commit entries
216/// u32: number of local change ids
217/// u32: number of overflow parent entries
218/// u32: number of overflow change id positions
219/// for each entry, in some topological order with parents first:
220///   u32: generation number
221///   if number of parents <= 2:
222///     u32: (< 0x8000_0000) global index position for parent 1
223///          (==0xffff_ffff) no parent 1
224///     u32: (< 0x8000_0000) global index position for parent 2
225///          (==0xffff_ffff) no parent 2
226///   else:
227///     u32: (>=0x8000_0000) position in the overflow table, bit-negated
228///     u32: (>=0x8000_0000) number of parents (in the overflow table), bit-negated
229///   u32: change id position in the sorted change ids table
230///   <commit id length number of bytes>: commit id
231/// for each entry, sorted by commit id:
232///   u32: local position in the graph entries table
233/// for each entry, sorted by change id:
234///   <change id length number of bytes>: change id
235/// for each entry, sorted by change id:
236///   if number of associated commits == 1:
237///     u32: (< 0x8000_0000) local position in the graph entries table
238///   else:
239///     u32: (>=0x8000_0000) position in the overflow table, bit-negated
240/// for each overflow parent:
241///   u32: global index position
242/// for each overflow change id entry:
243///   u32: local position in the graph entries table
244/// ```
245///
246/// Note that u32 fields are 4-byte aligned so long as the parent file name
247/// (which is hexadecimal hash) and commit/change ids aren't of exotic length.
248// TODO: replace the table by a trie so we don't have to repeat the full commit
249//       ids
250// TODO: add a fanout table like git's commit graph has?
251pub(super) struct ReadonlyCommitIndexSegment {
252    parent_file: Option<Arc<Self>>,
253    num_parent_commits: u32,
254    id: CommitIndexSegmentId,
255    field_lengths: FieldLengths,
256    // Number of commits not counting the parent file
257    num_local_commits: u32,
258    num_local_change_ids: u32,
259    num_change_overflow_entries: u32,
260    // Base data offsets in bytes:
261    commit_lookup_base: usize,
262    change_id_table_base: usize,
263    change_pos_table_base: usize,
264    parent_overflow_base: usize,
265    change_overflow_base: usize,
266    data: Vec<u8>,
267}
268
269impl Debug for ReadonlyCommitIndexSegment {
270    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
271        f.debug_struct("ReadonlyCommitIndexSegment")
272            .field("id", &self.id)
273            .field("parent_file", &self.parent_file)
274            .finish_non_exhaustive()
275    }
276}
277
278impl ReadonlyCommitIndexSegment {
279    /// Loads both parent segments and local entries from the given file `name`.
280    pub(super) fn load(
281        dir: &Path,
282        id: CommitIndexSegmentId,
283        lengths: FieldLengths,
284    ) -> Result<Arc<Self>, ReadonlyIndexLoadError> {
285        let mut file = File::open(dir.join(id.hex()))
286            .map_err(|err| ReadonlyIndexLoadError::from_io_err("commit", id.hex(), err))?;
287        Self::load_from(&mut file, dir, id, lengths)
288    }
289
290    /// Loads both parent segments and local entries from the given `file`.
291    pub(super) fn load_from(
292        file: &mut dyn Read,
293        dir: &Path,
294        id: CommitIndexSegmentId,
295        lengths: FieldLengths,
296    ) -> Result<Arc<Self>, ReadonlyIndexLoadError> {
297        let from_io_err = |err| ReadonlyIndexLoadError::from_io_err("commit", id.hex(), err);
298        let read_u32 = |file: &mut dyn Read| {
299            let mut buf = [0; 4];
300            file.read_exact(&mut buf).map_err(from_io_err)?;
301            Ok(u32::from_le_bytes(buf))
302        };
303        let format_version = read_u32(file)?;
304        if format_version != COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION {
305            return Err(ReadonlyIndexLoadError::UnexpectedVersion {
306                kind: "commit",
307                found_version: format_version,
308                expected_version: COMMIT_INDEX_SEGMENT_FILE_FORMAT_VERSION,
309            });
310        }
311        let parent_filename_len = read_u32(file)?;
312        let maybe_parent_file = if parent_filename_len > 0 {
313            let mut parent_filename_bytes = vec![0; parent_filename_len as usize];
314            file.read_exact(&mut parent_filename_bytes)
315                .map_err(from_io_err)?;
316            let parent_file_id = CommitIndexSegmentId::try_from_hex(parent_filename_bytes)
317                .ok_or_else(|| {
318                    ReadonlyIndexLoadError::invalid_data(
319                        "commit",
320                        id.hex(),
321                        "parent file name is not valid hex",
322                    )
323                })?;
324            let parent_file = Self::load(dir, parent_file_id, lengths)?;
325            Some(parent_file)
326        } else {
327            None
328        };
329        Self::load_with_parent_file(file, id, maybe_parent_file, lengths)
330    }
331
332    /// Loads local entries from the given `file`, returns new segment linked to
333    /// the given `parent_file`.
334    pub(super) fn load_with_parent_file(
335        file: &mut dyn Read,
336        id: CommitIndexSegmentId,
337        parent_file: Option<Arc<Self>>,
338        lengths: FieldLengths,
339    ) -> Result<Arc<Self>, ReadonlyIndexLoadError> {
340        let from_io_err = |err| ReadonlyIndexLoadError::from_io_err("commit", id.hex(), err);
341        let read_u32 = |file: &mut dyn Read| {
342            let mut buf = [0; 4];
343            file.read_exact(&mut buf).map_err(from_io_err)?;
344            Ok(u32::from_le_bytes(buf))
345        };
346        let num_parent_commits = parent_file
347            .as_ref()
348            .map_or(0, |segment| segment.as_composite().num_commits());
349        let num_local_commits = read_u32(file)?;
350        let num_local_change_ids = read_u32(file)?;
351        let num_parent_overflow_entries = read_u32(file)?;
352        let num_change_overflow_entries = read_u32(file)?;
353        let mut data = vec![];
354        file.read_to_end(&mut data).map_err(from_io_err)?;
355
356        let commit_graph_entry_size = CommitGraphEntry::size(lengths.commit_id);
357        let graph_size = (num_local_commits as usize) * commit_graph_entry_size;
358        let commit_lookup_size = (num_local_commits as usize) * 4;
359        let change_id_table_size = (num_local_change_ids as usize) * lengths.change_id;
360        let change_pos_table_size = (num_local_change_ids as usize) * 4;
361        let parent_overflow_size = (num_parent_overflow_entries as usize) * 4;
362        let change_overflow_size = (num_change_overflow_entries as usize) * 4;
363
364        let graph_base = 0;
365        let commit_lookup_base = graph_base + graph_size;
366        let change_id_table_base = commit_lookup_base + commit_lookup_size;
367        let change_pos_table_base = change_id_table_base + change_id_table_size;
368        let parent_overflow_base = change_pos_table_base + change_pos_table_size;
369        let change_overflow_base = parent_overflow_base + parent_overflow_size;
370        let expected_size = change_overflow_base + change_overflow_size;
371
372        if data.len() != expected_size {
373            return Err(ReadonlyIndexLoadError::invalid_data(
374                "commit",
375                id.hex(),
376                "unexpected data length",
377            ));
378        }
379
380        Ok(Arc::new(Self {
381            parent_file,
382            num_parent_commits,
383            id,
384            field_lengths: lengths,
385            num_local_commits,
386            num_local_change_ids,
387            num_change_overflow_entries,
388            commit_lookup_base,
389            change_id_table_base,
390            change_pos_table_base,
391            parent_overflow_base,
392            change_overflow_base,
393            data,
394        }))
395    }
396
397    pub(super) fn as_composite(&self) -> &CompositeCommitIndex {
398        CompositeCommitIndex::new(self)
399    }
400
401    pub(super) fn id(&self) -> &CommitIndexSegmentId {
402        &self.id
403    }
404
405    pub(super) fn field_lengths(&self) -> FieldLengths {
406        self.field_lengths
407    }
408
409    fn graph_entry(&self, local_pos: LocalCommitPosition) -> CommitGraphEntry<'_> {
410        let table = &self.data[..self.commit_lookup_base];
411        let entry_size = CommitGraphEntry::size(self.field_lengths.commit_id);
412        let offset = (local_pos.0 as usize) * entry_size;
413        CommitGraphEntry {
414            data: &table[offset..][..entry_size],
415        }
416    }
417
418    fn commit_lookup_pos(&self, lookup_pos: u32) -> LocalCommitPosition {
419        let table = &self.data[self.commit_lookup_base..self.change_id_table_base];
420        let offset = (lookup_pos as usize) * 4;
421        LocalCommitPosition(u32::from_le_bytes(table[offset..][..4].try_into().unwrap()))
422    }
423
424    fn change_lookup_id(&self, lookup_pos: u32) -> ChangeId {
425        ChangeId::from_bytes(self.change_lookup_id_bytes(lookup_pos))
426    }
427
428    // might be better to add borrowed version of ChangeId
429    fn change_lookup_id_bytes(&self, lookup_pos: u32) -> &[u8] {
430        let table = &self.data[self.change_id_table_base..self.change_pos_table_base];
431        let offset = (lookup_pos as usize) * self.field_lengths.change_id;
432        &table[offset..][..self.field_lengths.change_id]
433    }
434
435    fn change_lookup_pos(&self, lookup_pos: u32) -> ChangeLocalPosition {
436        let table = &self.data[self.change_pos_table_base..self.parent_overflow_base];
437        let offset = (lookup_pos as usize) * 4;
438        ChangeLocalPosition(u32::from_le_bytes(table[offset..][..4].try_into().unwrap()))
439    }
440
441    fn overflow_parents(
442        &self,
443        overflow_pos: u32,
444        num_parents: u32,
445    ) -> SmallGlobalCommitPositionsVec {
446        let table = &self.data[self.parent_overflow_base..self.change_overflow_base];
447        let offset = (overflow_pos as usize) * 4;
448        let size = (num_parents as usize) * 4;
449        let (chunks, _remainder) = table[offset..][..size].as_chunks();
450        chunks
451            .iter()
452            .map(|&chunk: &[u8; 4]| GlobalCommitPosition(u32::from_le_bytes(chunk)))
453            .collect()
454    }
455
456    /// Scans graph entry positions stored in the overflow change ids table.
457    fn overflow_changes_from(
458        &self,
459        overflow_pos: u32,
460    ) -> impl Iterator<Item = LocalCommitPosition> {
461        let table = &self.data[self.change_overflow_base..];
462        let offset = (overflow_pos as usize) * 4;
463        let (chunks, _remainder) = table[offset..].as_chunks();
464        chunks
465            .iter()
466            .map(|&chunk: &[u8; 4]| LocalCommitPosition(u32::from_le_bytes(chunk)))
467    }
468
469    /// Binary searches commit id by `prefix`. Returns the lookup position.
470    fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
471        binary_search_pos_by(self.num_local_commits, |pos| {
472            let local_pos = self.commit_lookup_pos(pos);
473            let entry = self.graph_entry(local_pos);
474            entry.commit_id_bytes().cmp(prefix)
475        })
476    }
477
478    /// Binary searches change id by `prefix`. Returns the lookup position.
479    fn change_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
480        binary_search_pos_by(self.num_local_change_ids, |pos| {
481            let change_id_bytes = self.change_lookup_id_bytes(pos);
482            change_id_bytes.cmp(prefix)
483        })
484    }
485}
486
487impl CommitIndexSegment for ReadonlyCommitIndexSegment {
488    fn num_parent_commits(&self) -> u32 {
489        self.num_parent_commits
490    }
491
492    fn num_local_commits(&self) -> u32 {
493        self.num_local_commits
494    }
495
496    fn parent_file(&self) -> Option<&Arc<ReadonlyCommitIndexSegment>> {
497        self.parent_file.as_ref()
498    }
499
500    fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalCommitPosition> {
501        self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
502            .ok()
503            .map(|pos| self.commit_lookup_pos(pos))
504    }
505
506    fn resolve_neighbor_commit_ids(
507        &self,
508        commit_id: &CommitId,
509    ) -> (Option<CommitId>, Option<CommitId>) {
510        self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
511            .map_neighbors(|pos| {
512                let local_pos = self.commit_lookup_pos(pos);
513                let entry = self.graph_entry(local_pos);
514                entry.commit_id()
515            })
516    }
517
518    fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
519        self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
520            .prefix_matches(prefix, |pos| {
521                let local_pos = self.commit_lookup_pos(pos);
522                let entry = self.graph_entry(local_pos);
523                entry.commit_id()
524            })
525            .map(|(id, _)| id)
526    }
527
528    fn resolve_neighbor_change_ids(
529        &self,
530        change_id: &ChangeId,
531    ) -> (Option<ChangeId>, Option<ChangeId>) {
532        self.change_id_byte_prefix_to_lookup_pos(change_id.as_bytes())
533            .map_neighbors(|pos| self.change_lookup_id(pos))
534    }
535
536    fn resolve_change_id_prefix(
537        &self,
538        prefix: &HexPrefix,
539    ) -> PrefixResolution<(ChangeId, SmallLocalCommitPositionsVec)> {
540        self.change_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
541            .prefix_matches(prefix, |pos| self.change_lookup_id(pos))
542            .map(|(id, lookup_pos)| {
543                let change_pos = self.change_lookup_pos(lookup_pos);
544                if let Some(local_pos) = change_pos.as_inlined() {
545                    (id, smallvec![local_pos])
546                } else {
547                    let overflow_pos = change_pos.as_overflow().unwrap();
548                    // Collect commits having the same change id. For cache
549                    // locality, it might be better to look for the next few
550                    // change id positions to determine the size.
551                    let positions: SmallLocalCommitPositionsVec = self
552                        .overflow_changes_from(overflow_pos)
553                        .take_while(|&local_pos| {
554                            let entry = self.graph_entry(local_pos);
555                            entry.change_id_lookup_pos() == lookup_pos
556                        })
557                        .collect();
558                    debug_assert_eq!(
559                        overflow_pos + u32::try_from(positions.len()).unwrap(),
560                        (lookup_pos + 1..self.num_local_change_ids)
561                            .find_map(|lookup_pos| self.change_lookup_pos(lookup_pos).as_overflow())
562                            .unwrap_or(self.num_change_overflow_entries),
563                        "all overflow positions to the next change id should be collected"
564                    );
565                    (id, positions)
566                }
567            })
568    }
569
570    fn generation_number(&self, local_pos: LocalCommitPosition) -> u32 {
571        self.graph_entry(local_pos).generation_number()
572    }
573
574    fn commit_id(&self, local_pos: LocalCommitPosition) -> CommitId {
575        self.graph_entry(local_pos).commit_id()
576    }
577
578    fn change_id(&self, local_pos: LocalCommitPosition) -> ChangeId {
579        let entry = self.graph_entry(local_pos);
580        self.change_lookup_id(entry.change_id_lookup_pos())
581    }
582
583    fn num_parents(&self, local_pos: LocalCommitPosition) -> u32 {
584        let graph_entry = self.graph_entry(local_pos);
585        let pos1_or_overflow_pos = graph_entry.parent1_pos_or_overflow_pos();
586        let pos2_or_overflow_len = graph_entry.parent2_pos_or_overflow_len();
587        let inlined_len1 = pos1_or_overflow_pos.as_inlined().is_some() as u32;
588        let inlined_len2 = pos2_or_overflow_len.as_inlined().is_some() as u32;
589        let overflow_len = pos2_or_overflow_len.as_overflow().unwrap_or(0);
590        inlined_len1 + inlined_len2 + overflow_len
591    }
592
593    fn parent_positions(&self, local_pos: LocalCommitPosition) -> SmallGlobalCommitPositionsVec {
594        let graph_entry = self.graph_entry(local_pos);
595        let pos1_or_overflow_pos = graph_entry.parent1_pos_or_overflow_pos();
596        let pos2_or_overflow_len = graph_entry.parent2_pos_or_overflow_len();
597        if let Some(pos1) = pos1_or_overflow_pos.as_inlined() {
598            if let Some(pos2) = pos2_or_overflow_len.as_inlined() {
599                smallvec![pos1, pos2]
600            } else {
601                smallvec![pos1]
602            }
603        } else {
604            let overflow_pos = pos1_or_overflow_pos.as_overflow().unwrap();
605            let num_parents = pos2_or_overflow_len.as_overflow().unwrap();
606            self.overflow_parents(overflow_pos, num_parents)
607        }
608    }
609}
610
611/// Commit index backend which stores data on local disk.
612#[derive(Clone, Debug)]
613pub struct DefaultReadonlyIndex(CompositeIndex);
614
615impl DefaultReadonlyIndex {
616    pub(super) fn from_segment(
617        commits: Arc<ReadonlyCommitIndexSegment>,
618        changed_paths: CompositeChangedPathIndex,
619    ) -> Self {
620        Self(CompositeIndex::from_readonly(commits, changed_paths))
621    }
622
623    pub(super) fn readonly_commits(&self) -> &Arc<ReadonlyCommitIndexSegment> {
624        self.0.readonly_commits().expect("must have readonly")
625    }
626
627    pub(super) fn changed_paths(&self) -> &CompositeChangedPathIndex {
628        self.0.changed_paths()
629    }
630
631    pub(super) fn has_id_impl(&self, commit_id: &CommitId) -> bool {
632        self.0.commits().has_id(commit_id)
633    }
634
635    /// Returns the number of all indexed commits.
636    pub fn num_commits(&self) -> u32 {
637        self.0.commits().num_commits()
638    }
639
640    /// Collects statistics of indexed commits and segments.
641    pub fn stats(&self) -> IndexStats {
642        let commits = self.readonly_commits();
643        let num_commits = commits.as_composite().num_commits();
644        let mut num_merges = 0;
645        let mut max_generation_number = 0;
646        let mut change_ids = HashSet::new();
647        for pos in (0..num_commits).map(GlobalCommitPosition) {
648            let entry = commits.as_composite().entry_by_pos(pos);
649            max_generation_number = max_generation_number.max(entry.generation_number());
650            if entry.num_parents() > 1 {
651                num_merges += 1;
652            }
653            change_ids.insert(entry.change_id());
654        }
655        let num_heads = u32::try_from(commits.as_composite().all_heads_pos().count()).unwrap();
656        let mut commit_levels = iter::successors(Some(commits), |segment| segment.parent_file())
657            .map(|segment| CommitIndexLevelStats {
658                num_commits: segment.num_local_commits(),
659                name: segment.id().hex(),
660            })
661            .collect_vec();
662        commit_levels.reverse();
663
664        let changed_paths = self.changed_paths();
665        let changed_path_commits_range = changed_paths
666            .start_commit_pos()
667            .map(|GlobalCommitPosition(start)| start..(start + changed_paths.num_commits()));
668        let changed_path_levels = changed_paths
669            .readonly_segments()
670            .iter()
671            .map(|segment| ChangedPathIndexLevelStats {
672                num_commits: segment.num_local_commits(),
673                num_changed_paths: segment.num_changed_paths(),
674                num_paths: segment.num_paths(),
675                name: segment.id().hex(),
676            })
677            .collect_vec();
678
679        IndexStats {
680            num_commits,
681            num_merges,
682            max_generation_number,
683            num_heads,
684            num_changes: change_ids.len().try_into().unwrap(),
685            commit_levels,
686            changed_path_commits_range,
687            changed_path_levels,
688        }
689    }
690
691    /// Looks up generation of the specified commit.
692    pub fn generation_number(&self, commit_id: &CommitId) -> Option<u32> {
693        let entry = self.0.commits().entry_by_id(commit_id)?;
694        Some(entry.generation_number())
695    }
696
697    #[doc(hidden)] // for tests
698    pub fn evaluate_revset_impl(
699        &self,
700        expression: &ResolvedExpression,
701        store: &Arc<Store>,
702    ) -> Result<DefaultReadonlyIndexRevset, RevsetEvaluationError> {
703        let inner = revset_engine::evaluate(expression, store, self.clone())?;
704        Ok(DefaultReadonlyIndexRevset { inner })
705    }
706
707    pub(super) fn start_modification(&self) -> DefaultMutableIndex {
708        DefaultMutableIndex::incremental(self)
709    }
710}
711
712impl AsCompositeIndex for DefaultReadonlyIndex {
713    fn as_composite(&self) -> &CompositeIndex {
714        &self.0
715    }
716}
717
718impl Index for DefaultReadonlyIndex {
719    fn shortest_unique_commit_id_prefix_len(&self, commit_id: &CommitId) -> IndexResult<usize> {
720        self.0.shortest_unique_commit_id_prefix_len(commit_id)
721    }
722
723    fn resolve_commit_id_prefix(
724        &self,
725        prefix: &HexPrefix,
726    ) -> IndexResult<PrefixResolution<CommitId>> {
727        self.0.resolve_commit_id_prefix(prefix)
728    }
729
730    fn has_id(&self, commit_id: &CommitId) -> IndexResult<bool> {
731        Ok(self.has_id_impl(commit_id))
732    }
733
734    fn is_ancestor(&self, ancestor_id: &CommitId, descendant_id: &CommitId) -> IndexResult<bool> {
735        self.0.is_ancestor(ancestor_id, descendant_id)
736    }
737
738    fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> IndexResult<Vec<CommitId>> {
739        self.0.common_ancestors(set1, set2)
740    }
741
742    fn all_heads_for_gc(&self) -> IndexResult<Box<dyn Iterator<Item = CommitId> + '_>> {
743        self.0.all_heads_for_gc()
744    }
745
746    fn heads(&self, candidates: &mut dyn Iterator<Item = &CommitId>) -> IndexResult<Vec<CommitId>> {
747        self.0.heads(candidates)
748    }
749
750    fn changed_paths_in_commit(
751        &self,
752        commit_id: &CommitId,
753    ) -> IndexResult<Option<Box<dyn Iterator<Item = RepoPathBuf> + '_>>> {
754        self.0.changed_paths_in_commit(commit_id)
755    }
756
757    fn evaluate_revset(
758        &self,
759        expression: &ResolvedExpression,
760        store: &Arc<Store>,
761    ) -> Result<Box<dyn Revset + '_>, RevsetEvaluationError> {
762        self.0.evaluate_revset(expression, store)
763    }
764}
765
766impl ReadonlyIndex for DefaultReadonlyIndex {
767    fn as_index(&self) -> &dyn Index {
768        self
769    }
770
771    fn change_id_index(
772        &self,
773        heads: &mut dyn Iterator<Item = &CommitId>,
774    ) -> Box<dyn ChangeIdIndex> {
775        Box::new(ChangeIdIndexImpl::new(self.clone(), heads))
776    }
777
778    fn start_modification(&self) -> Box<dyn MutableIndex> {
779        Box::new(Self::start_modification(self))
780    }
781}
782
783#[derive(Debug)]
784#[doc(hidden)] // for tests
785pub struct DefaultReadonlyIndexRevset {
786    inner: RevsetImpl<DefaultReadonlyIndex>,
787}
788
789impl DefaultReadonlyIndexRevset {
790    pub fn iter_graph_impl(
791        &self,
792        skip_transitive_edges: bool,
793    ) -> impl Iterator<Item = Result<GraphNode<CommitId>, RevsetEvaluationError>> {
794        self.inner.iter_graph_impl(skip_transitive_edges)
795    }
796
797    pub fn into_inner(self) -> Box<dyn Revset> {
798        Box::new(self.inner)
799    }
800}
801
802#[derive(Clone, Debug)]
803pub struct IndexStats {
804    pub num_commits: u32,
805    pub num_merges: u32,
806    pub max_generation_number: u32,
807    pub num_heads: u32,
808    pub num_changes: u32,
809    pub commit_levels: Vec<CommitIndexLevelStats>,
810    pub changed_path_commits_range: Option<Range<u32>>,
811    pub changed_path_levels: Vec<ChangedPathIndexLevelStats>,
812}
813
814#[derive(Clone, Debug)]
815pub struct CommitIndexLevelStats {
816    pub num_commits: u32,
817    pub name: String,
818}
819
820#[derive(Clone, Debug)]
821pub struct ChangedPathIndexLevelStats {
822    /// Number of commits.
823    pub num_commits: u32,
824    /// Sum of number of per-commit changed paths.
825    pub num_changed_paths: u32,
826    /// Number of unique paths.
827    pub num_paths: u32,
828    /// Index file name.
829    pub name: String,
830}
831
832/// Binary search result in a sorted lookup table.
833#[derive(Clone, Copy, Debug)]
834struct PositionLookupResult {
835    /// `Ok` means the element is found at the position. `Err` contains the
836    /// position where the element could be inserted.
837    result: Result<u32, u32>,
838    size: u32,
839}
840
841impl PositionLookupResult {
842    /// Returns position of the element if exactly matched.
843    fn ok(self) -> Option<u32> {
844        self.result.ok()
845    }
846
847    /// Returns `(previous, next)` positions of the matching element or
848    /// boundary.
849    fn neighbors(self) -> (Option<u32>, Option<u32>) {
850        match self.result {
851            Ok(pos) => (pos.checked_sub(1), (pos + 1..self.size).next()),
852            Err(pos) => (pos.checked_sub(1), (pos..self.size).next()),
853        }
854    }
855
856    /// Looks up `(previous, next)` elements by the given function.
857    fn map_neighbors<T>(self, mut lookup: impl FnMut(u32) -> T) -> (Option<T>, Option<T>) {
858        let (prev_pos, next_pos) = self.neighbors();
859        (prev_pos.map(&mut lookup), next_pos.map(&mut lookup))
860    }
861
862    /// Looks up matching elements from the current position, returns one if
863    /// the given `prefix` unambiguously matches.
864    fn prefix_matches<T: ObjectId>(
865        self,
866        prefix: &HexPrefix,
867        lookup: impl FnMut(u32) -> T,
868    ) -> PrefixResolution<(T, u32)> {
869        let lookup_pos = self.result.unwrap_or_else(|pos| pos);
870        let mut matches = (lookup_pos..self.size)
871            .map(lookup)
872            .take_while(|id| prefix.matches(id))
873            .fuse();
874        match (matches.next(), matches.next()) {
875            (Some(id), None) => PrefixResolution::SingleMatch((id, lookup_pos)),
876            (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
877            (None, _) => PrefixResolution::NoMatch,
878        }
879    }
880}
881
882/// Binary searches u32 position with the given comparison function.
883fn binary_search_pos_by(size: u32, mut f: impl FnMut(u32) -> Ordering) -> PositionLookupResult {
884    let mut low = 0;
885    let mut high = size;
886    while low < high {
887        let mid = (low + high) / 2;
888        let cmp = f(mid);
889        // According to Rust std lib, this produces cmov instructions.
890        // https://github.com/rust-lang/rust/blob/1.76.0/library/core/src/slice/mod.rs#L2845-L2855
891        low = if cmp == Ordering::Less { mid + 1 } else { low };
892        high = if cmp == Ordering::Greater { mid } else { high };
893        if cmp == Ordering::Equal {
894            let result = Ok(mid);
895            return PositionLookupResult { result, size };
896        }
897    }
898    let result = Err(low);
899    PositionLookupResult { result, size }
900}