gix_blame/
types.rs

1use crate::file::function::tokens_for_diffing;
2use gix_hash::ObjectId;
3use gix_object::bstr::BString;
4use std::num::NonZeroU32;
5use std::{
6    collections::BTreeMap,
7    ops::{AddAssign, Range, SubAssign},
8};
9
10/// The outcome of [`file()`](crate::file()).
11#[derive(Debug, Default, Clone)]
12pub struct Outcome {
13    /// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines)
14    /// that introduced it.
15    pub entries: Vec<BlameEntry>,
16    /// A buffer with the file content of the *Blamed File*, ready for tokenization.
17    pub blob: Vec<u8>,
18    /// Additional information about the amount of work performed to produce the blame.
19    pub statistics: Statistics,
20}
21
/// Counters describing the operations that were performed.
#[derive(Clone, Copy, Debug, Default)]
pub struct Statistics {
    /// Number of commits traversed until the blame was complete.
    pub commits_traversed: usize,
    /// Number of commits whose trees were extracted.
    pub commits_to_tree: usize,
    /// Number of trees that were decoded in order to find the entry of the file to blame.
    pub trees_decoded: usize,
    /// Number of tree-diffs run to see whether the filepath was added, deleted or modified.
    /// These diffs are likely partial, as they are cancelled as soon as a change to the
    /// blamed file is detected.
    pub trees_diffed: usize,
    /// Number of blobs that were compared to each other to learn what changed between commits.
    /// Note that diffing a blob requires loading both of its versions from the database.
    pub blobs_diffed: usize,
}
39
40impl Outcome {
41    /// Return an iterator over each entry in [`Self::entries`], along with its lines, line by line.
42    ///
43    /// Note that [`Self::blob`] must be tokenized in exactly the same way as the tokenizer that was used
44    /// to perform the diffs, which is what this method assures.
45    pub fn entries_with_lines(&self) -> impl Iterator<Item = (BlameEntry, Vec<BString>)> + '_ {
46        use gix_diff::blob::intern::TokenSource;
47        let mut interner = gix_diff::blob::intern::Interner::new(self.blob.len() / 100);
48        let lines_as_tokens: Vec<_> = tokens_for_diffing(&self.blob)
49            .tokenize()
50            .map(|token| interner.intern(token))
51            .collect();
52        self.entries.iter().map(move |e| {
53            (
54                e.clone(),
55                lines_as_tokens[e.range_in_blamed_file()]
56                    .iter()
57                    .map(|token| BString::new(interner[*token].into()))
58                    .collect(),
59            )
60        })
61    }
62}
63
/// The offset of a particular hunk, relative to the *Blamed File*.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Offset {
    /// The number of lines to add.
    Added(u32),
    /// The number of lines to remove.
    Deleted(u32),
}

impl Offset {
    /// Return `range` shifted by this offset.
    ///
    /// [`Offset::Added`] moves both ends of the range down by the stored amount, while
    /// [`Offset::Deleted`] moves both ends up by it. In debug builds it is asserted that the
    /// start of `range` is not smaller than the amount subtracted for [`Offset::Added`].
    pub fn shifted_range(&self, range: &Range<u32>) -> Range<u32> {
        let (start, end) = (range.start, range.end);
        match *self {
            Self::Added(lines) => {
                debug_assert!(start >= lines, "{self:?} {range:?}");
                (start - lines)..(end - lines)
            }
            Self::Deleted(lines) => (start + lines)..(end + lines),
        }
    }
}

impl AddAssign<u32> for Offset {
    /// Move the offset by `rhs` lines towards [`Offset::Added`].
    ///
    /// A [`Offset::Deleted`] value only turns into [`Offset::Added`] if `rhs` is strictly larger than
    /// the deleted amount, so cancelling out exactly yields `Deleted(0)`.
    fn add_assign(&mut self, rhs: u32) {
        *self = match *self {
            Self::Added(added) => Self::Added(added + rhs),
            Self::Deleted(deleted) if rhs > deleted => Self::Added(rhs - deleted),
            Self::Deleted(deleted) => Self::Deleted(deleted - rhs),
        };
    }
}

impl SubAssign<u32> for Offset {
    /// Move the offset by `rhs` lines towards [`Offset::Deleted`].
    ///
    /// An [`Offset::Added`] value only turns into [`Offset::Deleted`] if `rhs` is strictly larger than
    /// the added amount, so cancelling out exactly yields `Added(0)`.
    fn sub_assign(&mut self, rhs: u32) {
        *self = match *self {
            Self::Added(added) if rhs > added => Self::Deleted(rhs - added),
            Self::Added(added) => Self::Added(added - rhs),
            Self::Deleted(deleted) => Self::Deleted(deleted + rhs),
        };
    }
}
121
122/// A mapping of a section of the *Blamed File* to the section in a *Source File* that introduced it.
123///
124/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
125/// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()).
126#[derive(Clone, Debug, PartialEq)]
127pub struct BlameEntry {
128    /// The index of the token in the *Blamed File* (typically lines) where this entry begins.
129    pub start_in_blamed_file: u32,
130    /// The index of the token in the *Source File* (typically lines) where this entry begins.
131    ///
132    /// This is possibly offset compared to `start_in_blamed_file`.
133    pub start_in_source_file: u32,
134    /// The amount of lines the hunk is spanning.
135    pub len: NonZeroU32,
136    /// The commit that introduced the section into the *Source File*.
137    pub commit_id: ObjectId,
138}
139
140impl BlameEntry {
141    /// Create a new instance.
142    pub fn new(range_in_blamed_file: Range<u32>, range_in_source_file: Range<u32>, commit_id: ObjectId) -> Self {
143        debug_assert!(
144            range_in_blamed_file.end > range_in_blamed_file.start,
145            "{range_in_blamed_file:?}"
146        );
147        debug_assert!(
148            range_in_source_file.end > range_in_source_file.start,
149            "{range_in_source_file:?}"
150        );
151        debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len());
152
153        Self {
154            start_in_blamed_file: range_in_blamed_file.start,
155            start_in_source_file: range_in_source_file.start,
156            len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"),
157            commit_id,
158        }
159    }
160}
161
162impl BlameEntry {
163    /// Return the range of tokens this entry spans in the *Blamed File*.
164    pub fn range_in_blamed_file(&self) -> Range<usize> {
165        let start = self.start_in_blamed_file as usize;
166        start..start + self.len.get() as usize
167    }
168    /// Return the range of tokens this entry spans in the *Source File*.
169    pub fn range_in_source_file(&self) -> Range<usize> {
170        let start = self.start_in_source_file as usize;
171        start..start + self.len.get() as usize
172    }
173}
174
175pub(crate) trait LineRange {
176    fn shift_by(&self, offset: Offset) -> Self;
177}
178
179impl LineRange for Range<u32> {
180    fn shift_by(&self, offset: Offset) -> Self {
181        offset.shifted_range(self)
182    }
183}
184
185/// Tracks the hunks in the *Blamed File* that are not yet associated with the commit that introduced them.
186#[derive(Debug, PartialEq)]
187pub struct UnblamedHunk {
188    /// The range in the file that is being blamed that this hunk represents.
189    pub range_in_blamed_file: Range<u32>,
190    /// Maps a commit to the range in a source file (i.e. *Blamed File* at a revision) that is equal to `range_in_blamed_file`.
191    pub suspects: BTreeMap<ObjectId, Range<u32>>,
192}
193
/// A value that is one of two alternatives, each with its own type.
#[derive(Debug)]
pub(crate) enum Either<T, U> {
    /// The first alternative, holding a `T`.
    Left(T),
    /// The second alternative, holding a `U`.
    Right(U),
}
199
/// One change between two blobs, or a region that did not change.
#[derive(Debug, PartialEq)]
pub enum Change {
    /// A range of tokens that was left unchanged.
    Unchanged(Range<u32>),
    /// Lines that were added or replaced, as `(added_line_range, num_deleted_in_before)`.
    AddedOrReplaced(Range<u32>, u32),
    /// Lines that were deleted, as `(line_to_start_deletion_at, num_deleted_in_before)`.
    Deleted(u32, u32),
}