gix_blame/
types.rs

1use std::{
2    num::NonZeroU32,
3    ops::{AddAssign, Range, SubAssign},
4};
5
6use gix_hash::ObjectId;
7use gix_object::bstr::BString;
8use smallvec::SmallVec;
9
10use crate::file::function::tokens_for_diffing;
11
12/// Options to be passed to [`file()`](crate::file()).
13#[derive(Default, Debug, Clone)]
14pub struct Options {
15    /// The algorithm to use for diffing.
16    pub diff_algorithm: gix_diff::blob::Algorithm,
17    /// A 1-based inclusive range, in order to mirror `git`’s behaviour. `Some(20..40)` represents
18    /// 21 lines, spanning from line 20 up to and including line 40. This will be converted to
19    /// `19..40` internally as the algorithm uses 0-based ranges that are exclusive at the end.
20    pub range: Option<std::ops::Range<u32>>,
21    /// Don't consider commits before the given date.
22    pub since: Option<gix_date::Time>,
23}
24
25/// The outcome of [`file()`](crate::file()).
26#[derive(Debug, Default, Clone)]
27pub struct Outcome {
28    /// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines)
29    /// that introduced it.
30    pub entries: Vec<BlameEntry>,
31    /// A buffer with the file content of the *Blamed File*, ready for tokenization.
32    pub blob: Vec<u8>,
33    /// Additional information about the amount of work performed to produce the blame.
34    pub statistics: Statistics,
35}
36
/// Counters describing the operations that were performed.
#[derive(Clone, Copy, Debug, Default)]
pub struct Statistics {
    /// The number of commits that were traversed until the blame was complete.
    pub commits_traversed: usize,
    /// The number of trees that were decoded to find the entry of the file to blame.
    pub trees_decoded: usize,
    /// The number of tree-diffs performed to see if the filepath was added, deleted or modified.
    /// These diffs are likely partial, as they are cancelled as soon as a change to the blamed
    /// file is detected.
    pub trees_diffed: usize,
    /// The number of blobs that were compared to each other to learn what changed between commits.
    /// Note that in order to diff a blob, both versions have to be loaded from the database.
    pub blobs_diffed: usize,
}
52
53impl Outcome {
54    /// Return an iterator over each entry in [`Self::entries`], along with its lines, line by line.
55    ///
56    /// Note that [`Self::blob`] must be tokenized in exactly the same way as the tokenizer that was used
57    /// to perform the diffs, which is what this method assures.
58    pub fn entries_with_lines(&self) -> impl Iterator<Item = (BlameEntry, Vec<BString>)> + '_ {
59        use gix_diff::blob::intern::TokenSource;
60        let mut interner = gix_diff::blob::intern::Interner::new(self.blob.len() / 100);
61        let lines_as_tokens: Vec<_> = tokens_for_diffing(&self.blob)
62            .tokenize()
63            .map(|token| interner.intern(token))
64            .collect();
65        self.entries.iter().map(move |e| {
66            (
67                e.clone(),
68                lines_as_tokens[e.range_in_blamed_file()]
69                    .iter()
70                    .map(|token| BString::new(interner[*token].into()))
71                    .collect(),
72            )
73        })
74    }
75}
76
/// Describes the offset of a particular hunk relative to the *Blamed File*.
///
/// An offset is applied to a range of lines with [`Offset::shifted_range()`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Offset {
    /// The amount of lines to add.
    ///
    /// Shifting a range by this offset subtracts the amount from both of its bounds.
    Added(u32),
    /// The amount of lines to remove.
    ///
    /// Shifting a range by this offset adds the amount to both of its bounds.
    Deleted(u32),
}
85
86impl Offset {
87    /// Shift the given `range` according to our offset.
88    pub fn shifted_range(&self, range: &Range<u32>) -> Range<u32> {
89        match self {
90            Offset::Added(added) => {
91                debug_assert!(range.start >= *added, "{self:?} {range:?}");
92                Range {
93                    start: range.start - added,
94                    end: range.end - added,
95                }
96            }
97            Offset::Deleted(deleted) => Range {
98                start: range.start + deleted,
99                end: range.end + deleted,
100            },
101        }
102    }
103}
104
105impl AddAssign<u32> for Offset {
106    fn add_assign(&mut self, rhs: u32) {
107        match self {
108            Self::Added(added) => *self = Self::Added(*added + rhs),
109            Self::Deleted(deleted) => {
110                if rhs > *deleted {
111                    *self = Self::Added(rhs - *deleted);
112                } else {
113                    *self = Self::Deleted(*deleted - rhs);
114                }
115            }
116        }
117    }
118}
119
120impl SubAssign<u32> for Offset {
121    fn sub_assign(&mut self, rhs: u32) {
122        match self {
123            Self::Added(added) => {
124                if rhs > *added {
125                    *self = Self::Deleted(rhs - *added);
126                } else {
127                    *self = Self::Added(*added - rhs);
128                }
129            }
130            Self::Deleted(deleted) => *self = Self::Deleted(*deleted + rhs),
131        }
132    }
133}
134
/// A mapping of a section of the *Blamed File* to the section in a *Source File* that introduced it.
///
/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
/// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()).
///
/// Only the two starting points and the shared length are stored; the full ranges are available through
/// [`BlameEntry::range_in_blamed_file()`] and [`BlameEntry::range_in_source_file()`].
#[derive(Clone, Debug, PartialEq)]
pub struct BlameEntry {
    /// The index of the token in the *Blamed File* (typically lines) where this entry begins.
    pub start_in_blamed_file: u32,
    /// The index of the token in the *Source File* (typically lines) where this entry begins.
    ///
    /// This is possibly offset compared to `start_in_blamed_file`.
    pub start_in_source_file: u32,
    /// The amount of lines the hunk is spanning.
    ///
    /// As the type is non-zero, an entry always spans at least one line.
    pub len: NonZeroU32,
    /// The commit that introduced the section into the *Source File*.
    pub commit_id: ObjectId,
}
152
153impl BlameEntry {
154    /// Create a new instance.
155    pub fn new(range_in_blamed_file: Range<u32>, range_in_source_file: Range<u32>, commit_id: ObjectId) -> Self {
156        debug_assert!(
157            range_in_blamed_file.end > range_in_blamed_file.start,
158            "{range_in_blamed_file:?}"
159        );
160        debug_assert!(
161            range_in_source_file.end > range_in_source_file.start,
162            "{range_in_source_file:?}"
163        );
164        debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len());
165
166        Self {
167            start_in_blamed_file: range_in_blamed_file.start,
168            start_in_source_file: range_in_source_file.start,
169            len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"),
170            commit_id,
171        }
172    }
173}
174
175impl BlameEntry {
176    /// Return the range of tokens this entry spans in the *Blamed File*.
177    pub fn range_in_blamed_file(&self) -> Range<usize> {
178        let start = self.start_in_blamed_file as usize;
179        start..start + self.len.get() as usize
180    }
181    /// Return the range of tokens this entry spans in the *Source File*.
182    pub fn range_in_source_file(&self) -> Range<usize> {
183        let start = self.start_in_source_file as usize;
184        start..start + self.len.get() as usize
185    }
186}
187
/// A range of lines that can be moved by an [`Offset`].
pub(crate) trait LineRange {
    /// Return a new value that is `self` shifted by `offset`, leaving `self` untouched.
    fn shift_by(&self, offset: Offset) -> Self;
}
191
192impl LineRange for Range<u32> {
193    fn shift_by(&self, offset: Offset) -> Self {
194        offset.shifted_range(self)
195    }
196}
197
/// Tracks the hunks in the *Blamed File* that are not yet associated with the commit that introduced them.
///
/// Each hunk carries a list of suspects, i.e. commits paired with the range in their version of the
/// file that corresponds to this hunk.
#[derive(Debug, PartialEq)]
pub struct UnblamedHunk {
    /// The range in the file that is being blamed that this hunk represents.
    pub range_in_blamed_file: Range<u32>,
    /// Maps a commit to the range in a source file (i.e. *Blamed File* at a revision) that is
    /// equal to `range_in_blamed_file`. Since `suspects` rarely contains more than 1 item, it can
    /// efficiently be stored as a `SmallVec`.
    ///
    /// Each item is a `(commit, range_in_source_file)` pair.
    pub suspects: SmallVec<[(ObjectId, Range<u32>); 1]>,
}
208
209impl UnblamedHunk {
210    pub(crate) fn has_suspect(&self, suspect: &ObjectId) -> bool {
211        self.suspects.iter().any(|entry| entry.0 == *suspect)
212    }
213
214    pub(crate) fn get_range(&self, suspect: &ObjectId) -> Option<&Range<u32>> {
215        self.suspects
216            .iter()
217            .find(|entry| entry.0 == *suspect)
218            .map(|entry| &entry.1)
219    }
220}
221
/// A value that is exactly one of two alternatives, which may be of different types.
#[derive(Debug)]
pub(crate) enum Either<T, U> {
    /// The first alternative, holding a `T`.
    Left(T),
    /// The second alternative, holding a `U`.
    Right(U),
}
227
/// A single change between two blobs, or an unchanged region.
#[derive(Debug, PartialEq)]
pub enum Change {
    /// A range of tokens that wasn't changed.
    Unchanged(Range<u32>),
    /// `(added_line_range, num_deleted_in_before)`
    ///
    /// The first field is the range of lines that were added, the second is the number of lines
    /// that were deleted in the 'before' version.
    AddedOrReplaced(Range<u32>, u32),
    /// `(line_to_start_deletion_at, num_deleted_in_before)`
    ///
    /// The first field is the line at which the deletion starts, the second is the number of lines
    /// that were deleted in the 'before' version.
    Deleted(u32, u32),
}