gix_blame/
types.rs

1use crate::file::function::tokens_for_diffing;
2use gix_hash::ObjectId;
3use gix_object::bstr::BString;
4use std::num::NonZeroU32;
5use std::{
6    collections::BTreeMap,
7    ops::{AddAssign, Range, SubAssign},
8};
9
10/// Options to be passed to [`file()`](crate::file()).
11#[derive(Default, Debug, Clone)]
12pub struct Options {
13    /// The algorithm to use for diffing.
14    pub diff_algorithm: gix_diff::blob::Algorithm,
15    /// A 1-based inclusive range, in order to mirror `git`’s behaviour. `Some(20..40)` represents
16    /// 21 lines, spanning from line 20 up to and including line 40. This will be converted to
17    /// `19..40` internally as the algorithm uses 0-based ranges that are exclusive at the end.
18    pub range: Option<std::ops::Range<u32>>,
19    /// Don't consider commits before the given date.
20    pub since: Option<gix_date::Time>,
21}
22
23/// The outcome of [`file()`](crate::file()).
24#[derive(Debug, Default, Clone)]
25pub struct Outcome {
26    /// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines)
27    /// that introduced it.
28    pub entries: Vec<BlameEntry>,
29    /// A buffer with the file content of the *Blamed File*, ready for tokenization.
30    pub blob: Vec<u8>,
31    /// Additional information about the amount of work performed to produce the blame.
32    pub statistics: Statistics,
33}
34
/// Additional information about the performed operations.
///
/// All counters start at zero when created via [`Default`].
#[derive(Clone, Copy, Debug, Default)]
pub struct Statistics {
    /// The number of commits that were traversed until the blame was complete.
    pub commits_traversed: usize,
    /// The number of trees that were decoded to find the entry of the file to blame.
    pub trees_decoded: usize,
    /// The number of tree-diffs performed to see if the filepath was added, deleted or modified.
    /// These diffs are likely partial as they are cancelled as soon as a change to the blamed
    /// file is detected.
    pub trees_diffed: usize,
    /// The number of blobs that were compared to each other to learn what changed between commits.
    /// Note that in order to diff a blob, one needs to load both versions from the database.
    pub blobs_diffed: usize,
}
50
51impl Outcome {
52    /// Return an iterator over each entry in [`Self::entries`], along with its lines, line by line.
53    ///
54    /// Note that [`Self::blob`] must be tokenized in exactly the same way as the tokenizer that was used
55    /// to perform the diffs, which is what this method assures.
56    pub fn entries_with_lines(&self) -> impl Iterator<Item = (BlameEntry, Vec<BString>)> + '_ {
57        use gix_diff::blob::intern::TokenSource;
58        let mut interner = gix_diff::blob::intern::Interner::new(self.blob.len() / 100);
59        let lines_as_tokens: Vec<_> = tokens_for_diffing(&self.blob)
60            .tokenize()
61            .map(|token| interner.intern(token))
62            .collect();
63        self.entries.iter().map(move |e| {
64            (
65                e.clone(),
66                lines_as_tokens[e.range_in_blamed_file()]
67                    .iter()
68                    .map(|token| BString::new(interner[*token].into()))
69                    .collect(),
70            )
71        })
72    }
73}
74
/// Describes the offset of a particular hunk relative to the *Blamed File*.
///
/// The two variants behave like the two signs of a number: adding to or subtracting from an
/// offset (see the `AddAssign`/`SubAssign` implementations) may flip it from one variant to the other.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Offset {
    /// The amount of lines to add.
    Added(u32),
    /// The amount of lines to remove.
    Deleted(u32),
}

impl Offset {
    /// Shift the given `range` according to our offset and return the result.
    ///
    /// [`Offset::Added`] moves both ends of `range` down by the contained amount, while
    /// [`Offset::Deleted`] moves both ends up by it.
    ///
    /// In debug builds, this asserts that `range.start` is not smaller than the amount
    /// carried by [`Offset::Added`].
    pub fn shifted_range(&self, range: &Range<u32>) -> Range<u32> {
        match *self {
            Offset::Added(added) => {
                debug_assert!(range.start >= added, "{self:?} {range:?}");
                (range.start - added)..(range.end - added)
            }
            Offset::Deleted(deleted) => (range.start + deleted)..(range.end + deleted),
        }
    }
}

impl AddAssign<u32> for Offset {
    /// Move the offset by `rhs` towards [`Offset::Added`].
    fn add_assign(&mut self, rhs: u32) {
        *self = match *self {
            Self::Added(added) => Self::Added(added + rhs),
            // More is added than was deleted: the remainder becomes an addition.
            Self::Deleted(deleted) if rhs > deleted => Self::Added(rhs - deleted),
            // Note that an exact match yields `Deleted(0)`, not `Added(0)`.
            Self::Deleted(deleted) => Self::Deleted(deleted - rhs),
        };
    }
}

impl SubAssign<u32> for Offset {
    /// Move the offset by `rhs` towards [`Offset::Deleted`].
    fn sub_assign(&mut self, rhs: u32) {
        *self = match *self {
            // More is removed than was added: the remainder becomes a deletion.
            Self::Added(added) if rhs > added => Self::Deleted(rhs - added),
            // Note that an exact match yields `Added(0)`, not `Deleted(0)`.
            Self::Added(added) => Self::Added(added - rhs),
            Self::Deleted(deleted) => Self::Deleted(deleted + rhs),
        };
    }
}
132
133/// A mapping of a section of the *Blamed File* to the section in a *Source File* that introduced it.
134///
135/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
136/// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()).
137#[derive(Clone, Debug, PartialEq)]
138pub struct BlameEntry {
139    /// The index of the token in the *Blamed File* (typically lines) where this entry begins.
140    pub start_in_blamed_file: u32,
141    /// The index of the token in the *Source File* (typically lines) where this entry begins.
142    ///
143    /// This is possibly offset compared to `start_in_blamed_file`.
144    pub start_in_source_file: u32,
145    /// The amount of lines the hunk is spanning.
146    pub len: NonZeroU32,
147    /// The commit that introduced the section into the *Source File*.
148    pub commit_id: ObjectId,
149}
150
151impl BlameEntry {
152    /// Create a new instance.
153    pub fn new(range_in_blamed_file: Range<u32>, range_in_source_file: Range<u32>, commit_id: ObjectId) -> Self {
154        debug_assert!(
155            range_in_blamed_file.end > range_in_blamed_file.start,
156            "{range_in_blamed_file:?}"
157        );
158        debug_assert!(
159            range_in_source_file.end > range_in_source_file.start,
160            "{range_in_source_file:?}"
161        );
162        debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len());
163
164        Self {
165            start_in_blamed_file: range_in_blamed_file.start,
166            start_in_source_file: range_in_source_file.start,
167            len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"),
168            commit_id,
169        }
170    }
171}
172
173impl BlameEntry {
174    /// Return the range of tokens this entry spans in the *Blamed File*.
175    pub fn range_in_blamed_file(&self) -> Range<usize> {
176        let start = self.start_in_blamed_file as usize;
177        start..start + self.len.get() as usize
178    }
179    /// Return the range of tokens this entry spans in the *Source File*.
180    pub fn range_in_source_file(&self) -> Range<usize> {
181        let start = self.start_in_source_file as usize;
182        start..start + self.len.get() as usize
183    }
184}
185
186pub(crate) trait LineRange {
187    fn shift_by(&self, offset: Offset) -> Self;
188}
189
190impl LineRange for Range<u32> {
191    fn shift_by(&self, offset: Offset) -> Self {
192        offset.shifted_range(self)
193    }
194}
195
196/// Tracks the hunks in the *Blamed File* that are not yet associated with the commit that introduced them.
197#[derive(Debug, PartialEq)]
198pub struct UnblamedHunk {
199    /// The range in the file that is being blamed that this hunk represents.
200    pub range_in_blamed_file: Range<u32>,
201    /// Maps a commit to the range in a source file (i.e. *Blamed File* at a revision) that is equal to `range_in_blamed_file`.
202    pub suspects: BTreeMap<ObjectId, Range<u32>>,
203}
204
/// A value that is one of two possible types.
#[derive(Debug)]
pub(crate) enum Either<L, R> {
    /// Holds a value of the first type.
    Left(L),
    /// Holds a value of the second type.
    Right(R),
}
210
/// A single change between two blobs, or an unchanged region.
#[derive(PartialEq, Debug)]
pub enum Change {
    /// A range of tokens that wasn't changed.
    Unchanged(Range<u32>),
    /// Lines were added, possibly replacing others: `(added_line_range, num_deleted_in_before)`
    AddedOrReplaced(Range<u32>, u32),
    /// Lines were deleted: `(line_to_start_deletion_at, num_deleted_in_before)`
    Deleted(u32, u32),
}