gix_blame/types.rs
1use crate::file::function::tokens_for_diffing;
2use gix_hash::ObjectId;
3use gix_object::bstr::BString;
4use std::num::NonZeroU32;
5use std::{
6 collections::BTreeMap,
7 ops::{AddAssign, Range, SubAssign},
8};
9
10/// Options to be passed to [`file()`](crate::file()).
11#[derive(Default, Debug, Clone)]
12pub struct Options {
13 /// The algorithm to use for diffing.
14 pub diff_algorithm: gix_diff::blob::Algorithm,
15 /// A 1-based inclusive range, in order to mirror `git`’s behaviour. `Some(20..40)` represents
16 /// 21 lines, spanning from line 20 up to and including line 40. This will be converted to
17 /// `19..40` internally as the algorithm uses 0-based ranges that are exclusive at the end.
18 pub range: Option<std::ops::Range<u32>>,
19 /// Don't consider commits before the given date.
20 pub since: Option<gix_date::Time>,
21}
22
23/// The outcome of [`file()`](crate::file()).
24#[derive(Debug, Default, Clone)]
25pub struct Outcome {
26 /// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines)
27 /// that introduced it.
28 pub entries: Vec<BlameEntry>,
29 /// A buffer with the file content of the *Blamed File*, ready for tokenization.
30 pub blob: Vec<u8>,
31 /// Additional information about the amount of work performed to produce the blame.
32 pub statistics: Statistics,
33}
34
/// Counters describing the operations that were performed.
#[derive(Clone, Copy, Debug, Default)]
pub struct Statistics {
    /// The number of commits that were traversed until the blame was complete.
    pub commits_traversed: usize,
    /// The number of trees that were decoded to find the entry of the file to blame.
    pub trees_decoded: usize,
    /// The number of tree-diffs performed to see if the filepath was added, deleted or modified.
    /// These diffs are likely partial, as they are cancelled as soon as a change to the blamed
    /// file is detected.
    pub trees_diffed: usize,
    /// The number of blobs that were compared to each other to learn what changed between commits.
    /// Note that diffing a blob requires loading both of its versions from the database.
    pub blobs_diffed: usize,
}
50
51impl Outcome {
52 /// Return an iterator over each entry in [`Self::entries`], along with its lines, line by line.
53 ///
54 /// Note that [`Self::blob`] must be tokenized in exactly the same way as the tokenizer that was used
55 /// to perform the diffs, which is what this method assures.
56 pub fn entries_with_lines(&self) -> impl Iterator<Item = (BlameEntry, Vec<BString>)> + '_ {
57 use gix_diff::blob::intern::TokenSource;
58 let mut interner = gix_diff::blob::intern::Interner::new(self.blob.len() / 100);
59 let lines_as_tokens: Vec<_> = tokens_for_diffing(&self.blob)
60 .tokenize()
61 .map(|token| interner.intern(token))
62 .collect();
63 self.entries.iter().map(move |e| {
64 (
65 e.clone(),
66 lines_as_tokens[e.range_in_blamed_file()]
67 .iter()
68 .map(|token| BString::new(interner[*token].into()))
69 .collect(),
70 )
71 })
72 }
73}
74
/// The offset of a particular hunk relative to the *Blamed File*.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Offset {
    /// The amount of lines to add.
    Added(u32),
    /// The amount of lines to remove.
    Deleted(u32),
}

impl Offset {
    /// Return `range` shifted according to this offset.
    ///
    /// [`Offset::Added`] moves both ends of `range` towards zero by the stored amount, while
    /// [`Offset::Deleted`] moves both ends up by the stored amount.
    pub fn shifted_range(&self, range: &Range<u32>) -> Range<u32> {
        match *self {
            Offset::Added(added) => {
                // The subtraction below would underflow if the range started before `added`.
                debug_assert!(range.start >= added, "{self:?} {range:?}");
                (range.start - added)..(range.end - added)
            }
            Offset::Deleted(deleted) => (range.start + deleted)..(range.end + deleted),
        }
    }
}

impl AddAssign<u32> for Offset {
    /// Add `rhs` lines. A [`Offset::Deleted`] only turns into [`Offset::Added`] once `rhs`
    /// exceeds the amount of deleted lines; otherwise the deleted amount shrinks, possibly to zero.
    fn add_assign(&mut self, rhs: u32) {
        *self = match *self {
            Self::Added(added) => Self::Added(added + rhs),
            Self::Deleted(deleted) if rhs > deleted => Self::Added(rhs - deleted),
            Self::Deleted(deleted) => Self::Deleted(deleted - rhs),
        };
    }
}

impl SubAssign<u32> for Offset {
    /// Remove `rhs` lines. A [`Offset::Added`] only turns into [`Offset::Deleted`] once `rhs`
    /// exceeds the amount of added lines; otherwise the added amount shrinks, possibly to zero.
    fn sub_assign(&mut self, rhs: u32) {
        *self = match *self {
            Self::Added(added) if rhs > added => Self::Deleted(rhs - added),
            Self::Added(added) => Self::Added(added - rhs),
            Self::Deleted(deleted) => Self::Deleted(deleted + rhs),
        };
    }
}
132
133/// A mapping of a section of the *Blamed File* to the section in a *Source File* that introduced it.
134///
135/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
136/// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()).
137#[derive(Clone, Debug, PartialEq)]
138pub struct BlameEntry {
139 /// The index of the token in the *Blamed File* (typically lines) where this entry begins.
140 pub start_in_blamed_file: u32,
141 /// The index of the token in the *Source File* (typically lines) where this entry begins.
142 ///
143 /// This is possibly offset compared to `start_in_blamed_file`.
144 pub start_in_source_file: u32,
145 /// The amount of lines the hunk is spanning.
146 pub len: NonZeroU32,
147 /// The commit that introduced the section into the *Source File*.
148 pub commit_id: ObjectId,
149}
150
151impl BlameEntry {
152 /// Create a new instance.
153 pub fn new(range_in_blamed_file: Range<u32>, range_in_source_file: Range<u32>, commit_id: ObjectId) -> Self {
154 debug_assert!(
155 range_in_blamed_file.end > range_in_blamed_file.start,
156 "{range_in_blamed_file:?}"
157 );
158 debug_assert!(
159 range_in_source_file.end > range_in_source_file.start,
160 "{range_in_source_file:?}"
161 );
162 debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len());
163
164 Self {
165 start_in_blamed_file: range_in_blamed_file.start,
166 start_in_source_file: range_in_source_file.start,
167 len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"),
168 commit_id,
169 }
170 }
171}
172
173impl BlameEntry {
174 /// Return the range of tokens this entry spans in the *Blamed File*.
175 pub fn range_in_blamed_file(&self) -> Range<usize> {
176 let start = self.start_in_blamed_file as usize;
177 start..start + self.len.get() as usize
178 }
179 /// Return the range of tokens this entry spans in the *Source File*.
180 pub fn range_in_source_file(&self) -> Range<usize> {
181 let start = self.start_in_source_file as usize;
182 start..start + self.len.get() as usize
183 }
184}
185
186pub(crate) trait LineRange {
187 fn shift_by(&self, offset: Offset) -> Self;
188}
189
190impl LineRange for Range<u32> {
191 fn shift_by(&self, offset: Offset) -> Self {
192 offset.shifted_range(self)
193 }
194}
195
196/// Tracks the hunks in the *Blamed File* that are not yet associated with the commit that introduced them.
197#[derive(Debug, PartialEq)]
198pub struct UnblamedHunk {
199 /// The range in the file that is being blamed that this hunk represents.
200 pub range_in_blamed_file: Range<u32>,
201 /// Maps a commit to the range in a source file (i.e. *Blamed File* at a revision) that is equal to `range_in_blamed_file`.
202 pub suspects: BTreeMap<ObjectId, Range<u32>>,
203}
204
/// A value that is either of type `T` or of type `U`.
#[derive(Debug)]
pub(crate) enum Either<T, U> {
    /// Holds a value of the first type.
    Left(T),
    /// Holds a value of the second type.
    Right(U),
}
210
/// Describes one region when comparing two blobs: either a single change or an unchanged range.
#[derive(Debug, PartialEq)]
pub enum Change {
    /// A range of tokens that wasn't changed.
    Unchanged(Range<u32>),
    /// Fields are `(added_line_range, num_deleted_in_before)`.
    AddedOrReplaced(Range<u32>, u32),
    /// Fields are `(line_to_start_deletion_at, num_deleted_in_before)`.
    Deleted(u32, u32),
}