1use std::collections::HashMap;
22use std::collections::hash_map;
23use std::iter;
24use std::ops::Range;
25use std::rc::Rc;
26
27use bstr::BStr;
28use bstr::BString;
29use itertools::Itertools as _;
30use pollster::FutureExt as _;
31
32use crate::backend::BackendError;
33use crate::backend::BackendResult;
34use crate::backend::CommitId;
35use crate::commit::Commit;
36use crate::conflicts::ConflictMarkerStyle;
37use crate::conflicts::MaterializedTreeValue;
38use crate::conflicts::materialize_merge_result_to_bytes;
39use crate::conflicts::materialize_tree_value;
40use crate::diff::Diff;
41use crate::diff::DiffHunkKind;
42use crate::fileset::FilesetExpression;
43use crate::graph::GraphEdge;
44use crate::merged_tree::MergedTree;
45use crate::repo::Repo;
46use crate::repo_path::RepoPath;
47use crate::repo_path::RepoPathBuf;
48use crate::revset::ResolvedRevsetExpression;
49use crate::revset::RevsetEvaluationError;
50use crate::revset::RevsetExpression;
51use crate::revset::RevsetFilterPredicate;
52use crate::store::Store;
53
54#[derive(Clone, Debug)]
56pub struct FileAnnotation {
57 line_map: OriginalLineMap,
58 text: BString,
59}
60
61impl FileAnnotation {
62 pub fn line_origins(&self) -> impl Iterator<Item = (Result<&LineOrigin, &LineOrigin>, &BStr)> {
71 itertools::zip_eq(&self.line_map, self.text.split_inclusive(|b| *b == b'\n'))
72 .map(|(line_origin, line)| (line_origin.as_ref(), line.as_ref()))
73 }
74
75 pub fn lines(&self) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, &BStr)> {
84 itertools::zip_eq(
85 self.commit_ids(),
86 self.text
87 .split_inclusive(|b| *b == b'\n')
88 .map(AsRef::as_ref),
89 )
90 }
91
92 pub fn line_ranges(
99 &self,
100 ) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, Range<usize>)> {
101 let ranges = self
102 .text
103 .split_inclusive(|b| *b == b'\n')
104 .scan(0, |total, line| {
105 let start = *total;
106 *total += line.len();
107 Some(start..*total)
108 });
109 itertools::zip_eq(self.commit_ids(), ranges)
110 }
111
112 pub fn compact_line_ranges(
116 &self,
117 ) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, Range<usize>)> {
118 let mut ranges = self.line_ranges();
119 let mut acc = ranges.next();
120 iter::from_fn(move || {
121 let (acc_commit_id, acc_range) = acc.as_mut()?;
122 for (cur_commit_id, cur_range) in ranges.by_ref() {
123 if *acc_commit_id == cur_commit_id {
124 acc_range.end = cur_range.end;
125 } else {
126 return acc.replace((cur_commit_id, cur_range));
127 }
128 }
129 acc.take()
130 })
131 }
132
133 pub fn text(&self) -> &BStr {
135 self.text.as_ref()
136 }
137
138 fn commit_ids(&self) -> impl Iterator<Item = Result<&CommitId, &CommitId>> {
139 self.line_map.iter().map(|line_origin| {
140 line_origin
141 .as_ref()
142 .map(|origin| &origin.commit_id)
143 .map_err(|origin| &origin.commit_id)
144 })
145 }
146}
147
148#[derive(Clone, Debug)]
150pub struct FileAnnotator {
151 file_path: RepoPathBuf,
153 starting_text: BString,
154 state: AnnotationState,
155}
156
157impl FileAnnotator {
158 pub fn from_commit(starting_commit: &Commit, file_path: &RepoPath) -> BackendResult<Self> {
162 let source = Source::load(starting_commit, file_path)?;
163 Ok(Self::with_source(starting_commit.id(), file_path, source))
164 }
165
166 pub fn with_file_content(
173 starting_commit_id: &CommitId,
174 file_path: &RepoPath,
175 starting_text: impl Into<Vec<u8>>,
176 ) -> Self {
177 let source = Source::new(BString::new(starting_text.into()));
178 Self::with_source(starting_commit_id, file_path, source)
179 }
180
181 fn with_source(
182 starting_commit_id: &CommitId,
183 file_path: &RepoPath,
184 mut source: Source,
185 ) -> Self {
186 source.fill_line_map();
187 let starting_text = source.text.clone();
188 let state = AnnotationState {
189 original_line_map: (0..source.line_map.len())
190 .map(|line_number| {
191 Err(LineOrigin {
192 commit_id: starting_commit_id.clone(),
193 line_number,
194 })
195 })
196 .collect(),
197 commit_source_map: HashMap::from([(starting_commit_id.clone(), source)]),
198 num_unresolved_roots: 0,
199 };
200 Self {
201 file_path: file_path.to_owned(),
202 starting_text,
203 state,
204 }
205 }
206
207 pub fn compute(
213 &mut self,
214 repo: &dyn Repo,
215 domain: &Rc<ResolvedRevsetExpression>,
216 ) -> Result<(), RevsetEvaluationError> {
217 process_commits(repo, &mut self.state, domain, &self.file_path)
218 }
219
220 pub fn pending_commits(&self) -> impl Iterator<Item = &CommitId> {
222 self.state.commit_source_map.keys()
223 }
224
225 pub fn to_annotation(&self) -> FileAnnotation {
227 FileAnnotation {
231 line_map: self.state.original_line_map.clone(),
232 text: self.starting_text.clone(),
233 }
234 }
235}
236
237#[derive(Clone, Debug)]
239struct AnnotationState {
240 original_line_map: OriginalLineMap,
241 commit_source_map: HashMap<CommitId, Source>,
243 num_unresolved_roots: usize,
245}
246
247#[derive(Clone, Debug)]
249struct Source {
250 line_map: Vec<(usize, usize)>,
253 text: BString,
255}
256
257impl Source {
258 fn new(text: BString) -> Self {
259 Self {
260 line_map: Vec::new(),
261 text,
262 }
263 }
264
265 fn load(commit: &Commit, file_path: &RepoPath) -> Result<Self, BackendError> {
266 let tree = commit.tree()?;
267 let text = get_file_contents(commit.store(), file_path, &tree).block_on()?;
268 Ok(Self::new(text))
269 }
270
271 fn fill_line_map(&mut self) {
272 let lines = self.text.split_inclusive(|b| *b == b'\n');
273 self.line_map = lines.enumerate().map(|(i, _)| (i, i)).collect();
274 }
275}
276
277type OriginalLineMap = Vec<Result<LineOrigin, LineOrigin>>;
280
281#[derive(Clone, Debug, Eq, PartialEq)]
283pub struct LineOrigin {
284 pub commit_id: CommitId,
286 pub line_number: usize,
288}
289
290fn process_commits(
293 repo: &dyn Repo,
294 state: &mut AnnotationState,
295 domain: &Rc<ResolvedRevsetExpression>,
296 file_name: &RepoPath,
297) -> Result<(), RevsetEvaluationError> {
298 let predicate = RevsetFilterPredicate::File(FilesetExpression::file_path(file_name.to_owned()));
299 let heads = RevsetExpression::commits(state.commit_source_map.keys().cloned().collect());
305 let revset = heads
306 .union(&domain.intersection(&heads.ancestors()).filtered(predicate))
307 .evaluate(repo)?;
308
309 state.num_unresolved_roots = 0;
310 for node in revset.iter_graph() {
311 let (commit_id, edge_list) = node?;
312 process_commit(repo, file_name, state, &commit_id, &edge_list)?;
313 if state.commit_source_map.len() == state.num_unresolved_roots {
314 break;
316 }
317 }
318 Ok(())
319}
320
321fn process_commit(
325 repo: &dyn Repo,
326 file_name: &RepoPath,
327 state: &mut AnnotationState,
328 current_commit_id: &CommitId,
329 edges: &[GraphEdge<CommitId>],
330) -> Result<(), BackendError> {
331 let Some(mut current_source) = state.commit_source_map.remove(current_commit_id) else {
332 return Ok(());
333 };
334
335 for parent_edge in edges {
336 let parent_commit_id = &parent_edge.target;
337 let parent_source = match state.commit_source_map.entry(parent_commit_id.clone()) {
338 hash_map::Entry::Occupied(entry) => entry.into_mut(),
339 hash_map::Entry::Vacant(entry) => {
340 let commit = repo.store().get_commit(entry.key())?;
341 entry.insert(Source::load(&commit, file_name)?)
342 }
343 };
344
345 let mut current_lines = current_source.line_map.iter().copied().peekable();
354 let mut new_current_line_map = Vec::new();
355 let mut new_parent_line_map = Vec::new();
356 copy_same_lines_with(
357 ¤t_source.text,
358 &parent_source.text,
359 |current_start, parent_start, count| {
360 new_current_line_map
361 .extend(current_lines.peeking_take_while(|&(cur, _)| cur < current_start));
362 while let Some((current, starting)) =
363 current_lines.next_if(|&(cur, _)| cur < current_start + count)
364 {
365 let parent = parent_start + (current - current_start);
366 new_parent_line_map.push((parent, starting));
367 }
368 },
369 );
370 new_current_line_map.extend(current_lines);
371 current_source.line_map = new_current_line_map;
372 parent_source.line_map = if parent_source.line_map.is_empty() {
373 new_parent_line_map
374 } else {
375 itertools::merge(parent_source.line_map.iter().copied(), new_parent_line_map).collect()
376 };
377 if parent_source.line_map.is_empty() {
378 state.commit_source_map.remove(parent_commit_id);
379 } else if parent_edge.is_missing() {
380 for &(parent_line_number, starting_line_number) in &parent_source.line_map {
384 state.original_line_map[starting_line_number] = Err(LineOrigin {
385 commit_id: parent_commit_id.clone(),
386 line_number: parent_line_number,
387 });
388 }
389 state.num_unresolved_roots += 1;
390 }
391 }
392
393 for (current_line_number, starting_line_number) in current_source.line_map {
397 state.original_line_map[starting_line_number] = Ok(LineOrigin {
398 commit_id: current_commit_id.clone(),
399 line_number: current_line_number,
400 });
401 }
402
403 Ok(())
404}
405
406fn copy_same_lines_with(
409 current_contents: &[u8],
410 parent_contents: &[u8],
411 mut copy: impl FnMut(usize, usize, usize),
412) {
413 let diff = Diff::by_line([current_contents, parent_contents]);
414 let mut current_line_counter: usize = 0;
415 let mut parent_line_counter: usize = 0;
416 for hunk in diff.hunks() {
417 match hunk.kind {
418 DiffHunkKind::Matching => {
419 let count = hunk.contents[0].split_inclusive(|b| *b == b'\n').count();
420 copy(current_line_counter, parent_line_counter, count);
421 current_line_counter += count;
422 parent_line_counter += count;
423 }
424 DiffHunkKind::Different => {
425 let current_output = hunk.contents[0];
426 let parent_output = hunk.contents[1];
427 current_line_counter += current_output.split_inclusive(|b| *b == b'\n').count();
428 parent_line_counter += parent_output.split_inclusive(|b| *b == b'\n').count();
429 }
430 }
431 }
432}
433
434async fn get_file_contents(
435 store: &Store,
436 path: &RepoPath,
437 tree: &MergedTree,
438) -> Result<BString, BackendError> {
439 let file_value = tree.path_value_async(path).await?;
440 let effective_file_value = materialize_tree_value(store, path, file_value).await?;
441 match effective_file_value {
442 MaterializedTreeValue::File(mut file) => Ok(file.read_all(path).await?.into()),
443 MaterializedTreeValue::FileConflict(file) => Ok(materialize_merge_result_to_bytes(
444 &file.contents,
445 ConflictMarkerStyle::default(),
446 )),
447 _ => Ok(BString::default()),
448 }
449}
450
451#[cfg(test)]
452mod tests {
453 use super::*;
454
455 fn make_line_origin(commit_id: &CommitId, line_number: usize) -> LineOrigin {
456 LineOrigin {
457 commit_id: commit_id.clone(),
458 line_number,
459 }
460 }
461
462 #[test]
463 fn test_lines_iterator_empty() {
464 let annotation = FileAnnotation {
465 line_map: vec![],
466 text: "".into(),
467 };
468 assert_eq!(annotation.line_origins().collect_vec(), vec![]);
469 assert_eq!(annotation.lines().collect_vec(), vec![]);
470 assert_eq!(annotation.line_ranges().collect_vec(), vec![]);
471 assert_eq!(annotation.compact_line_ranges().collect_vec(), vec![]);
472 }
473
474 #[test]
475 fn test_lines_iterator_with_content() {
476 let commit_id1 = CommitId::from_hex("111111");
477 let commit_id2 = CommitId::from_hex("222222");
478 let commit_id3 = CommitId::from_hex("333333");
479 let annotation = FileAnnotation {
480 line_map: vec![
481 Ok(make_line_origin(&commit_id1, 0)),
482 Ok(make_line_origin(&commit_id2, 1)),
483 Ok(make_line_origin(&commit_id3, 2)),
484 ],
485 text: "foo\n\nbar\n".into(),
486 };
487 assert_eq!(
488 annotation.line_origins().collect_vec(),
489 vec![
490 (Ok(&make_line_origin(&commit_id1, 0)), "foo\n".as_ref()),
491 (Ok(&make_line_origin(&commit_id2, 1)), "\n".as_ref()),
492 (Ok(&make_line_origin(&commit_id3, 2)), "bar\n".as_ref()),
493 ]
494 );
495 assert_eq!(
496 annotation.lines().collect_vec(),
497 vec![
498 (Ok(&commit_id1), "foo\n".as_ref()),
499 (Ok(&commit_id2), "\n".as_ref()),
500 (Ok(&commit_id3), "bar\n".as_ref()),
501 ]
502 );
503 assert_eq!(
504 annotation.line_ranges().collect_vec(),
505 vec![
506 (Ok(&commit_id1), 0..4),
507 (Ok(&commit_id2), 4..5),
508 (Ok(&commit_id3), 5..9),
509 ]
510 );
511 assert_eq!(
512 annotation.compact_line_ranges().collect_vec(),
513 vec![
514 (Ok(&commit_id1), 0..4),
515 (Ok(&commit_id2), 4..5),
516 (Ok(&commit_id3), 5..9),
517 ]
518 );
519 }
520
521 #[test]
522 fn test_lines_iterator_compaction() {
523 let commit_id1 = CommitId::from_hex("111111");
524 let commit_id2 = CommitId::from_hex("222222");
525 let commit_id3 = CommitId::from_hex("333333");
526 let annotation = FileAnnotation {
527 line_map: vec![
528 Ok(make_line_origin(&commit_id1, 0)),
529 Ok(make_line_origin(&commit_id1, 1)),
530 Ok(make_line_origin(&commit_id2, 2)),
531 Ok(make_line_origin(&commit_id1, 3)),
532 Ok(make_line_origin(&commit_id3, 4)),
533 Ok(make_line_origin(&commit_id3, 5)),
534 Ok(make_line_origin(&commit_id3, 6)),
535 ],
536 text: "\n".repeat(7).into(),
537 };
538 assert_eq!(
539 annotation.compact_line_ranges().collect_vec(),
540 vec![
541 (Ok(&commit_id1), 0..2),
542 (Ok(&commit_id2), 2..3),
543 (Ok(&commit_id1), 3..4),
544 (Ok(&commit_id3), 4..7),
545 ]
546 );
547 }
548}