1use std::collections::HashMap;
22use std::collections::hash_map;
23use std::iter;
24use std::ops::Range;
25use std::sync::Arc;
26
27use bstr::BStr;
28use bstr::BString;
29use futures::TryStreamExt as _;
30use itertools::Itertools as _;
31
32use crate::backend::BackendError;
33use crate::backend::BackendResult;
34use crate::backend::CommitId;
35use crate::commit::Commit;
36use crate::conflicts::ConflictMarkerStyle;
37use crate::conflicts::ConflictMaterializeOptions;
38use crate::conflicts::MaterializedTreeValue;
39use crate::conflicts::materialize_merge_result_to_bytes;
40use crate::conflicts::materialize_tree_value;
41use crate::diff::ContentDiff;
42use crate::diff::DiffHunkKind;
43use crate::files::FileMergeHunkLevel;
44use crate::fileset::FilesetExpression;
45use crate::graph::GraphEdge;
46use crate::merge::SameChange;
47use crate::merged_tree::MergedTree;
48use crate::repo::Repo;
49use crate::repo_path::RepoPath;
50use crate::repo_path::RepoPathBuf;
51use crate::revset::ResolvedRevsetExpression;
52use crate::revset::RevsetEvaluationError;
53use crate::revset::RevsetExpression;
54use crate::revset::RevsetFilterPredicate;
55use crate::store::Store;
56use crate::tree_merge::MergeOptions;
57
58#[derive(Clone, Debug)]
60pub struct FileAnnotation {
61 line_map: OriginalLineMap,
62 text: BString,
63}
64
65impl FileAnnotation {
66 pub fn line_origins(&self) -> impl Iterator<Item = (Result<&LineOrigin, &LineOrigin>, &BStr)> {
75 itertools::zip_eq(&self.line_map, self.text.split_inclusive(|b| *b == b'\n'))
76 .map(|(line_origin, line)| (line_origin.as_ref(), line.as_ref()))
77 }
78
79 pub fn lines(&self) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, &BStr)> {
88 itertools::zip_eq(
89 self.commit_ids(),
90 self.text
91 .split_inclusive(|b| *b == b'\n')
92 .map(AsRef::as_ref),
93 )
94 }
95
96 pub fn line_ranges(
103 &self,
104 ) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, Range<usize>)> {
105 let ranges = self
106 .text
107 .split_inclusive(|b| *b == b'\n')
108 .scan(0, |total, line| {
109 let start = *total;
110 *total += line.len();
111 Some(start..*total)
112 });
113 itertools::zip_eq(self.commit_ids(), ranges)
114 }
115
116 pub fn compact_line_ranges(
120 &self,
121 ) -> impl Iterator<Item = (Result<&CommitId, &CommitId>, Range<usize>)> {
122 let mut ranges = self.line_ranges();
123 let mut acc = ranges.next();
124 iter::from_fn(move || {
125 let (acc_commit_id, acc_range) = acc.as_mut()?;
126 for (cur_commit_id, cur_range) in ranges.by_ref() {
127 if *acc_commit_id == cur_commit_id {
128 acc_range.end = cur_range.end;
129 } else {
130 return acc.replace((cur_commit_id, cur_range));
131 }
132 }
133 acc.take()
134 })
135 }
136
137 pub fn text(&self) -> &BStr {
139 self.text.as_ref()
140 }
141
142 fn commit_ids(&self) -> impl Iterator<Item = Result<&CommitId, &CommitId>> {
143 self.line_map.iter().map(|line_origin| {
144 line_origin
145 .as_ref()
146 .map(|origin| &origin.commit_id)
147 .map_err(|origin| &origin.commit_id)
148 })
149 }
150}
151
152#[derive(Clone, Debug)]
154pub struct FileAnnotator {
155 file_path: RepoPathBuf,
157 starting_text: BString,
158 state: AnnotationState,
159}
160
161impl FileAnnotator {
162 pub async fn from_commit(
166 starting_commit: &Commit,
167 file_path: &RepoPath,
168 ) -> BackendResult<Self> {
169 let source = Source::load(starting_commit, file_path).await?;
170 Ok(Self::with_source(starting_commit.id(), file_path, source))
171 }
172
173 pub fn with_file_content(
180 starting_commit_id: &CommitId,
181 file_path: &RepoPath,
182 starting_text: impl Into<Vec<u8>>,
183 ) -> Self {
184 let source = Source::new(BString::new(starting_text.into()));
185 Self::with_source(starting_commit_id, file_path, source)
186 }
187
188 fn with_source(
189 starting_commit_id: &CommitId,
190 file_path: &RepoPath,
191 mut source: Source,
192 ) -> Self {
193 source.fill_line_map();
194 let starting_text = source.text.clone();
195 let state = AnnotationState {
196 original_line_map: (0..source.line_map.len())
197 .map(|line_number| {
198 Err(LineOrigin {
199 commit_id: starting_commit_id.clone(),
200 line_number,
201 })
202 })
203 .collect(),
204 commit_source_map: HashMap::from([(starting_commit_id.clone(), source)]),
205 num_unresolved_roots: 0,
206 };
207 Self {
208 file_path: file_path.to_owned(),
209 starting_text,
210 state,
211 }
212 }
213
214 pub async fn compute(
220 &mut self,
221 repo: &dyn Repo,
222 domain: &Arc<ResolvedRevsetExpression>,
223 ) -> Result<(), RevsetEvaluationError> {
224 process_commits(repo, &mut self.state, domain, &self.file_path).await
225 }
226
227 pub fn pending_commits(&self) -> impl Iterator<Item = &CommitId> {
229 self.state.commit_source_map.keys()
230 }
231
232 pub fn to_annotation(&self) -> FileAnnotation {
234 FileAnnotation {
238 line_map: self.state.original_line_map.clone(),
239 text: self.starting_text.clone(),
240 }
241 }
242}
243
244#[derive(Clone, Debug)]
246struct AnnotationState {
247 original_line_map: OriginalLineMap,
248 commit_source_map: HashMap<CommitId, Source>,
250 num_unresolved_roots: usize,
252}
253
254#[derive(Clone, Debug)]
256struct Source {
257 line_map: Vec<(usize, usize)>,
260 text: BString,
262}
263
264impl Source {
265 fn new(text: BString) -> Self {
266 Self {
267 line_map: Vec::new(),
268 text,
269 }
270 }
271
272 async fn load(commit: &Commit, file_path: &RepoPath) -> Result<Self, BackendError> {
273 let tree = commit.tree();
274 let text = get_file_contents(commit.store(), file_path, &tree).await?;
275 Ok(Self::new(text))
276 }
277
278 fn fill_line_map(&mut self) {
279 let lines = self.text.split_inclusive(|b| *b == b'\n');
280 self.line_map = lines.enumerate().map(|(i, _)| (i, i)).collect();
281 }
282}
283
284type OriginalLineMap = Vec<Result<LineOrigin, LineOrigin>>;
287
288#[derive(Clone, Debug, Eq, PartialEq)]
290pub struct LineOrigin {
291 pub commit_id: CommitId,
293 pub line_number: usize,
295}
296
297async fn process_commits(
300 repo: &dyn Repo,
301 state: &mut AnnotationState,
302 domain: &Arc<ResolvedRevsetExpression>,
303 file_name: &RepoPath,
304) -> Result<(), RevsetEvaluationError> {
305 let predicate = RevsetFilterPredicate::File(FilesetExpression::file_path(file_name.to_owned()));
306 let heads = RevsetExpression::commits(state.commit_source_map.keys().cloned().collect());
312 let revset = heads
313 .union(&domain.intersection(&heads.ancestors()).filtered(predicate))
314 .evaluate(repo)?;
315
316 state.num_unresolved_roots = 0;
317 let mut nodes = revset.stream_graph();
318 while let Some((commit_id, edge_list)) = nodes.try_next().await? {
319 process_commit(repo, file_name, state, &commit_id, &edge_list).await?;
320 if state.commit_source_map.len() == state.num_unresolved_roots {
321 break;
323 }
324 }
325 Ok(())
326}
327
328async fn process_commit(
332 repo: &dyn Repo,
333 file_name: &RepoPath,
334 state: &mut AnnotationState,
335 current_commit_id: &CommitId,
336 edges: &[GraphEdge<CommitId>],
337) -> Result<(), BackendError> {
338 let Some(mut current_source) = state.commit_source_map.remove(current_commit_id) else {
339 return Ok(());
340 };
341
342 for parent_edge in edges {
343 let parent_commit_id = &parent_edge.target;
344 let parent_source = match state.commit_source_map.entry(parent_commit_id.clone()) {
345 hash_map::Entry::Occupied(entry) => entry.into_mut(),
346 hash_map::Entry::Vacant(entry) => {
347 let commit = repo.store().get_commit_async(entry.key()).await?;
348 entry.insert(Source::load(&commit, file_name).await?)
349 }
350 };
351
352 let mut current_lines = current_source.line_map.iter().copied().peekable();
361 let mut new_current_line_map = Vec::new();
362 let mut new_parent_line_map = Vec::new();
363 copy_same_lines_with(
364 ¤t_source.text,
365 &parent_source.text,
366 |current_start, parent_start, count| {
367 new_current_line_map
368 .extend(current_lines.peeking_take_while(|&(cur, _)| cur < current_start));
369 while let Some((current, starting)) =
370 current_lines.next_if(|&(cur, _)| cur < current_start + count)
371 {
372 let parent = parent_start + (current - current_start);
373 new_parent_line_map.push((parent, starting));
374 }
375 },
376 );
377 new_current_line_map.extend(current_lines);
378 current_source.line_map = new_current_line_map;
379 parent_source.line_map = if parent_source.line_map.is_empty() {
380 new_parent_line_map
381 } else {
382 itertools::merge(parent_source.line_map.iter().copied(), new_parent_line_map).collect()
383 };
384 if parent_source.line_map.is_empty() {
385 state.commit_source_map.remove(parent_commit_id);
386 } else if parent_edge.is_missing() {
387 for &(parent_line_number, starting_line_number) in &parent_source.line_map {
391 state.original_line_map[starting_line_number] = Err(LineOrigin {
392 commit_id: parent_commit_id.clone(),
393 line_number: parent_line_number,
394 });
395 }
396 state.num_unresolved_roots += 1;
397 }
398 }
399
400 for (current_line_number, starting_line_number) in current_source.line_map {
404 state.original_line_map[starting_line_number] = Ok(LineOrigin {
405 commit_id: current_commit_id.clone(),
406 line_number: current_line_number,
407 });
408 }
409
410 Ok(())
411}
412
413fn copy_same_lines_with(
416 current_contents: &[u8],
417 parent_contents: &[u8],
418 mut copy: impl FnMut(usize, usize, usize),
419) {
420 let diff = ContentDiff::by_line([current_contents, parent_contents]);
421 let mut current_line_counter: usize = 0;
422 let mut parent_line_counter: usize = 0;
423 for hunk in diff.hunks() {
424 match hunk.kind {
425 DiffHunkKind::Matching => {
426 let count = hunk.contents[0].split_inclusive(|b| *b == b'\n').count();
427 copy(current_line_counter, parent_line_counter, count);
428 current_line_counter += count;
429 parent_line_counter += count;
430 }
431 DiffHunkKind::Different => {
432 let current_output = hunk.contents[0];
433 let parent_output = hunk.contents[1];
434 current_line_counter += current_output.split_inclusive(|b| *b == b'\n').count();
435 parent_line_counter += parent_output.split_inclusive(|b| *b == b'\n').count();
436 }
437 }
438 }
439}
440
441async fn get_file_contents(
442 store: &Store,
443 path: &RepoPath,
444 tree: &MergedTree,
445) -> Result<BString, BackendError> {
446 let file_value = tree.path_value(path).await?;
447 let effective_file_value =
448 materialize_tree_value(store, path, file_value, tree.labels()).await?;
449 match effective_file_value {
450 MaterializedTreeValue::File(mut file) => Ok(file.read_all(path).await?.into()),
451 MaterializedTreeValue::FileConflict(file) => {
452 let options = ConflictMaterializeOptions {
454 marker_style: ConflictMarkerStyle::Diff,
455 marker_len: None,
456 merge: MergeOptions {
457 hunk_level: FileMergeHunkLevel::Line,
458 same_change: SameChange::Accept,
459 },
460 };
461 Ok(materialize_merge_result_to_bytes(
462 &file.contents,
463 &file.labels,
464 &options,
465 ))
466 }
467 _ => Ok(BString::default()),
468 }
469}
470
471#[cfg(test)]
472mod tests {
473 use super::*;
474
475 fn make_line_origin(commit_id: &CommitId, line_number: usize) -> LineOrigin {
476 LineOrigin {
477 commit_id: commit_id.clone(),
478 line_number,
479 }
480 }
481
482 #[test]
483 fn test_lines_iterator_empty() {
484 let annotation = FileAnnotation {
485 line_map: vec![],
486 text: "".into(),
487 };
488 assert_eq!(annotation.line_origins().collect_vec(), vec![]);
489 assert_eq!(annotation.lines().collect_vec(), vec![]);
490 assert_eq!(annotation.line_ranges().collect_vec(), vec![]);
491 assert_eq!(annotation.compact_line_ranges().collect_vec(), vec![]);
492 }
493
494 #[test]
495 fn test_lines_iterator_with_content() {
496 let commit_id1 = CommitId::from_hex("111111");
497 let commit_id2 = CommitId::from_hex("222222");
498 let commit_id3 = CommitId::from_hex("333333");
499 let annotation = FileAnnotation {
500 line_map: vec![
501 Ok(make_line_origin(&commit_id1, 0)),
502 Ok(make_line_origin(&commit_id2, 1)),
503 Ok(make_line_origin(&commit_id3, 2)),
504 ],
505 text: "foo\n\nbar\n".into(),
506 };
507 assert_eq!(
508 annotation.line_origins().collect_vec(),
509 vec![
510 (Ok(&make_line_origin(&commit_id1, 0)), "foo\n".as_ref()),
511 (Ok(&make_line_origin(&commit_id2, 1)), "\n".as_ref()),
512 (Ok(&make_line_origin(&commit_id3, 2)), "bar\n".as_ref()),
513 ]
514 );
515 assert_eq!(
516 annotation.lines().collect_vec(),
517 vec![
518 (Ok(&commit_id1), "foo\n".as_ref()),
519 (Ok(&commit_id2), "\n".as_ref()),
520 (Ok(&commit_id3), "bar\n".as_ref()),
521 ]
522 );
523 assert_eq!(
524 annotation.line_ranges().collect_vec(),
525 vec![
526 (Ok(&commit_id1), 0..4),
527 (Ok(&commit_id2), 4..5),
528 (Ok(&commit_id3), 5..9),
529 ]
530 );
531 assert_eq!(
532 annotation.compact_line_ranges().collect_vec(),
533 vec![
534 (Ok(&commit_id1), 0..4),
535 (Ok(&commit_id2), 4..5),
536 (Ok(&commit_id3), 5..9),
537 ]
538 );
539 }
540
541 #[test]
542 fn test_lines_iterator_compaction() {
543 let commit_id1 = CommitId::from_hex("111111");
544 let commit_id2 = CommitId::from_hex("222222");
545 let commit_id3 = CommitId::from_hex("333333");
546 let annotation = FileAnnotation {
547 line_map: vec![
548 Ok(make_line_origin(&commit_id1, 0)),
549 Ok(make_line_origin(&commit_id1, 1)),
550 Ok(make_line_origin(&commit_id2, 2)),
551 Ok(make_line_origin(&commit_id1, 3)),
552 Ok(make_line_origin(&commit_id3, 4)),
553 Ok(make_line_origin(&commit_id3, 5)),
554 Ok(make_line_origin(&commit_id3, 6)),
555 ],
556 text: "\n".repeat(7).into(),
557 };
558 assert_eq!(
559 annotation.compact_line_ranges().collect_vec(),
560 vec![
561 (Ok(&commit_id1), 0..2),
562 (Ok(&commit_id2), 2..3),
563 (Ok(&commit_id1), 3..4),
564 (Ok(&commit_id3), 4..7),
565 ]
566 );
567 }
568}