1use crate::config::ConfigSet;
8use crate::crlf::{get_file_attrs, load_gitattributes, DiffAttr};
9use crate::diff::{detect_renames, diff_trees, zero_oid, DiffEntry};
10use crate::error::{Error, Result};
11use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind, TreeEntry};
12use crate::odb::Odb;
13use crate::userdiff::{matcher_for_driver, FuncnameMatcher};
14use regex::bytes::RegexBuilder as BytesRegexBuilder;
15use similar::{Algorithm, ChangeTag, TextDiff};
16use std::collections::{HashMap, HashSet};
17use std::path::Path;
18
19#[derive(Clone, Copy, Debug, Eq, PartialEq)]
21pub struct Range {
22 pub start: i64,
23 pub end: i64,
24}
25
26#[derive(Clone, Debug, Default)]
28pub struct RangeSet {
29 pub ranges: Vec<Range>,
30}
31
32impl RangeSet {
33 fn append(&mut self, start: i64, end: i64) {
34 if start < end {
35 self.ranges.push(Range { start, end });
36 }
37 }
38
39 fn sort_and_merge(&mut self) {
40 if self.ranges.is_empty() {
41 return;
42 }
43 self.ranges.sort_by_key(|r| (r.start, r.end));
44 let mut out: Vec<Range> = Vec::new();
45 for r in std::mem::take(&mut self.ranges) {
46 if r.start >= r.end {
47 continue;
48 }
49 if let Some(last) = out.last_mut() {
50 if r.start <= last.end {
51 last.end = last.end.max(r.end);
52 } else {
53 out.push(r);
54 }
55 } else {
56 out.push(r);
57 }
58 }
59 self.ranges = out;
60 }
61
62 fn is_empty(&self) -> bool {
63 self.ranges.is_empty()
64 }
65}
66
67#[derive(Clone, Copy, Debug, Eq, PartialEq)]
69pub struct DiffHunk {
70 pub parent: Range,
71 pub target: Range,
72}
73
74#[derive(Clone, Debug, Default)]
76pub struct DiffRanges {
77 pub hunks: Vec<DiffHunk>,
78}
79
80impl DiffRanges {
81 fn parent_ranges(&self) -> RangeSet {
82 let mut s = RangeSet::default();
83 for h in &self.hunks {
84 s.append(h.parent.start, h.parent.end);
85 }
86 s.sort_and_merge();
87 s
88 }
89
90 fn target_ranges(&self) -> RangeSet {
91 let mut s = RangeSet::default();
92 for h in &self.hunks {
93 s.append(h.target.start, h.target.end);
94 }
95 s.sort_and_merge();
96 s
97 }
98}
99
100fn push_diff_hunk(out: &mut DiffRanges, p0: i64, p1: i64, t0: i64, t1: i64) {
101 out.hunks.push(DiffHunk {
102 parent: Range { start: p0, end: p1 },
103 target: Range { start: t0, end: t1 },
104 });
105}
106
107fn collect_diff_ranges(old: &str, new: &str) -> DiffRanges {
108 let diff = TextDiff::configure()
109 .algorithm(Algorithm::Myers)
110 .diff_lines(old, new);
111 let mut out = DiffRanges::default();
112 let old_lines: Vec<&str> = old.lines().collect();
113 let new_lines: Vec<&str> = new.lines().collect();
114 let mut o = 0usize;
115 let mut n = 0usize;
116 let mut in_hunk = false;
117 let mut h_o_start = 0usize;
118 let mut h_n_start = 0usize;
119 let mut h_o_end = 0usize;
120 let mut h_n_end = 0usize;
121
122 for change in diff.iter_all_changes() {
123 let len = change.value().lines().count().max(1);
124 match change.tag() {
125 ChangeTag::Equal => {
126 if in_hunk {
127 push_diff_hunk(
128 &mut out,
129 h_o_start as i64,
130 h_o_end as i64,
131 h_n_start as i64,
132 h_n_end as i64,
133 );
134 in_hunk = false;
135 }
136 o = (o + len).min(old_lines.len());
137 n = (n + len).min(new_lines.len());
138 }
139 ChangeTag::Delete => {
140 if !in_hunk {
141 in_hunk = true;
142 h_o_start = o;
143 h_n_start = n;
144 }
145 h_o_end = (o + len).min(old_lines.len());
146 h_n_end = n;
147 o = h_o_end;
148 }
149 ChangeTag::Insert => {
150 if !in_hunk {
151 in_hunk = true;
152 h_o_start = o;
153 h_n_start = n;
154 }
155 h_o_end = o;
156 h_n_end = (n + len).min(new_lines.len());
157 n = h_n_end;
158 }
159 }
160 }
161 if in_hunk {
162 push_diff_hunk(
163 &mut out,
164 h_o_start as i64,
165 h_o_end as i64,
166 h_n_start as i64,
167 h_n_end as i64,
168 );
169 }
170 out
171}
172
173fn ranges_overlap(a: Range, b: Range) -> bool {
174 !(a.end <= b.start || b.end <= a.start)
175}
176
177fn diff_ranges_filter_touched(diff: &DiffRanges, rs: &RangeSet) -> DiffRanges {
178 let mut out = DiffRanges::default();
179 let mut j = 0usize;
180 for h in &diff.hunks {
181 let tr = h.target;
182 while j < rs.ranges.len() && tr.start >= rs.ranges[j].end {
183 j += 1;
184 }
185 if j == rs.ranges.len() {
186 break;
187 }
188 if ranges_overlap(tr, rs.ranges[j]) {
189 out.hunks.push(*h);
190 }
191 }
192 out
193}
194
195fn range_set_difference(out: &mut RangeSet, a: &RangeSet, b: &RangeSet) {
196 let mut j = 0usize;
197 for r in &a.ranges {
198 let mut start = r.start;
199 let end = r.end;
200 while start < end {
201 while j < b.ranges.len() && start >= b.ranges[j].end {
202 j += 1;
203 }
204 if j >= b.ranges.len() || end <= b.ranges[j].start {
205 out.append(start, end);
206 break;
207 }
208 if start >= b.ranges[j].start {
209 start = b.ranges[j].end;
210 } else if end > b.ranges[j].start {
211 if start < b.ranges[j].start {
212 out.append(start, b.ranges[j].start);
213 }
214 start = b.ranges[j].end;
215 }
216 }
217 }
218}
219
220fn range_set_shift_diff(out: &mut RangeSet, rs: &RangeSet, diff: &DiffRanges) {
221 let mut j = 0usize;
222 let mut offset: i64 = 0;
223 for r in &rs.ranges {
224 while j < diff.hunks.len() && r.start >= diff.hunks[j].target.start {
225 let h = diff.hunks[j];
226 offset += (h.parent.end - h.parent.start) - (h.target.end - h.target.start);
227 j += 1;
228 }
229 out.append(r.start + offset, r.end + offset);
230 }
231}
232
233fn range_set_union(out: &mut RangeSet, a: &RangeSet, b: &RangeSet) {
234 let mut i = 0usize;
235 let mut j = 0usize;
236 while i < a.ranges.len() || j < b.ranges.len() {
237 let (start, end, from_a) = match (i < a.ranges.len(), j < b.ranges.len()) {
238 (true, true) => {
239 let ra = a.ranges[i];
240 let rb = b.ranges[j];
241 if ra.start < rb.start {
242 (ra.start, ra.end, true)
243 } else if rb.start < ra.start {
244 (rb.start, rb.end, false)
245 } else if ra.end < rb.end {
246 (ra.start, ra.end, true)
247 } else {
248 (rb.start, rb.end, false)
249 }
250 }
251 (true, false) => {
252 let ra = a.ranges[i];
253 (ra.start, ra.end, true)
254 }
255 (false, true) => {
256 let rb = b.ranges[j];
257 (rb.start, rb.end, false)
258 }
259 (false, false) => break,
260 };
261 if from_a {
262 i += 1;
263 } else {
264 j += 1;
265 }
266 if start >= end {
267 continue;
268 }
269 if let Some(last) = out.ranges.last_mut() {
270 if last.end < start {
271 out.ranges.push(Range { start, end });
272 } else if last.end < end {
273 last.end = end;
274 }
275 } else {
276 out.ranges.push(Range { start, end });
277 }
278 }
279}
280
281fn range_set_map_across_diff(
282 out: &mut RangeSet,
283 rs: &RangeSet,
284 diff: &DiffRanges,
285 touched_out: &mut DiffRanges,
286) {
287 let touched = diff_ranges_filter_touched(diff, rs);
288 let touched_target = touched.target_ranges();
289 let touched_parent = touched.parent_ranges();
290 let mut tmp1 = RangeSet::default();
291 range_set_difference(&mut tmp1, rs, &touched_target);
292 let mut tmp2 = RangeSet::default();
293 range_set_shift_diff(&mut tmp2, &tmp1, diff);
294 range_set_union(out, &tmp2, &touched_parent);
295 *touched_out = touched;
296}
297
298#[derive(Clone, Debug)]
300pub struct LineLogFile {
301 pub path: String,
302 pub ranges: RangeSet,
303}
304
305pub type LineLogState = HashMap<ObjectId, Vec<LineLogFile>>;
307
308fn search_insert_path(files: &[LineLogFile], path: &str) -> usize {
309 match files.binary_search_by_key(&path, |f| f.path.as_str()) {
310 Ok(i) => i,
311 Err(i) => i,
312 }
313}
314
315fn line_log_insert(files: &mut Vec<LineLogFile>, path: String, begin: i64, end: i64) {
316 let idx = search_insert_path(files, &path);
317 if idx < files.len() && files[idx].path == path {
318 files[idx].ranges.append(begin, end);
319 files[idx].ranges.sort_and_merge();
320 } else {
321 let mut rs = RangeSet::default();
322 rs.append(begin, end);
323 files.insert(idx, LineLogFile { path, ranges: rs });
324 }
325}
326
327fn fill_line_ends(data: &[u8]) -> (i64, Vec<usize>) {
328 let mut ends: Vec<usize> = vec![0];
329 let mut num = 0usize;
330 while num < data.len() {
331 if data[num] == b'\n' || num == data.len() - 1 {
332 ends.push(num);
333 }
334 num += 1;
335 }
336 let lines = (ends.len() as i64) - 1;
337 (lines, ends)
338}
339
340fn line_byte_offset(line: i64, ends: &[usize]) -> usize {
342 if line <= 0 {
343 0
344 } else {
345 ends[line as usize] + 1
346 }
347}
348
349fn nth_line<'a>(data: &'a [u8], line: i64, ends: &[usize]) -> &'a [u8] {
350 let idx = line_byte_offset(line, ends);
351 &data[idx..]
352}
353
354fn line_content_git_style<'a>(
356 data: &'a [u8],
357 line_idx: i64,
358 ends: &[usize],
359 total_lines: i64,
360) -> &'a [u8] {
361 let start = line_byte_offset(line_idx, ends);
362 let end = if line_idx + 1 >= total_lines {
363 data.len()
364 } else {
365 line_byte_offset(line_idx + 1, ends)
366 };
367 &data[start..end]
368}
369
370fn parse_loc<'a>(
373 spec: &'a str,
374 data: &[u8],
375 ends: &[usize],
376 lines: i64,
377 anchor_for_regex: i64,
378 rel_begin_line: i64,
379) -> Result<(i64, &'a str)> {
380 let rel_ok = rel_begin_line >= 1;
383
384 let bytes = spec.as_bytes();
385 if rel_ok && !spec.is_empty() && (bytes[0] == b'+' || bytes[0] == b'-') {
386 let neg = bytes[0] == b'-';
387 let rest = &spec[1..];
388 let (digits, after) = split_prefix_digits(rest);
389 if digits.is_empty() {
390 return Err(Error::CorruptObject("-L invalid relative range".to_owned()));
391 }
392 let num: i64 = digits
393 .parse()
394 .map_err(|_| Error::CorruptObject("-L invalid relative range".to_owned()))?;
395 let num = if neg { -num } else { num };
396 let ret = if num > 0 {
397 rel_begin_line + num - 2
398 } else if num == 0 {
399 rel_begin_line
400 } else {
401 (rel_begin_line + num).max(1)
402 };
403 return Ok((ret, after));
404 }
405
406 let (digits, after_num) = split_prefix_digits(spec);
407 if !digits.is_empty() {
408 let num: i64 = digits
409 .parse()
410 .map_err(|_| Error::CorruptObject("-L invalid line number".to_owned()))?;
411 if num <= 0 {
412 return Err(Error::CorruptObject("-L invalid line number".to_owned()));
413 }
414 return Ok((num, after_num));
415 }
416
417 let mut s = spec;
418 let mut anchor_line = anchor_for_regex;
419 if anchor_line < 0 {
420 anchor_line = -anchor_line;
421 } else if s.starts_with('^') {
422 anchor_line = 1;
423 s = &s[1..];
424 }
425
426 let s_bytes = s.as_bytes();
427 if s_bytes.first() != Some(&b'/') {
428 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
429 }
430 let mut i = 1usize;
431 let mut pattern = String::new();
432 while i < s_bytes.len() {
433 if s_bytes[i] == b'/' {
434 break;
435 }
436 if s_bytes[i] == b'\\' && i + 1 < s_bytes.len() {
437 pattern.push(s_bytes[i + 1] as char);
438 i += 2;
439 continue;
440 }
441 pattern.push(s_bytes[i] as char);
442 i += 1;
443 }
444 if i >= s_bytes.len() || s_bytes[i] != b'/' {
445 return Err(Error::CorruptObject("malformed -L regex".to_owned()));
446 }
447 let rest = &s[i + 1..];
448
449 let begin0 = (anchor_line - 1).max(0).min(lines.saturating_sub(1).max(0));
450 let line_start = nth_line(data, begin0, ends);
451 let re = BytesRegexBuilder::new(&pattern)
452 .multi_line(true)
453 .build()
454 .map_err(|e| Error::CorruptObject(format!("regex: {e}")))?;
455 let mat = re
456 .find(line_start)
457 .ok_or_else(|| Error::CorruptObject("no match".to_owned()))?;
458 let mut line_idx = begin0;
459 let mut line_beg = line_byte_offset(line_idx, ends);
460 let cp = line_byte_offset(begin0, ends) + mat.start();
461 while line_idx < lines {
462 let next_beg = line_byte_offset(line_idx + 1, ends);
463 if line_beg <= cp && cp < next_beg {
464 break;
465 }
466 line_idx += 1;
467 line_beg = next_beg;
468 }
469 Ok((line_idx + 1, rest))
470}
471
472fn split_prefix_digits(s: &str) -> (&str, &str) {
473 let end = s
474 .as_bytes()
475 .iter()
476 .take_while(|b| b.is_ascii_digit())
477 .count();
478 (&s[..end], &s[end..])
479}
480
481fn match_funcname_line(line: &[u8]) -> bool {
482 let Some(&c) = line.first() else {
483 return false;
484 };
485 c.is_ascii_alphabetic() || c == b'_' || c == b'$'
486}
487
488fn line_matches_funcname(matcher: Option<&FuncnameMatcher>, line: &[u8]) -> bool {
489 let mut end = line.len();
490 while end > 0 && (line[end - 1] == b'\n' || line[end - 1] == b'\r') {
491 end -= 1;
492 }
493 let body = &line[..end];
494 if let Some(m) = matcher {
495 let s = String::from_utf8_lossy(body);
496 m.match_line(s.as_ref()).is_some()
497 } else {
498 match_funcname_line(body)
499 }
500}
501
502fn funcname_matcher_for_path(
505 git_dir: &Path,
506 work_tree: Option<&Path>,
507 path: &str,
508) -> Option<FuncnameMatcher> {
509 let wt = work_tree?;
510 let rules = load_gitattributes(wt);
511 let config = ConfigSet::load(Some(git_dir), true).unwrap_or_default();
512 let fa = get_file_attrs(&rules, path, false, &config);
513 let DiffAttr::Driver(ref name) = fa.diff_attr else {
514 return None;
515 };
516 matcher_for_driver(&config, name).ok().flatten()
517}
518
519fn parse_range_funcname<'a>(
520 arg: &'a str,
521 data: &[u8],
522 ends: &[usize],
523 lines: i64,
524 mut anchor: i64,
525 userdiff: Option<&FuncnameMatcher>,
526) -> Result<(i64, i64, &'a str)> {
527 let mut s = arg;
528 if s.starts_with('^') && s.get(1..2) == Some(":") {
529 anchor = 1;
530 s = &s[2..];
531 } else if s.starts_with(':') {
532 s = &s[1..];
533 } else {
534 return Err(Error::CorruptObject("bad funcname range".to_owned()));
535 }
536
537 let mut i = 0usize;
538 let b = s.as_bytes();
539 while i < b.len() && b[i] != b':' {
540 if b[i] == b'\\' && i + 1 < b.len() {
541 i += 2;
542 } else {
543 i += 1;
544 }
545 }
546 if i >= b.len() {
547 return Err(Error::CorruptObject("bad funcname range".to_owned()));
548 }
549 let pattern = &s[..i];
550 let after_colon = i + 1;
551
552 let anchor0 = (anchor - 1).max(0).min(lines.saturating_sub(1).max(0));
553
554 if pattern == "^" || pattern == "$" || pattern == ".*" {
556 let mut line_idx = anchor0;
557 let mut found_begin = None;
558 while line_idx < lines {
559 let slice = line_content_git_style(data, line_idx, ends, lines);
560 if line_matches_funcname(userdiff, slice) {
561 found_begin = Some(line_idx);
562 break;
563 }
564 line_idx += 1;
565 }
566 let begin = found_begin.ok_or_else(|| Error::CorruptObject("no match".to_owned()))?;
567 let mut end = begin + 1;
568 while end < lines {
569 let slice = line_content_git_style(data, end, ends, lines);
570 if line_matches_funcname(userdiff, slice) {
571 break;
572 }
573 end += 1;
574 }
575 let tail = s.get(after_colon..).unwrap_or("");
576 return Ok((begin + 1, end, tail));
577 }
578
579 let start_search = nth_line(data, anchor0, ends);
580 let re = BytesRegexBuilder::new(pattern)
581 .multi_line(true)
582 .build()
583 .map_err(|e| Error::CorruptObject(format!("regex: {e}")))?;
584
585 let mut p = start_search;
586 let found_bol = loop {
587 let Some(m) = re.find(p) else {
588 break None;
589 };
590 let match_start = p.as_ptr() as usize - data.as_ptr() as usize + m.start();
591 let mut bol = match_start;
592 while bol > 0 && data[bol - 1] != b'\n' {
593 bol -= 1;
594 }
595 let mut eol = match_start;
596 while eol < data.len() && data[eol] != b'\n' {
597 eol += 1;
598 }
599 if eol < data.len() {
600 eol += 1;
601 }
602 let line_slice = &data[bol..eol.min(data.len())];
603 if line_matches_funcname(userdiff, line_slice) {
604 break Some(bol);
605 }
606 p = &data[eol.min(data.len())..];
607 if p.is_empty() {
608 break None;
609 }
610 };
611
612 let bol = found_bol.ok_or_else(|| Error::CorruptObject("no match".to_owned()))?;
613
614 let mut begin = 0i64;
615 while begin < lines {
616 if line_byte_offset(begin, ends) == bol {
617 break;
618 }
619 begin += 1;
620 }
621 if begin >= lines {
622 return Err(Error::CorruptObject("funcname matches at EOF".to_owned()));
623 }
624
625 let mut end = begin + 1;
626 while end < lines {
627 let slice = line_content_git_style(data, end, ends, lines);
628 if line_matches_funcname(userdiff, slice) {
629 break;
630 }
631 end += 1;
632 }
633
634 let tail = s.get(after_colon..).unwrap_or("");
635 Ok((begin + 1, end, tail))
636}
637
638fn split_slash_regex_range(arg: &str) -> Result<(&str, Option<&str>)> {
640 let b = arg.as_bytes();
641 if b.first() != Some(&b'/') {
642 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
643 }
644 let mut pos = 0usize;
645 let start_first = pos;
646 pos += 1;
647 while pos < b.len() {
648 if b[pos] == b'\\' {
649 pos = (pos + 2).min(b.len());
650 continue;
651 }
652 if b[pos] == b'/' {
653 pos += 1;
654 break;
655 }
656 pos += 1;
657 }
658 if pos > b.len() || pos == start_first + 1 {
659 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
660 }
661 let first = &arg[start_first..pos];
662 if pos >= arg.len() {
663 return Ok((first, None));
664 }
665 if b.get(pos).copied() != Some(b',') {
666 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
667 }
668 pos += 1;
669 if b.get(pos).copied() != Some(b'/') {
670 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
671 }
672 let start_second = pos;
673 pos += 1;
674 while pos < b.len() {
675 if b[pos] == b'\\' {
676 pos = (pos + 2).min(b.len());
677 continue;
678 }
679 if b[pos] == b'/' {
680 pos += 1;
681 break;
682 }
683 pos += 1;
684 }
685 if pos != arg.len() {
686 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
687 }
688 Ok((first, Some(&arg[start_second..pos])))
689}
690
691fn parse_range_arg(
692 arg: &str,
693 data: &[u8],
694 ends: &[usize],
695 lines: i64,
696 anchor: i64,
697 userdiff: Option<&FuncnameMatcher>,
698) -> Result<(i64, i64)> {
699 let (begin, end) = if arg.starts_with(':') || arg.starts_with("^:") {
700 let (b, e, rest) = parse_range_funcname(arg, data, ends, lines, anchor, userdiff)?;
701 if !rest.is_empty() {
702 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
703 }
704 (b, e)
705 } else if arg.starts_with('/') {
706 let (a, second) = split_slash_regex_range(arg)?;
707 let (b1, r1) = parse_loc(a, data, ends, lines, -anchor, 0)?;
708 if !r1.is_empty() {
709 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
710 }
711 let (b2, r2) = if let Some(s2) = second {
712 parse_loc(s2, data, ends, lines, b1 + 1, b1 + 1)?
713 } else {
714 (b1, "")
715 };
716 if !r2.is_empty() {
717 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
718 }
719 let mut b1 = b1;
720 let mut b2 = b2;
721 if b1 != 0 && b2 != 0 && b2 < b1 {
722 std::mem::swap(&mut b1, &mut b2);
723 }
724 (b1, b2)
725 } else if arg.contains(',') {
726 let comma = arg
727 .find(',')
728 .ok_or_else(|| Error::CorruptObject("malformed -L argument".to_owned()))?;
729 let (a, b) = arg.split_at(comma);
730 let b = &b[1..];
731 let (b1, r1) = if a.is_empty() {
732 (1i64, "")
733 } else {
734 parse_loc(a, data, ends, lines, -anchor, 0)?
735 };
736 if !r1.is_empty() {
737 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
738 }
739 let (b2, r2) = if b.is_empty() {
740 (b1, "")
741 } else {
742 parse_loc(b, data, ends, lines, b1 + 1, b1 + 1)?
743 };
744 if !r2.is_empty() {
745 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
746 }
747 let mut b1 = b1;
748 let mut b2 = b2;
749 if b1 != 0 && b2 != 0 && b2 < b1 {
750 std::mem::swap(&mut b1, &mut b2);
751 }
752 (b1, b2)
753 } else {
754 let (n, rest) = parse_loc(arg, data, ends, lines, -anchor, 0)?;
755 if !rest.is_empty() {
756 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
757 }
758 (n, 0)
761 };
762
763 if (lines == 0 && (begin != 0 || end != 0)) || (lines > 0 && lines < begin) {
764 return Err(Error::CorruptObject(format!("file has only {lines} lines")));
765 }
766 let mut begin = begin;
767 let mut end = end;
768 if begin < 1 {
769 begin = 1;
770 }
771 if end < 1 || lines < end {
772 end = lines;
773 }
774 begin -= 1;
775 Ok((begin, end))
776}
777
778fn scan_funcname_pattern_colon(s: &str) -> Option<usize> {
780 let b = s.as_bytes();
781 let mut i = 0usize;
782 while i < b.len() {
783 if b[i] == b':' {
784 return Some(i);
785 }
786 if b[i] == b'\\' && i + 1 < b.len() {
787 i += 2;
788 } else {
789 i += 1;
790 }
791 }
792 None
793}
794
795fn split_regex_line_range(spec: &str) -> Result<(&str, &str)> {
797 let b = spec.as_bytes();
798 if b.first() != Some(&b'/') {
799 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
800 }
801 let mut pos = 0usize;
802 loop {
803 if pos >= b.len() || b[pos] != b'/' {
804 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
805 }
806 pos += 1;
807 while pos < b.len() {
808 if b[pos] == b'\\' {
809 pos = (pos + 2).min(b.len());
810 continue;
811 }
812 if b[pos] == b'/' {
813 pos += 1;
814 break;
815 }
816 pos += 1;
817 }
818 if pos > b.len() {
819 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
820 }
821 if pos < b.len() && b[pos] == b',' {
822 pos += 1;
823 continue;
824 }
825 break;
826 }
827 if pos >= b.len() || b[pos] != b':' {
828 return Err(Error::CorruptObject("malformed -L argument".to_owned()));
829 }
830 let path = &spec[pos + 1..];
831 if path.is_empty() {
832 return Err(Error::CorruptObject(
833 "-L argument not 'start,end:file'".to_owned(),
834 ));
835 }
836 Ok((&spec[..pos], path))
837}
838
839fn split_l_arg(spec: &str) -> Result<(&str, &str)> {
841 if spec.is_empty() {
842 return Err(Error::CorruptObject(
843 "-L argument not 'start,end:file'".to_owned(),
844 ));
845 }
846 if let Some(rest) = spec.strip_prefix("^:") {
847 let idx = scan_funcname_pattern_colon(rest)
848 .ok_or_else(|| Error::CorruptObject("-L argument not 'start,end:file'".to_owned()))?;
849 let path = &rest[idx + 1..];
850 if path.is_empty() {
851 return Err(Error::CorruptObject(
852 "-L argument not 'start,end:file'".to_owned(),
853 ));
854 }
855 return Ok((&spec[..2 + idx + 1], path));
857 }
858 if let Some(rest) = spec.strip_prefix(':') {
859 let idx = scan_funcname_pattern_colon(rest)
860 .ok_or_else(|| Error::CorruptObject("-L argument not 'start,end:file'".to_owned()))?;
861 let path = &rest[idx + 1..];
862 if path.is_empty() {
863 return Err(Error::CorruptObject(
864 "-L argument not 'start,end:file'".to_owned(),
865 ));
866 }
867 return Ok((&spec[..1 + idx + 1], path));
869 }
870 if spec.starts_with('/') {
871 return split_regex_line_range(spec)
872 .map_err(|_| Error::CorruptObject("-L argument not 'start,end:file'".to_owned()));
873 }
874 let idx = spec
875 .rfind(':')
876 .ok_or_else(|| Error::CorruptObject("-L argument not 'start,end:file'".to_owned()))?;
877 if idx == 0 {
878 return Err(Error::CorruptObject(
879 "-L argument not 'start,end:file'".to_owned(),
880 ));
881 }
882 let path = &spec[idx + 1..];
883 if path.is_empty() {
884 return Err(Error::CorruptObject(
885 "-L argument not 'start,end:file'".to_owned(),
886 ));
887 }
888 Ok((&spec[..idx], path))
889}
890
891fn read_blob_at_path(odb: &Odb, tree_oid: &ObjectId, path: &str) -> Result<Vec<u8>> {
892 let mut current = *tree_oid;
893 let parts: Vec<&str> = path.split('/').filter(|p| !p.is_empty()).collect();
894 for (pi, part) in parts.iter().enumerate() {
895 let obj = odb.read(¤t)?;
896 if obj.kind != ObjectKind::Tree {
897 return Err(Error::CorruptObject("not a tree".to_owned()));
898 }
899 let entries = parse_tree(&obj.data)?;
900 let name = part.as_bytes();
901 let mut found: Option<&TreeEntry> = None;
902 for e in &entries {
903 if e.name == name {
904 found = Some(e);
905 break;
906 }
907 }
908 let entry = found
909 .ok_or_else(|| Error::CorruptObject(format!("There is no path {path} in commit")))?;
910 if pi + 1 == parts.len() {
911 if (entry.mode & 0o170000) != 0o100000 {
912 return Err(Error::CorruptObject(format!(
913 "There is no path {path} in commit"
914 )));
915 }
916 let blob = odb.read(&entry.oid)?;
917 if blob.kind != ObjectKind::Blob {
918 return Err(Error::CorruptObject("not a blob".to_owned()));
919 }
920 return Ok(blob.data);
921 }
922 if entry.mode != 0o040000 {
923 return Err(Error::CorruptObject(format!(
924 "There is no path {path} in commit"
925 )));
926 }
927 current = entry.oid;
928 }
929 Err(Error::CorruptObject(format!(
930 "There is no path {path} in commit"
931 )))
932}
933
934pub fn parse_line_log_ranges(
936 odb: &Odb,
937 git_dir: &Path,
938 work_tree: Option<&Path>,
939 tip_oid: &ObjectId,
940 specs: &[String],
941) -> Result<Vec<LineLogFile>> {
942 let tip_obj = odb.read(tip_oid)?;
943 if tip_obj.kind != ObjectKind::Commit {
944 return Err(Error::CorruptObject("tip not a commit".to_owned()));
945 }
946 let tip_commit = parse_commit(&tip_obj.data)?;
947 let tree_oid = tip_commit.tree;
948
949 let mut files: Vec<LineLogFile> = Vec::new();
950
951 for spec in specs {
952 let (range_part, path) = split_l_arg(spec)?;
953 let userdiff = funcname_matcher_for_path(git_dir, work_tree, path);
954 let blob_data = read_blob_at_path(odb, &tree_oid, path)?;
955 let (lines, ends) = fill_line_ends(&blob_data);
956
957 let idx = search_insert_path(&files, path);
958 let anchor = if idx < files.len() && files[idx].path == path {
959 files[idx].ranges.ranges.last().map_or(1, |x| x.end + 1)
960 } else if idx > 0 && files[idx - 1].path == path {
961 files[idx - 1].ranges.ranges.last().map_or(1, |x| x.end + 1)
962 } else {
963 1
964 };
965
966 let (begin, end) = parse_range_arg(
967 range_part,
968 &blob_data,
969 &ends,
970 lines,
971 anchor,
972 userdiff.as_ref(),
973 )?;
974 line_log_insert(&mut files, path.to_owned(), begin, end);
975 }
976
977 for f in &mut files {
978 f.ranges.sort_and_merge();
979 }
980 Ok(files)
981}
982
983fn copy_line_state(files: &[LineLogFile]) -> Vec<LineLogFile> {
984 files
985 .iter()
986 .map(|f| LineLogFile {
987 path: f.path.clone(),
988 ranges: RangeSet {
989 ranges: f.ranges.ranges.clone(),
990 },
991 })
992 .collect()
993}
994
995fn filter_entries_for_paths(entries: Vec<DiffEntry>, paths: &[String]) -> Vec<DiffEntry> {
996 let set: HashSet<&str> = paths.iter().map(|p| p.as_str()).collect();
997 entries
998 .into_iter()
999 .filter(|e| {
1000 e.new_path
1001 .as_deref()
1002 .map(|p| set.contains(p))
1003 .unwrap_or(false)
1004 })
1005 .collect()
1006}
1007
1008fn paths_from_range_files(range: &[LineLogFile]) -> Vec<String> {
1009 let mut v: Vec<String> = range
1010 .iter()
1011 .filter(|f| !f.ranges.is_empty())
1012 .map(|f| f.path.clone())
1013 .collect();
1014 v.sort();
1015 v.dedup();
1016 v
1017}
1018
1019fn diff_tree_pair(
1020 odb: &Odb,
1021 old_tree: Option<&ObjectId>,
1022 new_tree: &ObjectId,
1023 paths: &[String],
1024 rename_threshold: u32,
1025) -> Result<Vec<DiffEntry>> {
1026 let old = old_tree.copied();
1027 let mut entries = diff_trees(odb, old.as_ref(), Some(new_tree), "")?;
1028 if rename_threshold < 100 {
1029 entries = detect_renames(odb, None, entries, rename_threshold);
1030 }
1031 Ok(filter_entries_for_paths(entries, paths))
1032}
1033
1034struct FileDiffArtifacts {
1035 old_path: String,
1036 new_path: String,
1037 old_text: String,
1038 new_text: String,
1039 diff: DiffRanges,
1040}
1041
1042#[derive(Clone, Debug)]
1044pub struct LineLogDisplay {
1045 pub old_path: String,
1046 pub new_path: String,
1047 pub old_bytes: Vec<u8>,
1048 pub new_bytes: Vec<u8>,
1049 pub commit_ranges: RangeSet,
1050 pub touched: DiffRanges,
1051}
1052
1053fn load_pair_content(
1054 odb: &Odb,
1055 entry: &DiffEntry,
1056 range_files: &[LineLogFile],
1057) -> Result<Option<FileDiffArtifacts>> {
1058 let new_path = entry
1059 .new_path
1060 .as_deref()
1061 .ok_or_else(|| Error::CorruptObject("diff entry missing path".to_owned()))?;
1062 let rg = range_files.iter().find(|f| f.path == new_path);
1063 let Some(rg) = rg else {
1064 return Ok(None);
1065 };
1066 if rg.ranges.is_empty() {
1067 return Ok(None);
1068 }
1069
1070 let z = zero_oid();
1071 let new_bytes = if entry.new_oid == z {
1072 Vec::new()
1073 } else {
1074 odb.read(&entry.new_oid)?.data
1075 };
1076 let old_bytes = if entry.old_oid == z {
1077 Vec::new()
1078 } else {
1079 odb.read(&entry.old_oid)?.data
1080 };
1081 let old_text = String::from_utf8_lossy(&old_bytes).into_owned();
1082 let new_text = String::from_utf8_lossy(&new_bytes).into_owned();
1083 let diff = collect_diff_ranges(&old_text, &new_text);
1084 let old_path = entry
1085 .old_path
1086 .clone()
1087 .unwrap_or_else(|| "/dev/null".to_owned());
1088 Ok(Some(FileDiffArtifacts {
1089 old_path,
1090 new_path: new_path.to_owned(),
1091 old_text,
1092 new_text,
1093 diff,
1094 }))
1095}
1096
1097fn process_diff_filepair(
1098 artifacts: &FileDiffArtifacts,
1099 range_files: &mut [LineLogFile],
1100) -> Result<Option<DiffRanges>> {
1101 let idx = range_files
1102 .iter()
1103 .position(|f| f.path == artifacts.new_path)
1104 .ok_or_else(|| Error::CorruptObject("range file missing".to_owned()))?;
1105 if range_files[idx].ranges.is_empty() {
1106 return Ok(None);
1107 }
1108
1109 let mut out_ranges = RangeSet::default();
1110 let mut touched = DiffRanges::default();
1111 range_set_map_across_diff(
1112 &mut out_ranges,
1113 &range_files[idx].ranges,
1114 &artifacts.diff,
1115 &mut touched,
1116 );
1117
1118 range_files[idx].path = artifacts.old_path.clone();
1119 range_files[idx].ranges = out_ranges;
1120
1121 if touched.hunks.is_empty() {
1122 Ok(None)
1123 } else {
1124 Ok(Some(touched))
1125 }
1126}
1127
1128fn process_all_files(
1129 odb: &Odb,
1130 queue: &[DiffEntry],
1131 range: &[LineLogFile],
1132) -> Result<(Vec<LineLogFile>, Vec<LineLogDisplay>, bool)> {
1133 let mut out = copy_line_state(range);
1134 let mut displays: Vec<LineLogDisplay> = Vec::new();
1135 let mut changed = false;
1136 for entry in queue {
1137 let Some(art) = load_pair_content(odb, entry, range)? else {
1138 continue;
1139 };
1140 let commit_ranges = range
1141 .iter()
1142 .find(|f| f.path == art.new_path)
1143 .map(|f| RangeSet {
1144 ranges: f.ranges.ranges.clone(),
1145 })
1146 .unwrap_or_default();
1147 if let Some(touched) = process_diff_filepair(&art, &mut out)? {
1148 if touched
1151 .hunks
1152 .iter()
1153 .any(|h| (h.parent.start < h.parent.end) || (h.target.start < h.target.end))
1154 {
1155 changed = true;
1156 }
1157 displays.push(LineLogDisplay {
1158 old_path: art.old_path.clone(),
1159 new_path: art.new_path.clone(),
1160 old_bytes: art.old_text.as_bytes().to_vec(),
1161 new_bytes: art.new_text.as_bytes().to_vec(),
1162 commit_ranges,
1163 touched,
1164 });
1165 }
1166 }
1167 Ok((out, displays, changed))
1168}
1169
1170fn process_ranges_ordinary_commit(
1171 odb: &Odb,
1172 parents: &[ObjectId],
1173 tree_oid: &ObjectId,
1174 range: &[LineLogFile],
1175 rename_threshold: u32,
1176 state: &mut LineLogState,
1177 display: &mut HashMap<ObjectId, Vec<LineLogDisplay>>,
1178 commit_oid: ObjectId,
1179) -> Result<bool> {
1180 let parent = parents.first();
1181 let parent_tree = parent
1182 .map(|p| parse_commit(&odb.read(p)?.data).map(|c| c.tree))
1183 .transpose()?;
1184 let path_keys = paths_from_range_files(range);
1185 let queue = diff_tree_pair(
1186 odb,
1187 parent_tree.as_ref(),
1188 tree_oid,
1189 &path_keys,
1190 rename_threshold,
1191 )?;
1192 let (parent_range, disp, changed) = process_all_files(odb, &queue, range)?;
1193 if !disp.is_empty() {
1194 display.insert(commit_oid, disp);
1195 }
1196 if let Some(p) = parent {
1197 state.insert(*p, parent_range);
1198 }
1199 Ok(changed)
1200}
1201
1202fn process_ranges_merge_commit(
1203 odb: &Odb,
1204 parents: &[ObjectId],
1205 tree_oid: &ObjectId,
1206 range: &[LineLogFile],
1207 rename_threshold: u32,
1208 first_parent_only: bool,
1209 state: &mut LineLogState,
1210 display: &mut HashMap<ObjectId, Vec<LineLogDisplay>>,
1211 commit_oid: ObjectId,
1212) -> Result<bool> {
1213 let nparents = if first_parent_only { 1 } else { parents.len() };
1214
1215 let mut candidates: Vec<Vec<LineLogFile>> = Vec::with_capacity(nparents);
1216
1217 for i in 0..nparents {
1218 let p = parents[i];
1219 let ptree = parse_commit(&odb.read(&p)?.data)?.tree;
1220 let path_keys = paths_from_range_files(range);
1221 let queue = diff_tree_pair(odb, Some(&ptree), tree_oid, &path_keys, rename_threshold)?;
1222 let (prange, _, changed_here) = process_all_files(odb, &queue, range)?;
1223 if !changed_here {
1224 state.insert(p, prange);
1225 state.remove(&commit_oid);
1226 return Ok(false);
1227 }
1228 candidates.push(prange);
1229 }
1230
1231 for (i, p) in parents.iter().enumerate().take(nparents) {
1232 state.insert(*p, candidates[i].clone());
1233 }
1234 state.remove(&commit_oid);
1235
1236 let p0 = parents[0];
1237 let ptree = parse_commit(&odb.read(&p0)?.data)?.tree;
1238 let path_keys = paths_from_range_files(range);
1239 let queue = diff_tree_pair(odb, Some(&ptree), tree_oid, &path_keys, rename_threshold)?;
1240 let (_, disp, _) = process_all_files(odb, &queue, range)?;
1242 if !disp.is_empty() {
1243 display.insert(commit_oid, disp);
1244 }
1245 Ok(true)
1246}
1247
1248pub fn line_log_filter_commits(
1250 odb: &Odb,
1251 commits: Vec<ObjectId>,
1252 tip: ObjectId,
1253 initial: Vec<LineLogFile>,
1254 rename_threshold: u32,
1255 first_parent_only: bool,
1256) -> Result<(
1257 Vec<ObjectId>,
1258 LineLogState,
1259 HashMap<ObjectId, Vec<LineLogDisplay>>,
1260)> {
1261 let mut state: LineLogState = HashMap::new();
1262 state.insert(tip, initial);
1263 let mut display_map: HashMap<ObjectId, Vec<LineLogDisplay>> = HashMap::new();
1264 let mut keep_commit: HashMap<ObjectId, bool> = HashMap::new();
1265
1266 for &oid in &commits {
1267 let Some(range) = state.get(&oid).cloned() else {
1268 continue;
1269 };
1270 if range.iter().all(|f| f.ranges.is_empty()) {
1271 continue;
1272 }
1273
1274 let c = parse_commit(&odb.read(&oid)?.data)?;
1275 let parents = c.parents.clone();
1276
1277 let show = if parents.len() > 1 {
1278 process_ranges_merge_commit(
1279 odb,
1280 &parents,
1281 &c.tree,
1282 &range,
1283 rename_threshold,
1284 first_parent_only,
1285 &mut state,
1286 &mut display_map,
1287 oid,
1288 )?
1289 } else {
1290 process_ranges_ordinary_commit(
1291 odb,
1292 &parents,
1293 &c.tree,
1294 &range,
1295 rename_threshold,
1296 &mut state,
1297 &mut display_map,
1298 oid,
1299 )?
1300 };
1301 keep_commit.insert(oid, show);
1302 }
1303
1304 let filtered: Vec<ObjectId> = commits
1305 .iter()
1306 .copied()
1307 .filter(|o| *keep_commit.get(o).unwrap_or(&false))
1308 .collect();
1309
1310 Ok((filtered, state, display_map))
1311}
1312
1313pub fn rewritten_first_parent(
1315 odb: &Odb,
1316 oid: &ObjectId,
1317 filtered: &HashSet<ObjectId>,
1318) -> Result<Option<ObjectId>> {
1319 let c = parse_commit(&odb.read(oid)?.data)?;
1320 let Some(mut p) = c.parents.first().copied() else {
1321 return Ok(None);
1322 };
1323 while !filtered.contains(&p) {
1324 let pc = parse_commit(&odb.read(&p)?.data)?;
1325 let Some(np) = pc.parents.first().copied() else {
1326 return Ok(Some(p));
1327 };
1328 p = np;
1329 }
1330 Ok(Some(p))
1331}
1332
1333pub fn format_line_log_diff(
1335 prefix: &str,
1336 old_path: &str,
1337 new_path: &str,
1338 old_data: &[u8],
1339 new_data: &[u8],
1340 ranges: &RangeSet,
1341 touched: &DiffRanges,
1342) -> String {
1343 let (p_lines, p_ends) = fill_line_ends(old_data);
1344 let (t_lines, t_ends) = fill_line_ends(new_data);
1345 let _ = (p_lines, t_lines);
1346
1347 let mut out = String::new();
1348 if touched.hunks.is_empty() {
1349 return out;
1350 }
1351
1352 out.push_str(prefix);
1353 out.push('\n');
1354 out.push_str(prefix);
1355 out.push_str("diff --git a/");
1356 out.push_str(new_path);
1357 out.push_str(" b/");
1358 out.push_str(new_path);
1359 out.push('\n');
1360 out.push_str(prefix);
1361 if old_path == "/dev/null" {
1362 out.push_str("--- /dev/null\n");
1363 } else {
1364 out.push_str("--- a/");
1365 out.push_str(old_path);
1366 out.push('\n');
1367 }
1368 out.push_str(prefix);
1369 out.push_str("+++ b/");
1370 out.push_str(new_path);
1371 out.push('\n');
1372
1373 let mut j = 0usize;
1374 for i in 0..ranges.ranges.len() {
1375 let t_start = ranges.ranges[i].start;
1376 let t_end = ranges.ranges[i].end;
1377 let mut t_cur = t_start;
1378
1379 if j > 0 && touched.hunks[j - 1].target.end > t_start {
1380 j -= 1;
1381 }
1382 while j < touched.hunks.len() && touched.hunks[j].target.end < t_start {
1383 j += 1;
1384 }
1385 if j >= touched.hunks.len() || touched.hunks[j].target.start >= t_end {
1386 continue;
1387 }
1388
1389 let mut j_last = j;
1390 while j_last < touched.hunks.len() && touched.hunks[j_last].target.start < t_end {
1391 j_last += 1;
1392 }
1393 if j_last > j {
1394 j_last -= 1;
1395 }
1396
1397 let p_start = if t_start < touched.hunks[j].target.start {
1398 touched.hunks[j].parent.start - (touched.hunks[j].target.start - t_start)
1399 } else {
1400 touched.hunks[j].parent.start
1401 };
1402 let p_end = if t_end > touched.hunks[j_last].target.end {
1403 touched.hunks[j_last].parent.end + (t_end - touched.hunks[j_last].target.end)
1404 } else {
1405 touched.hunks[j_last].parent.end
1406 };
1407
1408 let (p_start, p_end) = if p_start == 0 && p_end == 0 {
1409 (-1, -1)
1410 } else {
1411 (p_start, p_end)
1412 };
1413
1414 out.push_str(prefix);
1415 out.push_str(&format!(
1416 "@@ -{},{} +{},{} @@\n",
1417 p_start + 1,
1418 p_end - p_start,
1419 t_start + 1,
1420 t_end - t_start
1421 ));
1422
1423 while j < touched.hunks.len() && touched.hunks[j].target.start < t_end {
1424 while t_cur < touched.hunks[j].target.start {
1425 print_line(&mut out, prefix, ' ', t_cur, &t_ends, new_data);
1426 t_cur += 1;
1427 }
1428 let ps = touched.hunks[j].parent.start;
1429 let pe = touched.hunks[j].parent.end;
1430 for k in ps..pe {
1431 print_line(&mut out, prefix, '-', k, &p_ends, old_data);
1432 }
1433 while t_cur < touched.hunks[j].target.end && t_cur < t_end {
1434 print_line(&mut out, prefix, '+', t_cur, &t_ends, new_data);
1435 t_cur += 1;
1436 }
1437 j += 1;
1438 }
1439 while t_cur < t_end {
1440 print_line(&mut out, prefix, ' ', t_cur, &t_ends, new_data);
1441 t_cur += 1;
1442 }
1443 }
1444
1445 out
1446}
1447
1448fn print_line(out: &mut String, prefix: &str, first: char, line: i64, ends: &[usize], data: &[u8]) {
1449 let begin = if line == 0 {
1450 0
1451 } else {
1452 ends[line as usize] + 1
1453 };
1454 let end = if (line + 1) as usize >= ends.len() {
1455 data.len()
1456 } else {
1457 ends[(line + 1) as usize] + 1
1458 };
1459 let mut end2 = end.min(data.len());
1460 let had_nl = if end2 > begin && data[end2 - 1] == b'\n' {
1461 end2 -= 1;
1462 true
1463 } else {
1464 false
1465 };
1466 let slice = &data[begin..end2];
1467 out.push_str(prefix);
1468 out.push(first);
1469 out.push_str(&String::from_utf8_lossy(slice));
1470 out.push('\n');
1471 if !had_nl {
1472 out.push_str(prefix);
1473 out.push_str("\\ No newline at end of file\n");
1474 }
1475}