use crate::diff::{detect_renames, diff_trees, zero_oid, DiffEntry};
use crate::error::{Error, Result};
use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind, TreeEntry};
use crate::odb::Odb;
use regex::bytes::Regex as BytesRegex;
use similar::{Algorithm, ChangeTag, TextDiff};
use std::collections::{HashMap, HashSet};
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Range {
pub start: i64,
pub end: i64,
}
#[derive(Clone, Debug, Default)]
pub struct RangeSet {
pub ranges: Vec<Range>,
}
impl RangeSet {
fn append(&mut self, start: i64, end: i64) {
if start < end {
self.ranges.push(Range { start, end });
}
}
fn sort_and_merge(&mut self) {
if self.ranges.is_empty() {
return;
}
self.ranges.sort_by_key(|r| (r.start, r.end));
let mut out: Vec<Range> = Vec::new();
for r in std::mem::take(&mut self.ranges) {
if r.start >= r.end {
continue;
}
if let Some(last) = out.last_mut() {
if r.start <= last.end {
last.end = last.end.max(r.end);
} else {
out.push(r);
}
} else {
out.push(r);
}
}
self.ranges = out;
}
fn is_empty(&self) -> bool {
self.ranges.is_empty()
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct DiffHunk {
pub parent: Range,
pub target: Range,
}
#[derive(Clone, Debug, Default)]
pub struct DiffRanges {
pub hunks: Vec<DiffHunk>,
}
impl DiffRanges {
fn parent_ranges(&self) -> RangeSet {
let mut s = RangeSet::default();
for h in &self.hunks {
s.append(h.parent.start, h.parent.end);
}
s.sort_and_merge();
s
}
fn target_ranges(&self) -> RangeSet {
let mut s = RangeSet::default();
for h in &self.hunks {
s.append(h.target.start, h.target.end);
}
s.sort_and_merge();
s
}
}
fn push_diff_hunk(out: &mut DiffRanges, p0: i64, p1: i64, t0: i64, t1: i64) {
out.hunks.push(DiffHunk {
parent: Range { start: p0, end: p1 },
target: Range { start: t0, end: t1 },
});
}
fn collect_diff_ranges(old: &str, new: &str) -> DiffRanges {
let diff = TextDiff::configure()
.algorithm(Algorithm::Myers)
.diff_lines(old, new);
let mut out = DiffRanges::default();
let old_lines: Vec<&str> = old.lines().collect();
let new_lines: Vec<&str> = new.lines().collect();
let mut o = 0usize;
let mut n = 0usize;
let mut in_hunk = false;
let mut h_o_start = 0usize;
let mut h_n_start = 0usize;
let mut h_o_end = 0usize;
let mut h_n_end = 0usize;
for change in diff.iter_all_changes() {
let len = change.value().lines().count().max(1);
match change.tag() {
ChangeTag::Equal => {
if in_hunk {
push_diff_hunk(
&mut out,
h_o_start as i64,
h_o_end as i64,
h_n_start as i64,
h_n_end as i64,
);
in_hunk = false;
}
o = (o + len).min(old_lines.len());
n = (n + len).min(new_lines.len());
}
ChangeTag::Delete => {
if !in_hunk {
in_hunk = true;
h_o_start = o;
h_n_start = n;
}
h_o_end = (o + len).min(old_lines.len());
h_n_end = n;
o = h_o_end;
}
ChangeTag::Insert => {
if !in_hunk {
in_hunk = true;
h_o_start = o;
h_n_start = n;
}
h_o_end = o;
h_n_end = (n + len).min(new_lines.len());
n = h_n_end;
}
}
}
if in_hunk {
push_diff_hunk(
&mut out,
h_o_start as i64,
h_o_end as i64,
h_n_start as i64,
h_n_end as i64,
);
}
out
}
fn ranges_overlap(a: Range, b: Range) -> bool {
!(a.end <= b.start || b.end <= a.start)
}
fn diff_ranges_filter_touched(diff: &DiffRanges, rs: &RangeSet) -> DiffRanges {
let mut out = DiffRanges::default();
let mut j = 0usize;
for h in &diff.hunks {
let tr = h.target;
while j < rs.ranges.len() && tr.start >= rs.ranges[j].end {
j += 1;
}
if j == rs.ranges.len() {
break;
}
if ranges_overlap(tr, rs.ranges[j]) {
out.hunks.push(*h);
}
}
out
}
fn range_set_difference(out: &mut RangeSet, a: &RangeSet, b: &RangeSet) {
let mut j = 0usize;
for r in &a.ranges {
let mut start = r.start;
let end = r.end;
while start < end {
while j < b.ranges.len() && start >= b.ranges[j].end {
j += 1;
}
if j >= b.ranges.len() || end <= b.ranges[j].start {
out.append(start, end);
break;
}
if start >= b.ranges[j].start {
start = b.ranges[j].end;
} else if end > b.ranges[j].start {
if start < b.ranges[j].start {
out.append(start, b.ranges[j].start);
}
start = b.ranges[j].end;
}
}
}
}
fn range_set_shift_diff(out: &mut RangeSet, rs: &RangeSet, diff: &DiffRanges) {
let mut j = 0usize;
let mut offset: i64 = 0;
for r in &rs.ranges {
while j < diff.hunks.len() && r.start >= diff.hunks[j].target.start {
let h = diff.hunks[j];
offset += (h.parent.end - h.parent.start) - (h.target.end - h.target.start);
j += 1;
}
out.append(r.start + offset, r.end + offset);
}
}
fn range_set_union(out: &mut RangeSet, a: &RangeSet, b: &RangeSet) {
let mut i = 0usize;
let mut j = 0usize;
while i < a.ranges.len() || j < b.ranges.len() {
let (start, end, from_a) = match (i < a.ranges.len(), j < b.ranges.len()) {
(true, true) => {
let ra = a.ranges[i];
let rb = b.ranges[j];
if ra.start < rb.start {
(ra.start, ra.end, true)
} else if rb.start < ra.start {
(rb.start, rb.end, false)
} else if ra.end < rb.end {
(ra.start, ra.end, true)
} else {
(rb.start, rb.end, false)
}
}
(true, false) => {
let ra = a.ranges[i];
(ra.start, ra.end, true)
}
(false, true) => {
let rb = b.ranges[j];
(rb.start, rb.end, false)
}
(false, false) => break,
};
if from_a {
i += 1;
} else {
j += 1;
}
if start >= end {
continue;
}
if let Some(last) = out.ranges.last_mut() {
if last.end < start {
out.ranges.push(Range { start, end });
} else if last.end < end {
last.end = end;
}
} else {
out.ranges.push(Range { start, end });
}
}
}
fn range_set_map_across_diff(
out: &mut RangeSet,
rs: &RangeSet,
diff: &DiffRanges,
touched_out: &mut DiffRanges,
) {
let touched = diff_ranges_filter_touched(diff, rs);
let touched_target = touched.target_ranges();
let touched_parent = touched.parent_ranges();
let mut tmp1 = RangeSet::default();
range_set_difference(&mut tmp1, rs, &touched_target);
let mut tmp2 = RangeSet::default();
range_set_shift_diff(&mut tmp2, &tmp1, diff);
range_set_union(out, &tmp2, &touched_parent);
*touched_out = touched;
}
#[derive(Clone, Debug)]
pub struct LineLogFile {
pub path: String,
pub ranges: RangeSet,
}
pub type LineLogState = HashMap<ObjectId, Vec<LineLogFile>>;
fn search_insert_path(files: &[LineLogFile], path: &str) -> usize {
match files.binary_search_by_key(&path, |f| f.path.as_str()) {
Ok(i) => i,
Err(i) => i,
}
}
fn line_log_insert(files: &mut Vec<LineLogFile>, path: String, begin: i64, end: i64) {
let idx = search_insert_path(files, &path);
if idx < files.len() && files[idx].path == path {
files[idx].ranges.append(begin, end);
files[idx].ranges.sort_and_merge();
} else {
let mut rs = RangeSet::default();
rs.append(begin, end);
files.insert(idx, LineLogFile { path, ranges: rs });
}
}
fn fill_line_ends(data: &[u8]) -> (i64, Vec<usize>) {
let mut ends: Vec<usize> = vec![0];
let mut num = 0usize;
while num < data.len() {
if data[num] == b'\n' || num == data.len() - 1 {
ends.push(num);
}
num += 1;
}
let lines = (ends.len() as i64) - 1;
(lines, ends)
}
fn line_byte_offset(line: i64, ends: &[usize]) -> usize {
if line <= 0 {
0
} else {
ends[line as usize] + 1
}
}
fn nth_line<'a>(data: &'a [u8], line: i64, ends: &[usize]) -> &'a [u8] {
let idx = line_byte_offset(line, ends);
&data[idx..]
}
fn nth_line_end(data: &[u8], line: i64, ends: &[usize], total_lines: i64) -> usize {
if line >= total_lines {
return data.len();
}
ends[line as usize] + 1
}
fn parse_loc<'a>(
spec: &'a str,
data: &[u8],
ends: &[usize],
lines: i64,
anchor_for_regex: i64,
rel_begin_line: i64,
) -> Result<(i64, &'a str)> {
let rel_ok = (1..=lines).contains(&rel_begin_line);
let bytes = spec.as_bytes();
if rel_ok && !spec.is_empty() && (bytes[0] == b'+' || bytes[0] == b'-') {
let neg = bytes[0] == b'-';
let rest = &spec[1..];
let (digits, after) = split_prefix_digits(rest);
if digits.is_empty() {
return Err(Error::CorruptObject("-L invalid relative range".to_owned()));
}
let num: i64 = digits
.parse()
.map_err(|_| Error::CorruptObject("-L invalid relative range".to_owned()))?;
let num = if neg { -num } else { num };
let ret = if num > 0 {
rel_begin_line + num - 2
} else if num == 0 {
rel_begin_line
} else {
(rel_begin_line + num).max(1)
};
return Ok((ret, after));
}
let (digits, after_num) = split_prefix_digits(spec);
if !digits.is_empty() {
let num: i64 = digits
.parse()
.map_err(|_| Error::CorruptObject("-L invalid line number".to_owned()))?;
if num <= 0 {
return Err(Error::CorruptObject("-L invalid line number".to_owned()));
}
return Ok((num, after_num));
}
let mut s = spec;
let mut anchor_line = anchor_for_regex;
if anchor_line < 0 {
anchor_line = -anchor_line;
} else if s.starts_with('^') {
anchor_line = 1;
s = &s[1..];
}
let s_bytes = s.as_bytes();
if s_bytes.first() != Some(&b'/') {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let mut i = 1usize;
let mut pattern = String::new();
while i < s_bytes.len() {
if s_bytes[i] == b'/' {
break;
}
if s_bytes[i] == b'\\' && i + 1 < s_bytes.len() {
pattern.push(s_bytes[i + 1] as char);
i += 2;
continue;
}
pattern.push(s_bytes[i] as char);
i += 1;
}
if i >= s_bytes.len() || s_bytes[i] != b'/' {
return Err(Error::CorruptObject("malformed -L regex".to_owned()));
}
let rest = &s[i + 1..];
let begin0 = (anchor_line - 1).max(0).min(lines.saturating_sub(1).max(0));
let line_start = nth_line(data, begin0, ends);
let re = BytesRegex::new(&pattern).map_err(|e| Error::CorruptObject(format!("regex: {e}")))?;
let mat = re
.find(line_start)
.ok_or_else(|| Error::CorruptObject("no match".to_owned()))?;
let mut line_idx = begin0;
let mut line_beg = line_byte_offset(line_idx, ends);
let cp = line_byte_offset(begin0, ends) + mat.start();
while line_idx < lines {
let next_beg = line_byte_offset(line_idx + 1, ends);
if line_beg <= cp && cp < next_beg {
break;
}
line_idx += 1;
line_beg = next_beg;
}
Ok((line_idx + 1, rest))
}
fn split_prefix_digits(s: &str) -> (&str, &str) {
let end = s
.as_bytes()
.iter()
.take_while(|b| b.is_ascii_digit())
.count();
(&s[..end], &s[end..])
}
fn match_funcname_line(line: &[u8]) -> bool {
let Some(&c) = line.first() else {
return false;
};
c.is_ascii_alphabetic() || c == b'_' || c == b'$'
}
fn parse_range_funcname<'a>(
arg: &'a str,
data: &[u8],
ends: &[usize],
lines: i64,
mut anchor: i64,
) -> Result<(i64, i64, &'a str)> {
let mut s = arg;
if s.starts_with('^') && s.get(1..2) == Some(":") {
anchor = 1;
s = &s[2..];
} else if s.starts_with(':') {
s = &s[1..];
} else {
return Err(Error::CorruptObject("bad funcname range".to_owned()));
}
let mut i = 0usize;
let b = s.as_bytes();
while i < b.len() && b[i] != b':' {
if b[i] == b'\\' && i + 1 < b.len() {
i += 2;
} else {
i += 1;
}
}
if i >= b.len() {
return Err(Error::CorruptObject("bad funcname range".to_owned()));
}
let pattern = &s[..i];
let after_colon = i + 1;
let anchor0 = (anchor - 1).max(0).min(lines.saturating_sub(1).max(0));
if pattern == "^" || pattern == "$" || pattern == ".*" {
let mut line_idx = anchor0;
let mut found_begin = None;
while line_idx < lines {
let bol = nth_line(data, line_idx, ends);
let eol_idx = nth_line_end(data, line_idx, ends, lines);
let slice = &data[bol.as_ptr() as usize - data.as_ptr() as usize..eol_idx];
if match_funcname_line(slice) {
found_begin = Some(line_idx);
break;
}
line_idx += 1;
}
let begin = found_begin.ok_or_else(|| Error::CorruptObject("no match".to_owned()))?;
let mut end = begin + 1;
while end < lines {
let bol = nth_line(data, end, ends);
let eol_idx = nth_line_end(data, end, ends, lines);
let slice = &data[bol.as_ptr() as usize - data.as_ptr() as usize..eol_idx];
if match_funcname_line(slice) {
break;
}
end += 1;
}
let tail = s.get(after_colon..).unwrap_or("");
return Ok((begin + 1, end, tail));
}
let start_search = nth_line(data, anchor0, ends);
let re = BytesRegex::new(pattern).map_err(|e| Error::CorruptObject(format!("regex: {e}")))?;
let mut p = start_search;
let found_bol = loop {
let Some(m) = re.find(p) else {
break None;
};
let match_start = p.as_ptr() as usize - data.as_ptr() as usize + m.start();
let mut bol = match_start;
while bol > 0 && data[bol - 1] != b'\n' {
bol -= 1;
}
let mut eol = match_start;
while eol < data.len() && data[eol] != b'\n' {
eol += 1;
}
if eol < data.len() {
eol += 1;
}
let line_slice = &data[bol..eol.min(data.len())];
if match_funcname_line(line_slice) {
break Some(bol);
}
p = &data[eol.min(data.len())..];
if p.is_empty() {
break None;
}
};
let bol = found_bol.ok_or_else(|| Error::CorruptObject("no match".to_owned()))?;
let mut begin = 0i64;
while begin < lines {
if line_byte_offset(begin, ends) == bol {
break;
}
begin += 1;
}
if begin >= lines {
return Err(Error::CorruptObject("funcname matches at EOF".to_owned()));
}
let mut end = begin + 1;
while end < lines {
let bol = nth_line(data, end, ends);
let eol_idx = nth_line_end(data, end, ends, lines);
let slice = &data[bol.as_ptr() as usize - data.as_ptr() as usize..eol_idx];
if match_funcname_line(slice) {
break;
}
end += 1;
}
let tail = s.get(after_colon..).unwrap_or("");
Ok((begin + 1, end, tail))
}
fn split_slash_regex_range(arg: &str) -> Result<(&str, Option<&str>)> {
let b = arg.as_bytes();
if b.first() != Some(&b'/') {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let mut pos = 0usize;
let start_first = pos;
pos += 1;
while pos < b.len() {
if b[pos] == b'\\' {
pos = (pos + 2).min(b.len());
continue;
}
if b[pos] == b'/' {
pos += 1;
break;
}
pos += 1;
}
if pos > b.len() || pos == start_first + 1 {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let first = &arg[start_first..pos];
if pos >= arg.len() {
return Ok((first, None));
}
if b.get(pos).copied() != Some(b',') {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
pos += 1;
if b.get(pos).copied() != Some(b'/') {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let start_second = pos;
pos += 1;
while pos < b.len() {
if b[pos] == b'\\' {
pos = (pos + 2).min(b.len());
continue;
}
if b[pos] == b'/' {
pos += 1;
break;
}
pos += 1;
}
if pos != arg.len() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
Ok((first, Some(&arg[start_second..pos])))
}
fn parse_range_arg(
arg: &str,
data: &[u8],
ends: &[usize],
lines: i64,
anchor: i64,
) -> Result<(i64, i64)> {
let (begin, end) = if arg.starts_with(':') || arg.starts_with("^:") {
let (b, e, rest) = parse_range_funcname(arg, data, ends, lines, anchor)?;
if !rest.is_empty() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
(b, e)
} else if arg.starts_with('/') {
let (a, second) = split_slash_regex_range(arg)?;
let (b1, r1) = parse_loc(a, data, ends, lines, -anchor, 0)?;
if !r1.is_empty() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let (b2, r2) = if let Some(s2) = second {
parse_loc(s2, data, ends, lines, b1 + 1, b1 + 1)?
} else {
(b1, "")
};
if !r2.is_empty() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let mut b1 = b1;
let mut b2 = b2;
if b1 != 0 && b2 != 0 && b2 < b1 {
std::mem::swap(&mut b1, &mut b2);
}
(b1, b2)
} else if arg.contains(',') {
let comma = arg
.find(',')
.ok_or_else(|| Error::CorruptObject("malformed -L argument".to_owned()))?;
let (a, b) = arg.split_at(comma);
let b = &b[1..];
let (b1, r1) = if a.is_empty() {
(1i64, "")
} else {
parse_loc(a, data, ends, lines, -anchor, 0)?
};
if !r1.is_empty() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let (b2, r2) = if b.is_empty() {
(b1, "")
} else {
parse_loc(b, data, ends, lines, b1 + 1, b1 + 1)?
};
if !r2.is_empty() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let mut b1 = b1;
let mut b2 = b2;
if b1 != 0 && b2 != 0 && b2 < b1 {
std::mem::swap(&mut b1, &mut b2);
}
(b1, b2)
} else {
let (n, rest) = parse_loc(arg, data, ends, lines, -anchor, 0)?;
if !rest.is_empty() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
(n, n)
};
if (lines == 0 && (begin != 0 || end != 0)) || (lines > 0 && lines < begin) {
return Err(Error::CorruptObject(format!("file has only {lines} lines")));
}
let mut begin = begin;
let mut end = end;
if begin < 1 {
begin = 1;
}
if end < 1 || lines < end {
end = lines;
}
begin -= 1;
Ok((begin, end))
}
fn scan_funcname_pattern_colon(s: &str) -> Option<usize> {
let b = s.as_bytes();
let mut i = 0usize;
while i < b.len() {
if b[i] == b':' {
return Some(i);
}
if b[i] == b'\\' && i + 1 < b.len() {
i += 2;
} else {
i += 1;
}
}
None
}
fn split_regex_line_range(spec: &str) -> Result<(&str, &str)> {
let b = spec.as_bytes();
if b.first() != Some(&b'/') {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let mut pos = 0usize;
loop {
if pos >= b.len() || b[pos] != b'/' {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
pos += 1;
while pos < b.len() {
if b[pos] == b'\\' {
pos = (pos + 2).min(b.len());
continue;
}
if b[pos] == b'/' {
pos += 1;
break;
}
pos += 1;
}
if pos > b.len() {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
if pos < b.len() && b[pos] == b',' {
pos += 1;
continue;
}
break;
}
if pos >= b.len() || b[pos] != b':' {
return Err(Error::CorruptObject("malformed -L argument".to_owned()));
}
let path = &spec[pos + 1..];
if path.is_empty() {
return Err(Error::CorruptObject(
"-L argument not 'start,end:file'".to_owned(),
));
}
Ok((&spec[..pos], path))
}
fn split_l_arg(spec: &str) -> Result<(&str, &str)> {
if spec.is_empty() {
return Err(Error::CorruptObject(
"-L argument not 'start,end:file'".to_owned(),
));
}
if let Some(rest) = spec.strip_prefix("^:") {
let idx = scan_funcname_pattern_colon(rest)
.ok_or_else(|| Error::CorruptObject("-L argument not 'start,end:file'".to_owned()))?;
let path = &rest[idx + 1..];
if path.is_empty() {
return Err(Error::CorruptObject(
"-L argument not 'start,end:file'".to_owned(),
));
}
return Ok((&spec[..2 + idx + 1], path));
}
if let Some(rest) = spec.strip_prefix(':') {
let idx = scan_funcname_pattern_colon(rest)
.ok_or_else(|| Error::CorruptObject("-L argument not 'start,end:file'".to_owned()))?;
let path = &rest[idx + 1..];
if path.is_empty() {
return Err(Error::CorruptObject(
"-L argument not 'start,end:file'".to_owned(),
));
}
return Ok((&spec[..1 + idx + 1], path));
}
if spec.starts_with('/') {
return split_regex_line_range(spec);
}
let idx = spec
.rfind(':')
.ok_or_else(|| Error::CorruptObject("-L argument not 'start,end:file'".to_owned()))?;
if idx == 0 {
return Err(Error::CorruptObject(
"-L argument not 'start,end:file'".to_owned(),
));
}
let path = &spec[idx + 1..];
if path.is_empty() {
return Err(Error::CorruptObject(
"-L argument not 'start,end:file'".to_owned(),
));
}
Ok((&spec[..idx], path))
}
fn read_blob_at_path(odb: &Odb, tree_oid: &ObjectId, path: &str) -> Result<Vec<u8>> {
let mut current = *tree_oid;
let parts: Vec<&str> = path.split('/').filter(|p| !p.is_empty()).collect();
for (pi, part) in parts.iter().enumerate() {
let obj = odb.read(¤t)?;
if obj.kind != ObjectKind::Tree {
return Err(Error::CorruptObject("not a tree".to_owned()));
}
let entries = parse_tree(&obj.data)?;
let name = part.as_bytes();
let mut found: Option<&TreeEntry> = None;
for e in &entries {
if e.name == name {
found = Some(e);
break;
}
}
let entry = found
.ok_or_else(|| Error::CorruptObject(format!("There is no path {path} in commit")))?;
if pi + 1 == parts.len() {
if (entry.mode & 0o170000) != 0o100000 {
return Err(Error::CorruptObject(format!(
"There is no path {path} in commit"
)));
}
let blob = odb.read(&entry.oid)?;
if blob.kind != ObjectKind::Blob {
return Err(Error::CorruptObject("not a blob".to_owned()));
}
return Ok(blob.data);
}
if entry.mode != 0o040000 {
return Err(Error::CorruptObject(format!(
"There is no path {path} in commit"
)));
}
current = entry.oid;
}
Err(Error::CorruptObject(format!(
"There is no path {path} in commit"
)))
}
pub fn parse_line_log_ranges(
odb: &Odb,
tip_oid: &ObjectId,
specs: &[String],
) -> Result<Vec<LineLogFile>> {
let tip_obj = odb.read(tip_oid)?;
if tip_obj.kind != ObjectKind::Commit {
return Err(Error::CorruptObject("tip not a commit".to_owned()));
}
let tip_commit = parse_commit(&tip_obj.data)?;
let tree_oid = tip_commit.tree;
let mut files: Vec<LineLogFile> = Vec::new();
for spec in specs {
let (range_part, path) = split_l_arg(spec)?;
let blob_data = read_blob_at_path(odb, &tree_oid, path)?;
let (lines, ends) = fill_line_ends(&blob_data);
let idx = search_insert_path(&files, path);
let anchor = if idx < files.len() && files[idx].path == path {
files[idx].ranges.ranges.last().map_or(1, |x| x.end + 1)
} else if idx > 0 && files[idx - 1].path == path {
files[idx - 1].ranges.ranges.last().map_or(1, |x| x.end + 1)
} else {
1
};
let (begin, end) = parse_range_arg(range_part, &blob_data, &ends, lines, anchor)?;
line_log_insert(&mut files, path.to_owned(), begin, end);
}
for f in &mut files {
f.ranges.sort_and_merge();
}
Ok(files)
}
fn copy_line_state(files: &[LineLogFile]) -> Vec<LineLogFile> {
files
.iter()
.map(|f| LineLogFile {
path: f.path.clone(),
ranges: RangeSet {
ranges: f.ranges.ranges.clone(),
},
})
.collect()
}
fn filter_entries_for_paths(entries: Vec<DiffEntry>, paths: &[String]) -> Vec<DiffEntry> {
let set: HashSet<&str> = paths.iter().map(|p| p.as_str()).collect();
entries
.into_iter()
.filter(|e| {
e.new_path
.as_deref()
.map(|p| set.contains(p))
.unwrap_or(false)
})
.collect()
}
fn diff_tree_pair(
odb: &Odb,
old_tree: Option<&ObjectId>,
new_tree: &ObjectId,
paths: &[String],
rename_threshold: u32,
) -> Result<Vec<DiffEntry>> {
let old = old_tree.copied();
let mut entries = diff_trees(odb, old.as_ref(), Some(new_tree), "")?;
if rename_threshold < 100 {
entries = detect_renames(odb, entries, rename_threshold);
}
Ok(filter_entries_for_paths(entries, paths))
}
struct FileDiffArtifacts {
old_path: String,
new_path: String,
old_text: String,
new_text: String,
diff: DiffRanges,
}
#[derive(Clone, Debug)]
pub struct LineLogDisplay {
pub old_path: String,
pub new_path: String,
pub old_bytes: Vec<u8>,
pub new_bytes: Vec<u8>,
pub commit_ranges: RangeSet,
pub touched: DiffRanges,
}
fn load_pair_content(
odb: &Odb,
entry: &DiffEntry,
range_files: &[LineLogFile],
) -> Result<Option<FileDiffArtifacts>> {
let new_path = entry
.new_path
.as_deref()
.ok_or_else(|| Error::CorruptObject("diff entry missing path".to_owned()))?;
let rg = range_files.iter().find(|f| f.path == new_path);
let Some(rg) = rg else {
return Ok(None);
};
if rg.ranges.is_empty() {
return Ok(None);
}
let z = zero_oid();
let new_bytes = if entry.new_oid == z {
Vec::new()
} else {
odb.read(&entry.new_oid)?.data
};
let old_bytes = if entry.old_oid == z {
Vec::new()
} else {
odb.read(&entry.old_oid)?.data
};
let old_text = String::from_utf8_lossy(&old_bytes).into_owned();
let new_text = String::from_utf8_lossy(&new_bytes).into_owned();
let diff = collect_diff_ranges(&old_text, &new_text);
let old_path = entry
.old_path
.clone()
.unwrap_or_else(|| "/dev/null".to_owned());
Ok(Some(FileDiffArtifacts {
old_path,
new_path: new_path.to_owned(),
old_text,
new_text,
diff,
}))
}
fn process_diff_filepair(
artifacts: &FileDiffArtifacts,
range_files: &mut [LineLogFile],
) -> Result<Option<DiffRanges>> {
let idx = range_files
.iter()
.position(|f| f.path == artifacts.new_path)
.ok_or_else(|| Error::CorruptObject("range file missing".to_owned()))?;
if range_files[idx].ranges.is_empty() {
return Ok(None);
}
let mut out_ranges = RangeSet::default();
let mut touched = DiffRanges::default();
range_set_map_across_diff(
&mut out_ranges,
&range_files[idx].ranges,
&artifacts.diff,
&mut touched,
);
range_files[idx].path = artifacts.old_path.clone();
range_files[idx].ranges = out_ranges;
if touched.hunks.is_empty() {
Ok(None)
} else {
Ok(Some(touched))
}
}
fn process_all_files(
odb: &Odb,
queue: &[DiffEntry],
range: &[LineLogFile],
) -> Result<(Vec<LineLogFile>, Vec<LineLogDisplay>, bool)> {
let mut out = copy_line_state(range);
let mut displays: Vec<LineLogDisplay> = Vec::new();
let mut changed = false;
for entry in queue {
let Some(art) = load_pair_content(odb, entry, range)? else {
continue;
};
let commit_ranges = range
.iter()
.find(|f| f.path == art.new_path)
.map(|f| RangeSet {
ranges: f.ranges.ranges.clone(),
})
.unwrap_or_default();
if let Some(touched) = process_diff_filepair(&art, &mut out)? {
if touched
.hunks
.iter()
.any(|h| (h.parent.start < h.parent.end) || (h.target.start < h.target.end))
{
changed = true;
}
displays.push(LineLogDisplay {
old_path: art.old_path.clone(),
new_path: art.new_path.clone(),
old_bytes: art.old_text.as_bytes().to_vec(),
new_bytes: art.new_text.as_bytes().to_vec(),
commit_ranges,
touched,
});
}
}
Ok((out, displays, changed))
}
fn process_ranges_ordinary_commit(
odb: &Odb,
parents: &[ObjectId],
tree_oid: &ObjectId,
range: &[LineLogFile],
paths: &[String],
rename_threshold: u32,
state: &mut LineLogState,
display: &mut HashMap<ObjectId, Vec<LineLogDisplay>>,
commit_oid: ObjectId,
) -> Result<bool> {
let parent = parents.first();
let parent_tree = parent
.map(|p| parse_commit(&odb.read(p)?.data).map(|c| c.tree))
.transpose()?;
let queue = diff_tree_pair(odb, parent_tree.as_ref(), tree_oid, paths, rename_threshold)?;
let (parent_range, disp, changed) = process_all_files(odb, &queue, range)?;
if !disp.is_empty() {
display.insert(commit_oid, disp);
}
if let Some(p) = parent {
state.insert(*p, parent_range);
}
Ok(changed)
}
fn process_ranges_merge_commit(
odb: &Odb,
parents: &[ObjectId],
tree_oid: &ObjectId,
range: &[LineLogFile],
paths: &[String],
rename_threshold: u32,
first_parent_only: bool,
state: &mut LineLogState,
display: &mut HashMap<ObjectId, Vec<LineLogDisplay>>,
commit_oid: ObjectId,
) -> Result<bool> {
let nparents = if first_parent_only { 1 } else { parents.len() };
let mut candidates: Vec<Vec<LineLogFile>> = Vec::with_capacity(nparents);
for i in 0..nparents {
let p = parents[i];
let ptree = parse_commit(&odb.read(&p)?.data)?.tree;
let queue = diff_tree_pair(odb, Some(&ptree), tree_oid, paths, rename_threshold)?;
let (prange, _, changed_here) = process_all_files(odb, &queue, range)?;
if !changed_here {
state.insert(p, prange);
state.remove(&commit_oid);
return Ok(false);
}
candidates.push(prange);
}
for (i, p) in parents.iter().enumerate().take(nparents) {
state.insert(*p, candidates[i].clone());
}
state.remove(&commit_oid);
let p0 = parents[0];
let ptree = parse_commit(&odb.read(&p0)?.data)?.tree;
let queue = diff_tree_pair(odb, Some(&ptree), tree_oid, paths, rename_threshold)?;
let (_, disp, _) = process_all_files(odb, &queue, range)?;
if !disp.is_empty() {
display.insert(commit_oid, disp);
}
Ok(true)
}
pub fn line_log_filter_commits(
odb: &Odb,
commits: Vec<ObjectId>,
tip: ObjectId,
initial: Vec<LineLogFile>,
paths: Vec<String>,
rename_threshold: u32,
first_parent_only: bool,
) -> Result<(
Vec<ObjectId>,
LineLogState,
HashMap<ObjectId, Vec<LineLogDisplay>>,
)> {
let mut state: LineLogState = HashMap::new();
state.insert(tip, initial);
let mut display_map: HashMap<ObjectId, Vec<LineLogDisplay>> = HashMap::new();
let mut keep_commit: HashMap<ObjectId, bool> = HashMap::new();
for &oid in &commits {
let Some(range) = state.get(&oid).cloned() else {
continue;
};
if range.iter().all(|f| f.ranges.is_empty()) {
continue;
}
let c = parse_commit(&odb.read(&oid)?.data)?;
let parents = c.parents.clone();
let show = if parents.len() > 1 {
process_ranges_merge_commit(
odb,
&parents,
&c.tree,
&range,
&paths,
rename_threshold,
first_parent_only,
&mut state,
&mut display_map,
oid,
)?
} else {
process_ranges_ordinary_commit(
odb,
&parents,
&c.tree,
&range,
&paths,
rename_threshold,
&mut state,
&mut display_map,
oid,
)?
};
keep_commit.insert(oid, show);
}
let filtered: Vec<ObjectId> = commits
.iter()
.copied()
.filter(|o| *keep_commit.get(o).unwrap_or(&false))
.collect();
Ok((filtered, state, display_map))
}
pub fn rewritten_first_parent(
odb: &Odb,
oid: &ObjectId,
filtered: &HashSet<ObjectId>,
) -> Result<Option<ObjectId>> {
let c = parse_commit(&odb.read(oid)?.data)?;
let Some(mut p) = c.parents.first().copied() else {
return Ok(None);
};
while !filtered.contains(&p) {
let pc = parse_commit(&odb.read(&p)?.data)?;
let Some(np) = pc.parents.first().copied() else {
return Ok(Some(p));
};
p = np;
}
Ok(Some(p))
}
pub fn format_line_log_diff(
prefix: &str,
old_path: &str,
new_path: &str,
old_data: &[u8],
new_data: &[u8],
ranges: &RangeSet,
touched: &DiffRanges,
) -> String {
let (p_lines, p_ends) = fill_line_ends(old_data);
let (t_lines, t_ends) = fill_line_ends(new_data);
let _ = (p_lines, t_lines);
let mut out = String::new();
if touched.hunks.is_empty() {
return out;
}
out.push_str(prefix);
out.push('\n');
out.push_str(prefix);
out.push_str("diff --git a/");
out.push_str(new_path);
out.push_str(" b/");
out.push_str(new_path);
out.push('\n');
out.push_str(prefix);
if old_path == "/dev/null" {
out.push_str("--- /dev/null\n");
} else {
out.push_str("--- a/");
out.push_str(old_path);
out.push('\n');
}
out.push_str(prefix);
out.push_str("+++ b/");
out.push_str(new_path);
out.push('\n');
let mut j = 0usize;
for i in 0..ranges.ranges.len() {
let t_start = ranges.ranges[i].start;
let t_end = ranges.ranges[i].end;
let mut t_cur = t_start;
if j > 0 && touched.hunks[j - 1].target.end > t_start {
j -= 1;
}
while j < touched.hunks.len() && touched.hunks[j].target.end < t_start {
j += 1;
}
if j >= touched.hunks.len() || touched.hunks[j].target.start >= t_end {
continue;
}
let mut j_last = j;
while j_last < touched.hunks.len() && touched.hunks[j_last].target.start < t_end {
j_last += 1;
}
if j_last > j {
j_last -= 1;
}
let p_start = if t_start < touched.hunks[j].target.start {
touched.hunks[j].parent.start - (touched.hunks[j].target.start - t_start)
} else {
touched.hunks[j].parent.start
};
let p_end = if t_end > touched.hunks[j_last].target.end {
touched.hunks[j_last].parent.end + (t_end - touched.hunks[j_last].target.end)
} else {
touched.hunks[j_last].parent.end
};
let (p_start, p_end) = if p_start == 0 && p_end == 0 {
(-1, -1)
} else {
(p_start, p_end)
};
out.push_str(prefix);
out.push_str(&format!(
"@@ -{},{} +{},{} @@\n",
p_start + 1,
p_end - p_start,
t_start + 1,
t_end - t_start
));
while j < touched.hunks.len() && touched.hunks[j].target.start < t_end {
while t_cur < touched.hunks[j].target.start {
print_line(&mut out, prefix, ' ', t_cur, &t_ends, new_data);
t_cur += 1;
}
let ps = touched.hunks[j].parent.start;
let pe = touched.hunks[j].parent.end;
for k in ps..pe {
print_line(&mut out, prefix, '-', k, &p_ends, old_data);
}
while t_cur < touched.hunks[j].target.end && t_cur < t_end {
print_line(&mut out, prefix, '+', t_cur, &t_ends, new_data);
t_cur += 1;
}
j += 1;
}
while t_cur < t_end {
print_line(&mut out, prefix, ' ', t_cur, &t_ends, new_data);
t_cur += 1;
}
}
out
}
fn print_line(out: &mut String, prefix: &str, first: char, line: i64, ends: &[usize], data: &[u8]) {
let begin = if line == 0 {
0
} else {
ends[line as usize] + 1
};
let end = if (line + 1) as usize >= ends.len() {
data.len()
} else {
ends[(line + 1) as usize] + 1
};
let mut end2 = end.min(data.len());
let had_nl = if end2 > begin && data[end2 - 1] == b'\n' {
end2 -= 1;
true
} else {
false
};
let slice = &data[begin..end2];
out.push_str(prefix);
out.push(first);
out.push_str(&String::from_utf8_lossy(slice));
out.push('\n');
if !had_nl {
out.push_str(prefix);
out.push_str("\\ No newline at end of file\n");
}
}