#![allow(missing_docs)]
use std::io;
use std::io::Read;
use std::io::Write;
use std::iter::zip;
use bstr::BString;
use bstr::ByteSlice;
use futures::stream::BoxStream;
use futures::try_join;
use futures::Stream;
use futures::StreamExt;
use futures::TryStreamExt;
use itertools::Itertools;
use pollster::FutureExt;
use regex::bytes::Regex;
use regex::bytes::RegexBuilder;
use crate::backend::BackendError;
use crate::backend::BackendResult;
use crate::backend::CommitId;
use crate::backend::FileId;
use crate::backend::SymlinkId;
use crate::backend::TreeId;
use crate::backend::TreeValue;
use crate::copies::CopiesTreeDiffEntry;
use crate::copies::CopiesTreeDiffEntryPath;
use crate::diff::Diff;
use crate::diff::DiffHunk;
use crate::diff::DiffHunkKind;
use crate::files;
use crate::files::MergeResult;
use crate::merge::Merge;
use crate::merge::MergeBuilder;
use crate::merge::MergedTreeValue;
use crate::repo_path::RepoPath;
use crate::store::Store;
/// Marker that opens a conflict region ("<<<<<<<").
const CONFLICT_START_LINE: &str = "<<<<<<<";
/// Marker that closes a conflict region (">>>>>>>").
const CONFLICT_END_LINE: &str = ">>>>>>>";
/// jj-style marker introducing a diff from a base to a side ("%%%%%%%").
const CONFLICT_DIFF_LINE: &str = "%%%%%%%";
/// jj-style marker introducing the snapshot of a base ("-------").
const CONFLICT_MINUS_LINE: &str = "-------";
/// jj-style marker introducing the snapshot of a side ("+++++++").
const CONFLICT_PLUS_LINE: &str = "+++++++";
/// Git-style marker introducing the ancestor/base section ("|||||||").
const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||";
/// Git-style marker separating the base section from side #2 ("=======").
const CONFLICT_GIT_SEPARATOR_LINE: &str = "=======";
// First byte of each marker; used to cheaply classify a line once
// `is_conflict_marker_line()` has confirmed it is a marker line.
const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0];
const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0];
const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0];
const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0];
const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0];
const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0];
const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0];
/// Matches a line consisting of exactly seven marker characters, optionally
/// followed by a space and arbitrary text (e.g. "<<<<<<< Conflict 1 of 2").
static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
    RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$")
        .multi_line(true)
        .build()
        .unwrap()
});
/// Writes a unified-diff-like rendering of `hunks` to `file`: matching
/// content is prefixed with a space, differing content with "-" for the
/// left side and "+" for the right side.
fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
    // Writes every line of `content` with `prefix` prepended.
    fn write_prefixed(prefix: &[u8], content: &[u8], file: &mut dyn Write) -> io::Result<()> {
        for line in content.split_inclusive(|b| *b == b'\n') {
            file.write_all(prefix)?;
            file.write_all(line)?;
        }
        Ok(())
    }
    for hunk in hunks {
        match hunk.kind {
            DiffHunkKind::Matching => {
                // All sides carry identical content in a matching hunk, so
                // the first one can stand in for all of them.
                debug_assert!(hunk.contents.iter().all_equal());
                write_prefixed(b" ", &hunk.contents[0], file)?;
            }
            DiffHunkKind::Different => {
                write_prefixed(b"-", &hunk.contents[0], file)?;
                write_prefixed(b"+", &hunk.contents[1], file)?;
            }
        }
    }
    Ok(())
}
/// Reads the full contents of the file `term` at `path` from `store`.
/// An absent term (e.g. a side of a conflict where the file was deleted)
/// yields an empty buffer.
async fn get_file_contents(
    store: &Store,
    path: &RepoPath,
    term: &Option<FileId>,
) -> BackendResult<BString> {
    let Some(id) = term else {
        return Ok(BString::new(vec![]));
    };
    let mut reader = store.read_file_async(path, id).await?;
    let mut content = vec![];
    reader
        .read_to_end(&mut content)
        .map_err(|err| BackendError::ReadFile {
            path: path.to_owned(),
            id: id.clone(),
            source: err.into(),
        })?;
    Ok(BString::new(content))
}
/// Reads the contents of every term of `merge` at `path` and collects them
/// into a single `Merge` of content buffers (one "hunk" spanning the whole
/// file). Absent terms become empty buffers.
pub async fn extract_as_single_hunk(
    merge: &Merge<Option<FileId>>,
    store: &Store,
    path: &RepoPath,
) -> BackendResult<Merge<BString>> {
    // Read the terms sequentially; any read error aborts the whole merge.
    let contents = futures::stream::iter(merge.iter())
        .then(|term| get_file_contents(store, path, term));
    let builder: MergeBuilder<BString> = contents.try_collect().await?;
    Ok(builder.build())
}
/// What a path resolves to once file contents and symlink targets have been
/// read from the backend.
pub enum MaterializedTreeValue {
    /// The path doesn't exist.
    Absent,
    /// The backend denied read access to the path.
    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
    /// A regular file; `reader` streams its contents.
    File {
        id: FileId,
        executable: bool,
        reader: Box<dyn Read>,
    },
    /// A symlink and its target.
    Symlink {
        id: SymlinkId,
        target: String,
    },
    /// A conflict whose terms are all files (or absent). `contents` holds the
    /// per-term file contents read as a single hunk.
    FileConflict {
        id: Merge<Option<FileId>>,
        contents: Merge<BString>,
        executable: bool,
    },
    /// A conflict involving non-file values, returned unmaterialized.
    OtherConflict {
        id: MergedTreeValue,
    },
    /// A Git submodule pinned to a commit.
    GitSubmodule(CommitId),
    /// A subtree.
    Tree(TreeId),
}
impl MaterializedTreeValue {
pub fn is_absent(&self) -> bool {
matches!(self, MaterializedTreeValue::Absent)
}
pub fn is_present(&self) -> bool {
!self.is_absent()
}
}
/// Materializes `value` at `path`, converting a `ReadAccessDenied` backend
/// error into [`MaterializedTreeValue::AccessDenied`] instead of failing.
pub async fn materialize_tree_value(
    store: &Store,
    path: &RepoPath,
    value: MergedTreeValue,
) -> BackendResult<MaterializedTreeValue> {
    let result = materialize_tree_value_no_access_denied(store, path, value).await;
    if let Err(BackendError::ReadAccessDenied { source, .. }) = result {
        Ok(MaterializedTreeValue::AccessDenied(source))
    } else {
        result
    }
}
/// Resolves a `MergedTreeValue` into a [`MaterializedTreeValue`], reading
/// file contents and symlink targets from `store` as needed. Unlike
/// [`materialize_tree_value`], `ReadAccessDenied` errors propagate to the
/// caller.
///
/// # Panics
/// Panics on a legacy `TreeValue::Conflict` object; those are expected to
/// have been converted before reaching this point.
async fn materialize_tree_value_no_access_denied(
    store: &Store,
    path: &RepoPath,
    value: MergedTreeValue,
) -> BackendResult<MaterializedTreeValue> {
    match value.into_resolved() {
        // No value at this path.
        Ok(None) => Ok(MaterializedTreeValue::Absent),
        Ok(Some(TreeValue::File { id, executable })) => {
            let reader = store.read_file_async(path, &id).await?;
            Ok(MaterializedTreeValue::File {
                id,
                executable,
                reader,
            })
        }
        Ok(Some(TreeValue::Symlink(id))) => {
            let target = store.read_symlink_async(path, &id).await?;
            Ok(MaterializedTreeValue::Symlink { id, target })
        }
        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
        Ok(Some(TreeValue::Conflict(_))) => {
            panic!("cannot materialize legacy conflict object at path {path:?}");
        }
        Err(conflict) => {
            // Conflicts whose terms aren't all files (or absent) can't be
            // rendered with conflict markers; return them unmaterialized.
            let Some(file_merge) = conflict.to_file_merge() else {
                return Ok(MaterializedTreeValue::OtherConflict { id: conflict });
            };
            let file_merge = file_merge.simplify();
            let contents = extract_as_single_hunk(&file_merge, store, path).await?;
            // A conflicted executable bit falls back to the default (false)
            // unless it resolves trivially.
            let executable = if let Some(merge) = conflict.to_executable_merge() {
                merge.resolve_trivial().copied().unwrap_or_default()
            } else {
                false
            };
            Ok(MaterializedTreeValue::FileConflict {
                id: file_merge,
                contents,
                executable,
            })
        }
    }
}
/// Style of conflict markers written when materializing a conflicted file.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum ConflictMarkerStyle {
    /// jj-style markers rendering a diff from each base to a side.
    #[default]
    Diff,
    /// jj-style markers rendering full snapshots of each base and side.
    Snapshot,
    /// Git-style "<<<<<<< / ||||||| / ======= / >>>>>>>" markers.
    Git,
}
/// Merges `single_hunk` and writes the result to `output`. A fully resolved
/// merge is written verbatim; otherwise the conflicted hunks are rendered
/// with the requested conflict-marker style.
pub fn materialize_merge_result<T: AsRef<[u8]>>(
    single_hunk: &Merge<T>,
    conflict_marker_style: ConflictMarkerStyle,
    output: &mut dyn Write,
) -> io::Result<()> {
    match files::merge(single_hunk) {
        MergeResult::Resolved(content) => output.write_all(&content),
        MergeResult::Conflict(hunks) => {
            materialize_conflict_hunks(&hunks, conflict_marker_style, output)
        }
    }
}
pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
single_hunk: &Merge<T>,
conflict_marker_style: ConflictMarkerStyle,
) -> BString {
let merge_result = files::merge(single_hunk);
match merge_result {
MergeResult::Resolved(content) => content,
MergeResult::Conflict(hunks) => {
let mut output = Vec::new();
materialize_conflict_hunks(&hunks, conflict_marker_style, &mut output)
.expect("writing to an in-memory buffer should never fail");
output.into()
}
}
}
/// Writes each hunk to `output`: resolved hunks verbatim, conflicted hunks
/// rendered with conflict markers. Each conflict is labeled
/// "Conflict i of n" so users can track them across the file.
fn materialize_conflict_hunks(
    hunks: &[Merge<BString>],
    conflict_marker_style: ConflictMarkerStyle,
    output: &mut dyn Write,
) -> io::Result<()> {
    let num_conflicts = hunks
        .iter()
        .filter(|hunk| hunk.as_resolved().is_none())
        .count();
    let mut conflict_index = 0;
    for hunk in hunks {
        match hunk.as_resolved() {
            Some(content) => output.write_all(content)?,
            None => {
                conflict_index += 1;
                let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
                // Git-style markers can only express a two-sided conflict
                // (left/base/right); everything else uses the jj style.
                if let (ConflictMarkerStyle::Git, [left, base, right]) =
                    (conflict_marker_style, hunk.as_slice())
                {
                    materialize_git_style_conflict(left, base, right, &conflict_info, output)?;
                } else {
                    materialize_jj_style_conflict(
                        hunk,
                        &conflict_info,
                        conflict_marker_style,
                        output,
                    )?;
                }
            }
        }
    }
    Ok(())
}
/// Writes a two-sided conflict using Git-style markers:
/// `<<<<<<<` side 1, `|||||||` base, `=======`, side 2, `>>>>>>>`.
fn materialize_git_style_conflict(
    left: &[u8],
    base: &[u8],
    right: &[u8],
    conflict_info: &str,
    output: &mut dyn Write,
) -> io::Result<()> {
    writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?;
    // Each section is its content followed by the marker that terminates it.
    let sections: [(&[u8], String); 3] = [
        (left, format!("{CONFLICT_GIT_ANCESTOR_LINE} Base")),
        (base, CONFLICT_GIT_SEPARATOR_LINE.to_string()),
        (right, format!("{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")),
    ];
    for (content, trailing_marker) in sections {
        output.write_all(content)?;
        writeln!(output, "{trailing_marker}")?;
    }
    Ok(())
}
/// Writes a conflicted hunk using jj-style markers ("<<<<<<<" ... ">>>>>>>"),
/// pairing each base with a side. In `Diff` style a base/side pair is written
/// as a "%%%%%%%" diff section; in other styles the base and side are written
/// as "-------"/"+++++++" snapshots. Unpaired bases and sides are written as
/// snapshots at the end.
fn materialize_jj_style_conflict(
    hunk: &Merge<BString>,
    conflict_info: &str,
    conflict_marker_style: ConflictMarkerStyle,
    output: &mut dyn Write,
) -> io::Result<()> {
    // Writes the "+++++++" section: full contents of one side.
    fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
        writeln!(
            output,
            "{CONFLICT_PLUS_LINE} Contents of side #{}",
            add_index + 1
        )?;
        output.write_all(data)
    }
    // Writes the "-------" section: full contents of one base.
    fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
        writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?;
        output.write_all(data)
    }
    // Writes the "%%%%%%%" section: a diff from a base to a side.
    fn write_diff(
        base_str: &str,
        add_index: usize,
        diff: &[DiffHunk],
        output: &mut dyn Write,
    ) -> io::Result<()> {
        writeln!(
            output,
            "{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}",
            add_index + 1
        )?;
        write_diff_hunks(diff, output)
    }
    writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?;
    // Index of the next side that has not been written yet.
    let mut add_index = 0;
    for (base_index, left) in hunk.removes().enumerate() {
        // Label the base "base" when there is only one, "base #N" otherwise.
        let base_str = if hunk.removes().len() == 1 {
            "base".to_string()
        } else {
            format!("base #{}", base_index + 1)
        };
        // No sides left to pair with: emit the remaining bases as snapshots.
        let Some(right1) = hunk.get_add(add_index) else {
            write_base(&base_str, left, output)?;
            continue;
        };
        // Non-diff styles write side and base as plain snapshots.
        if conflict_marker_style != ConflictMarkerStyle::Diff {
            write_side(add_index, right1, output)?;
            write_base(&base_str, left, output)?;
            add_index += 1;
            continue;
        }
        let diff1 = Diff::by_line([&left, &right1]).hunks().collect_vec();
        // If the next side diffs more compactly against this base, emit the
        // current side as a snapshot and diff the base against the next side
        // instead, consuming two sides for this base.
        if let Some(right2) = hunk.get_add(add_index + 1) {
            let diff2 = Diff::by_line([&left, &right2]).hunks().collect_vec();
            if diff_size(&diff2) < diff_size(&diff1) {
                write_side(add_index, right1, output)?;
                write_diff(&base_str, add_index + 1, &diff2, output)?;
                add_index += 2;
                continue;
            }
        }
        write_diff(&base_str, add_index, &diff1, output)?;
        add_index += 1;
    }
    // Sides left over after all bases were paired are written as snapshots.
    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
        write_side(add_index, slice, output)?;
    }
    writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?;
    Ok(())
}
/// Total number of bytes contained in differing hunks; matching hunks
/// contribute nothing. Used to choose which side diffs most compactly
/// against a base.
fn diff_size(hunks: &[DiffHunk]) -> usize {
    let mut total = 0;
    for hunk in hunks {
        match hunk.kind {
            DiffHunkKind::Matching => {}
            DiffHunkKind::Different => {
                total += hunk
                    .contents
                    .iter()
                    .map(|content| content.len())
                    .sum::<usize>();
            }
        }
    }
    total
}
/// A tree-diff entry whose before/after values have been materialized.
pub struct MaterializedTreeDiffEntry {
    pub path: CopiesTreeDiffEntryPath,
    // Materialized (before, after) values, or the error from diffing or
    // materializing this entry.
    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
}
/// Wraps a tree-diff stream so that both sides of each entry are materialized.
/// Entries are processed concurrently, bounded by half the store's configured
/// concurrency (at least 1), while preserving the input order.
pub fn materialized_diff_stream<'a>(
    store: &'a Store,
    tree_diff: BoxStream<'a, CopiesTreeDiffEntry>,
) -> impl Stream<Item = MaterializedTreeDiffEntry> + 'a {
    tree_diff
        .map(|CopiesTreeDiffEntry { path, values }| async {
            match values {
                // Keep per-entry diff errors attached to their path.
                Err(err) => MaterializedTreeDiffEntry {
                    path,
                    values: Err(err),
                },
                Ok((before, after)) => {
                    // Materialize both sides of the entry concurrently.
                    let before_future = materialize_tree_value(store, path.source(), before);
                    let after_future = materialize_tree_value(store, path.target(), after);
                    let values = try_join!(before_future, after_future);
                    MaterializedTreeDiffEntry { path, values }
                }
            }
        })
        .buffered((store.concurrency() / 2).max(1))
}
/// Parses conflict markers in `input` into an alternating sequence of
/// resolved hunks and conflict hunks.
///
/// Returns `None` if no valid conflict was found. A region between "<<<<<<<"
/// and ">>>>>>>" is only accepted as a conflict when its parsed hunk has
/// exactly `num_sides` sides; otherwise the region is left as part of the
/// surrounding resolved text.
pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BString>>> {
    if input.is_empty() {
        return None;
    }
    let mut hunks = vec![];
    // Byte offset of the current line within `input`.
    let mut pos = 0;
    // Start of the resolved text preceding the next conflict region.
    let mut resolved_start = 0;
    // Offset and length of the most recent unmatched "<<<<<<<" line, if any.
    let mut conflict_start = None;
    let mut conflict_start_len = 0;
    for line in input.split_inclusive(|b| *b == b'\n') {
        if is_conflict_marker_line(line) {
            if line[0] == CONFLICT_START_LINE_CHAR {
                conflict_start = Some(pos);
                conflict_start_len = line.len();
            } else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR {
                // Candidate conflict body between the start and end markers.
                let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos];
                let hunk = parse_conflict_hunk(conflict_body);
                if hunk.num_sides() == num_sides {
                    // Flush the resolved text that preceded this conflict.
                    let resolved_slice = &input[resolved_start..conflict_start.unwrap()];
                    if !resolved_slice.is_empty() {
                        hunks.push(Merge::resolved(BString::from(resolved_slice)));
                    }
                    hunks.push(hunk);
                    resolved_start = pos + line.len();
                }
                conflict_start = None;
            }
        }
        pos += line.len();
    }
    if hunks.is_empty() {
        None
    } else {
        // Flush any resolved text after the last accepted conflict.
        if resolved_start < input.len() {
            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
        }
        Some(hunks)
    }
}
/// Dispatches a conflict body (the text between "<<<<<<<" and ">>>>>>>") to
/// the jj-style or Git-style parser based on its first marker line. A body
/// starting with any other marker parses as an empty resolved merge.
fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
    // First byte of the leading marker line, if the body starts with one.
    let first_marker_char = input
        .lines_with_terminator()
        .next()
        .and_then(|line| is_conflict_marker_line(line).then(|| line[0]));
    match first_marker_char {
        Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => {
            parse_jj_style_conflict_hunk(input)
        }
        // Git bodies begin with side-1 content, so no leading marker is
        // also treated as Git style.
        None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input),
        Some(_) => Merge::resolved(BString::new(vec![])),
    }
}
/// Parses the body of a jj-style conflict into a `Merge` of bases (removes)
/// and sides (adds).
///
/// Returns an empty resolved merge if the body is malformed: content before
/// the first section marker, an unrecognized line inside a diff section, or
/// a section count that doesn't form a valid merge.
fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    // Which kind of section the current line belongs to.
    enum State {
        Diff,
        Minus,
        Plus,
        Unknown,
    }
    let mut state = State::Unknown;
    let mut removes = vec![];
    let mut adds = vec![];
    for line in input.lines_with_terminator() {
        if is_conflict_marker_line(line) {
            match line[0] {
                CONFLICT_DIFF_LINE_CHAR => {
                    // A "%%%%%%%" diff section contributes one base and one
                    // side simultaneously.
                    state = State::Diff;
                    removes.push(BString::new(vec![]));
                    adds.push(BString::new(vec![]));
                    continue;
                }
                CONFLICT_MINUS_LINE_CHAR => {
                    state = State::Minus;
                    removes.push(BString::new(vec![]));
                    continue;
                }
                CONFLICT_PLUS_LINE_CHAR => {
                    state = State::Plus;
                    adds.push(BString::new(vec![]));
                    continue;
                }
                _ => {}
            }
        }
        match state {
            State::Diff => {
                // "-" lines belong to the base, "+" lines to the side, and
                // " " (context) lines to both.
                if let Some(rest) = line.strip_prefix(b"-") {
                    removes.last_mut().unwrap().extend_from_slice(rest);
                } else if let Some(rest) = line.strip_prefix(b"+") {
                    adds.last_mut().unwrap().extend_from_slice(rest);
                } else if let Some(rest) = line.strip_prefix(b" ") {
                    removes.last_mut().unwrap().extend_from_slice(rest);
                    adds.last_mut().unwrap().extend_from_slice(rest);
                } else if line == b"\n" || line == b"\r\n" {
                    // A bare newline stands for an empty context line.
                    removes.last_mut().unwrap().extend_from_slice(line);
                    adds.last_mut().unwrap().extend_from_slice(line);
                } else {
                    // Unrecognized diff line; the hunk is not parseable.
                    return Merge::resolved(BString::new(vec![]));
                }
            }
            State::Minus => {
                removes.last_mut().unwrap().extend_from_slice(line);
            }
            State::Plus => {
                adds.last_mut().unwrap().extend_from_slice(line);
            }
            State::Unknown => {
                // Content before any section marker; not a valid hunk.
                return Merge::resolved(BString::new(vec![]));
            }
        }
    }
    // A valid merge has exactly one more side than bases.
    if adds.len() == removes.len() + 1 {
        Merge::from_removes_adds(removes, adds)
    } else {
        Merge::resolved(BString::new(vec![]))
    }
}
/// Parses the body of a Git-style conflict (side 1, "|||||||" base,
/// "=======", side 2) into a three-term `Merge`.
///
/// Returns an empty resolved merge if the markers appear out of order or the
/// "=======" separator is never reached.
fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    // Which of the three sections the current line belongs to.
    #[derive(PartialEq, Eq)]
    enum State {
        Left,
        Base,
        Right,
    }
    let mut state = State::Left;
    let mut left = BString::new(vec![]);
    let mut base = BString::new(vec![]);
    let mut right = BString::new(vec![]);
    for line in input.lines_with_terminator() {
        if is_conflict_marker_line(line) {
            match line[0] {
                CONFLICT_GIT_ANCESTOR_LINE_CHAR => {
                    // "|||||||" must directly follow the left side.
                    if state == State::Left {
                        state = State::Base;
                        continue;
                    } else {
                        return Merge::resolved(BString::new(vec![]));
                    }
                }
                CONFLICT_GIT_SEPARATOR_LINE_CHAR => {
                    // "=======" must directly follow the base section.
                    if state == State::Base {
                        state = State::Right;
                        continue;
                    } else {
                        return Merge::resolved(BString::new(vec![]));
                    }
                }
                _ => {}
            }
        }
        match state {
            State::Left => left.extend_from_slice(line),
            State::Base => base.extend_from_slice(line),
            State::Right => right.extend_from_slice(line),
        }
    }
    // Only a body that reached the right side forms a valid conflict.
    if state == State::Right {
        Merge::from_vec(vec![left, base, right])
    } else {
        Merge::resolved(BString::new(vec![]))
    }
}
/// Returns true if `line` is a conflict marker line: exactly seven repeated
/// marker characters, optionally followed by a space and arbitrary text.
/// Trailing ASCII whitespace (including the line terminator) is ignored.
fn is_conflict_marker_line(line: &[u8]) -> bool {
    let trimmed = line.trim_end_with(|ch| ch.is_ascii_whitespace());
    CONFLICT_MARKER_REGEX.is_match_at(trimmed, 0)
}
/// Parses conflict markers in `content` (e.g. an edited working-copy file)
/// and writes the resulting contents back to `store`.
///
/// Returns the original `file_ids` when the content is unchanged from its
/// materialization. When markers matching the conflict's number of sides are
/// found, each term's reconstructed content is written as a new file and a
/// conflicted merge is returned. Otherwise the content is written as a single
/// resolved file.
pub async fn update_from_content(
    file_ids: &Merge<Option<FileId>>,
    store: &Store,
    path: &RepoPath,
    content: &[u8],
    conflict_marker_style: ConflictMarkerStyle,
) -> BackendResult<Merge<Option<FileId>>> {
    // The simplified conflict is what would have been materialized, so parse
    // against it first. This also avoids turning an unchanged simplified
    // conflict into a more complex one.
    let simplified_file_ids = file_ids.clone().simplify();
    let simplified_file_ids = &simplified_file_ids;
    let mut old_content = Vec::with_capacity(content.len());
    let merge_hunk = extract_as_single_hunk(simplified_file_ids, store, path).await?;
    // Writing to an in-memory Vec can't fail.
    materialize_merge_result(&merge_hunk, conflict_marker_style, &mut old_content).unwrap();
    if content == old_content {
        // Nothing changed; no new objects need to be written.
        return Ok(file_ids.clone());
    }
    // Try to parse conflict markers, first against the simplified conflict,
    // then (if it differs) against the unsimplified one.
    let (used_file_ids, hunks) = 'hunks: {
        if let Some(hunks) = parse_conflict(content, simplified_file_ids.num_sides()) {
            break 'hunks (simplified_file_ids, hunks);
        };
        if simplified_file_ids.num_sides() != file_ids.num_sides() {
            if let Some(hunks) = parse_conflict(content, file_ids.num_sides()) {
                break 'hunks (file_ids, hunks);
            };
        };
        // No conflict markers: treat the content as a full resolution.
        let file_id = store.write_file(path, &mut &content[..]).await?;
        return Ok(Merge::normal(file_id));
    };
    // Reconstruct per-term contents: resolved hunks contribute to every term,
    // conflicted hunks only to their corresponding term.
    let mut contents = used_file_ids.map(|_| vec![]);
    for hunk in hunks {
        if let Some(slice) = hunk.as_resolved() {
            for content in contents.iter_mut() {
                content.extend_from_slice(slice);
            }
        } else {
            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
                content.extend(Vec::from(slice));
            }
        }
    }
    // If a term that was previously absent now has content, the markers no
    // longer describe the original conflict; fall back to writing the whole
    // content as a resolved file.
    if zip(contents.iter(), used_file_ids.iter())
        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
    {
        let file_id = store.write_file(path, &mut &content[..]).await?;
        return Ok(Merge::normal(file_id));
    }
    // Write the new content of each previously-present term. Await the writes
    // directly instead of using `pollster::block_on` inside this async fn:
    // blocking the executor thread from async code can stall or deadlock
    // single-threaded runtimes. Errors short-circuit exactly as before.
    let mut new_file_ids: Vec<Option<FileId>> = Vec::with_capacity(used_file_ids.iter().len());
    for (content, file_id) in zip(contents.iter(), used_file_ids.iter()) {
        let new_file_id = match file_id {
            Some(_) => Some(store.write_file(path, &mut content.as_slice()).await?),
            // An absent term stays absent.
            None => None,
        };
        new_file_ids.push(new_file_id);
    }
    // If we parsed against the simplified conflict, map the new terms back
    // onto the unsimplified conflict's shape.
    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
        file_ids
            .clone()
            .update_from_simplified(Merge::from_vec(new_file_ids))
    } else {
        Merge::from_vec(new_file_ids)
    };
    Ok(new_file_ids)
}