use std::path::Path;
use merge::{ConflictMarkers, MergeOutcome, text_hunk_merge_with_markers};
use crate::parser::{Language, ParsedFile};
mod items;
mod language_rules;
mod reconstruct;
#[cfg(test)]
mod tests;
use items::segment_file;
use reconstruct::reconstruct_merged_file;
/// Three-way merges `ours` and `theirs` against `base`, preferring an item-level
/// merge of the parsed files and deferring to `text_hunk_merge_with_markers`
/// whenever the item-level result cannot be produced or cannot be trusted.
///
/// Resolution order:
///
/// 1. Byte-identical inputs are settled immediately: if `ours` equals `base` the
///    result is `theirs`; if `theirs` equals `base`, or both sides are identical,
///    the result is `ours`.
/// 2. The text-hunk merge is used when `Language::from_path(path)` yields
///    `Language::Unknown`, when any input is not valid UTF-8, or when any input
///    fails to parse.
/// 3. The text-hunk merge is also used when some inputs segment into zero items
///    while others do not, except when `base` has no items and `ours` and
///    `theirs` share at least one item key.
/// 4. Otherwise the segments are handed to `reconstruct_merged_file`. A `Clean`
///    result is kept only if `conserves_inputs` accepts it, and a `Conflicts`
///    result only if `conflict_well_formed` accepts it; a rejected result is
///    replaced by the text-hunk merge. `Binary` and `DeleteVsModify` results are
///    returned unchanged.
pub fn semantic_three_way_merge(
    base: &[u8],
    ours: &[u8],
    theirs: &[u8],
    path: &Path,
    markers: ConflictMarkers<'_>,
) -> MergeOutcome {
    // At most one side changed: take the other side as-is.
    if base == ours {
        return MergeOutcome::Clean(theirs.to_vec());
    }
    // Only `ours` changed, or both sides made the very same change.
    if base == theirs || ours == theirs {
        return MergeOutcome::Clean(ours.to_vec());
    }

    // Single definition of the line-based fallback used by every bail-out below.
    let hunk_merge = || text_hunk_merge_with_markers(base, ours, theirs, markers);

    let language = Language::from_path(path);
    if matches!(language, Language::Unknown) {
        return hunk_merge();
    }

    let (base_text, ours_text, theirs_text) = match (
        std::str::from_utf8(base),
        std::str::from_utf8(ours),
        std::str::from_utf8(theirs),
    ) {
        (Ok(b), Ok(o), Ok(t)) => (b, o, t),
        _ => return hunk_merge(),
    };

    let (base_parsed, ours_parsed, theirs_parsed) = match (
        ParsedFile::parse(base_text, language),
        ParsedFile::parse(ours_text, language),
        ParsedFile::parse(theirs_text, language),
    ) {
        (Some(b), Some(o), Some(t)) => (b, o, t),
        _ => return hunk_merge(),
    };

    let mut seg_base = segment_file(&base_parsed);
    let mut seg_ours = segment_file(&ours_parsed);
    let mut seg_theirs = segment_file(&theirs_parsed);
    items::canonicalize_use_keys(&mut seg_base, &mut seg_ours, &mut seg_theirs);

    // "Lopsided" means at least one side has no items while another side has some.
    let emptiness = [
        seg_base.items.is_empty(),
        seg_ours.items.is_empty(),
        seg_theirs.items.is_empty(),
    ];
    let lopsided = emptiness.contains(&true) && emptiness.contains(&false);
    if lopsided {
        // The one lopsided shape still handled at item level: `base` has no items
        // and both sides introduced an item under the same key.
        let same_key_added_on_both_sides = seg_base.items.is_empty() && {
            let keys_in_ours: std::collections::BTreeSet<_> =
                seg_ours.items.iter().map(|item| &item.key).collect();
            seg_theirs
                .items
                .iter()
                .any(|item| keys_in_ours.contains(&item.key))
        };
        if !same_key_added_on_both_sides {
            return hunk_merge();
        }
    }

    let merged = reconstruct_merged_file(
        base_text,
        ours_text,
        theirs_text,
        &seg_base,
        &seg_ours,
        &seg_theirs,
        markers,
    );

    // Sanity-check the reconstructed output before handing it back.
    let trustworthy = match &merged {
        MergeOutcome::Clean(bytes) => {
            conserves_inputs(bytes, language, &base_parsed, &ours_parsed, &theirs_parsed)
        }
        MergeOutcome::Conflicts {
            merged_bytes_with_markers,
            ..
        } => conflict_well_formed(merged_bytes_with_markers, language),
        MergeOutcome::Binary | MergeOutcome::DeleteVsModify => true,
    };

    if trustworthy {
        merged
    } else {
        hunk_merge()
    }
}
fn conflict_well_formed(output: &[u8], language: Language) -> bool {
let Ok(text) = std::str::from_utf8(output) else {
return false;
};
let Some((ours, theirs)) = resolve_conflict_sides(text) else {
return false;
};
ParsedFile::parse(ours.as_str(), language).is_some()
&& ParsedFile::parse(theirs.as_str(), language).is_some()
}
/// Splits conflict-marked `text` into the two files it represents.
///
/// The first string of the returned pair is `text` with every conflict replaced
/// by its "ours" side, the second is `text` with every conflict replaced by its
/// "theirs" side. Lines outside any conflict go to both; marker lines go to
/// neither. Lines keep their original terminators.
///
/// Returns `None` when the markers are not sequenced as start, separator, end
/// (a marker appears in a region where it is not expected) or when the text
/// ends inside an unfinished conflict.
fn resolve_conflict_sides(text: &str) -> Option<(String, String)> {
    /// Which part of the file the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Region {
        Shared,
        OursSide,
        TheirsSide,
    }

    let mut ours_view = String::new();
    let mut theirs_view = String::new();
    let mut region = Region::Shared;

    for raw_line in text.split_inclusive('\n') {
        match conflict_marker(raw_line) {
            // A marker line only ever moves the scanner to the next region;
            // any marker that does not fit the current region is malformed.
            Some(marker) => {
                region = match (region, marker) {
                    (Region::Shared, ConflictMarker::Start) => Region::OursSide,
                    (Region::OursSide, ConflictMarker::Separator) => Region::TheirsSide,
                    (Region::TheirsSide, ConflictMarker::End) => Region::Shared,
                    _ => return None,
                };
            }
            // Ordinary content is routed by the region it sits in.
            None => match region {
                Region::Shared => {
                    ours_view.push_str(raw_line);
                    theirs_view.push_str(raw_line);
                }
                Region::OursSide => ours_view.push_str(raw_line),
                Region::TheirsSide => theirs_view.push_str(raw_line),
            },
        }
    }

    // Ending anywhere but outside a conflict means a conflict was left open.
    matches!(region, Region::Shared).then_some((ours_view, theirs_view))
}
/// The kind of conflict-marker line recognised by `conflict_marker`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConflictMarker {
    /// A `<<<<<<<` line.
    Start,
    /// A `=======` line.
    Separator,
    /// A `>>>>>>>` line.
    End,
}
/// Classifies one line of merge output as a conflict marker, if it is one.
///
/// Before matching, one trailing `\n` and then one trailing `\r` are removed
/// and leading whitespace is ignored. The remainder is tested against
/// `<<<<<<<`, `=======` and `>>>>>>>` (in that order) with
/// `marker_body_matches`. Returns `None` for every other line.
fn conflict_marker(line: &str) -> Option<ConflictMarker> {
    const MARKERS: [(&str, ConflictMarker); 3] = [
        ("<<<<<<<", ConflictMarker::Start),
        ("=======", ConflictMarker::Separator),
        (">>>>>>>", ConflictMarker::End),
    ];

    let without_lf = line.strip_suffix('\n').unwrap_or(line);
    let without_eol = without_lf.strip_suffix('\r').unwrap_or(without_lf);
    let candidate = without_eol.trim_start();

    MARKERS
        .iter()
        .find(|(prefix, _)| marker_body_matches(candidate, prefix))
        .map(|&(_, kind)| kind)
}
/// Tests whether `body` is the marker `marker`, optionally followed by a label.
///
/// `body` must start with `marker`, and whatever follows must either be empty
/// or begin with a space. This keeps longer runs of the same character
/// (e.g. `========`) from being mistaken for a marker.
fn marker_body_matches(body: &str, marker: &str) -> bool {
    match body.strip_prefix(marker) {
        Some(tail) => tail.is_empty() || tail.starts_with(' '),
        None => false,
    }
}
/// Checks that a cleanly merged `output` did not invent any item.
///
/// `output` is decoded as UTF-8, parsed as `language` and segmented. Every
/// item reported by `items::visit_items` is reduced to its
/// `(scope, kind, name)` identity. The function returns `true` when each
/// identity found in the output also occurs in at least one of `base_parsed`,
/// `ours_parsed` or `theirs_parsed`.
///
/// Returns `false` when `output` is not valid UTF-8 or does not parse; in that
/// case the inputs are not segmented at all.
fn conserves_inputs(
    output: &[u8],
    language: Language,
    base_parsed: &ParsedFile,
    ours_parsed: &ParsedFile,
    theirs_parsed: &ParsedFile,
) -> bool {
    use std::collections::BTreeSet;

    type Identity = (Vec<String>, items::ItemKind, String);

    /// Segments `parsed` and adds the identity of every visited item to `sink`.
    fn record_identities(parsed: &ParsedFile, sink: &mut BTreeSet<Identity>) {
        let segments = segment_file(parsed);
        items::visit_items(&segments.items, &mut |item| {
            sink.insert((item.key.scope.clone(), item.key.kind, item.key.name.clone()));
        });
    }

    let reparsed = std::str::from_utf8(output)
        .ok()
        .and_then(|text| ParsedFile::parse(text, language));
    let out_parsed = match reparsed {
        Some(parsed) => parsed,
        None => return false,
    };

    // Everything any of the three inputs declares is fair game for the output.
    let mut permitted: BTreeSet<Identity> = BTreeSet::new();
    for side in [base_parsed, ours_parsed, theirs_parsed] {
        record_identities(side, &mut permitted);
    }

    let mut produced: BTreeSet<Identity> = BTreeSet::new();
    record_identities(&out_parsed, &mut produced);

    produced.iter().all(|identity| permitted.contains(identity))
}
/// Selects which merge implementation `three_way_merge` runs.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum MergeStrategy {
    /// Go straight to `text_hunk_merge_with_markers`.
    HunkOnly,
    /// Run `semantic_three_way_merge`.
    Semantic,
}
/// Three-way merges `ours` and `theirs` against `base` using the requested
/// `strategy`.
///
/// `MergeStrategy::Semantic` forwards every argument to
/// `semantic_three_way_merge`. `MergeStrategy::HunkOnly` calls
/// `text_hunk_merge_with_markers` directly and does not use `path`.
pub fn three_way_merge(
    base: &[u8],
    ours: &[u8],
    theirs: &[u8],
    path: &Path,
    markers: ConflictMarkers<'_>,
    strategy: MergeStrategy,
) -> MergeOutcome {
    match strategy {
        MergeStrategy::Semantic => {
            semantic_three_way_merge(base, ours, theirs, path, markers)
        }
        MergeStrategy::HunkOnly => {
            text_hunk_merge_with_markers(base, ours, theirs, markers)
        }
    }
}
pub use merge::{ConflictMarkers as MergeConflictMarkers, MergeOutcome as MergeDriverOutcome};