use alloc::borrow::{Borrow, ToOwned};
use alloc::collections::BinaryHeap;
use alloc::vec;
use alloc::vec::Vec;
use core::cmp::Reverse;
use core::ops::{Index, Range};
use core::time::Duration;
mod abstraction;
#[cfg(feature = "inline")]
mod inline;
mod utils;
pub use self::abstraction::{DiffInput, DiffableStr, DiffableStrRef, IntoDiffInput};
#[cfg(feature = "inline")]
pub use self::inline::{InlineChange, InlineChangeMode, InlineChangeOptions};
use self::utils::{QuickSeqRatio, upper_seq_ratio};
use crate::algorithms::IdentifyDistinct;
use crate::deadline_support::{Instant, duration_to_deadline};
use crate::udiff::UnifiedDiff;
use crate::{
Algorithm, Change, ChangeTag, DiffOp, DiffTag, capture_diff_deadline, diff_ratio,
group_diff_ops,
};
#[derive(Debug, Clone, Copy)]
enum Deadline {
Absolute(Instant),
Relative(Duration),
}
impl Deadline {
fn into_instant(self) -> Option<Instant> {
match self {
Deadline::Absolute(instant) => Some(instant),
Deadline::Relative(duration) => duration_to_deadline(duration),
}
}
}
pub(crate) enum TextDiffSide<'a, T: DiffableStr + ?Sized> {
BorrowedTokens(Vec<&'a T>),
OwnedTokens(Vec<<T as ToOwned>::Owned>),
}
impl<'a, T: DiffableStr + ?Sized> TextDiffSide<'a, T> {
fn from_tokenized(input: DiffInput<'a, T>, tokenize: impl FnOnce(&T) -> Vec<&T>) -> Self {
match input {
DiffInput::Borrowed(value) => TextDiffSide::BorrowedTokens(tokenize(value)),
DiffInput::Owned(value) => {
let tokens = tokenize(value.borrow())
.into_iter()
.map(ToOwned::to_owned)
.collect();
TextDiffSide::OwnedTokens(tokens)
}
}
}
fn from_slices(slices: &[&'a T]) -> Self {
TextDiffSide::BorrowedTokens(slices.to_vec())
}
fn len(&self) -> usize {
match self {
TextDiffSide::BorrowedTokens(slices) => slices.len(),
TextDiffSide::OwnedTokens(slices) => slices.len(),
}
}
fn get(&self, index: usize) -> Option<&T> {
match self {
TextDiffSide::BorrowedTokens(slices) => slices.get(index).copied(),
TextDiffSide::OwnedTokens(slices) => slices.get(index).map(Borrow::borrow),
}
}
fn iter(&self) -> impl Iterator<Item = &T> {
(0..self.len()).map(|idx| self.get(idx).expect("slice out of bounds"))
}
}
impl<T: DiffableStr + ?Sized> Index<usize> for TextDiffSide<'_, T> {
type Output = T;
fn index(&self, index: usize) -> &Self::Output {
self.get(index).expect("slice out of bounds")
}
}
pub struct TextChangesIter<'diff, 'old, 'new, T: DiffableStr + ?Sized> {
diff: &'diff TextDiff<'old, 'new, T>,
old_range: Range<usize>,
new_range: Range<usize>,
old_index: usize,
new_index: usize,
old_i: usize,
new_i: usize,
tag: DiffTag,
}
impl<'diff, 'old, 'new, T: DiffableStr + ?Sized> TextChangesIter<'diff, 'old, 'new, T> {
fn new(diff: &'diff TextDiff<'old, 'new, T>, op: &DiffOp) -> Self {
let (tag, old_range, new_range) = op.as_tag_tuple();
let old_index = old_range.start;
let new_index = new_range.start;
let old_i = old_range.start;
let new_i = new_range.start;
TextChangesIter {
diff,
old_range,
new_range,
old_index,
new_index,
old_i,
new_i,
tag,
}
}
}
impl<'diff, 'old, 'new, T: DiffableStr + ?Sized> Iterator
for TextChangesIter<'diff, 'old, 'new, T>
{
type Item = Change<&'diff T>;
fn next(&mut self) -> Option<Self::Item> {
match self.tag {
DiffTag::Equal => {
if self.old_i < self.old_range.end {
let value = self
.diff
.old_side()
.get(self.old_i)
.expect("diff operation old range out of bounds");
self.old_i += 1;
self.old_index += 1;
self.new_index += 1;
Some(Change {
tag: ChangeTag::Equal,
old_index: Some(self.old_index - 1),
new_index: Some(self.new_index - 1),
value,
})
} else {
None
}
}
DiffTag::Delete => {
if self.old_i < self.old_range.end {
let value = self
.diff
.old_side()
.get(self.old_i)
.expect("diff operation old range out of bounds");
self.old_i += 1;
self.old_index += 1;
Some(Change {
tag: ChangeTag::Delete,
old_index: Some(self.old_index - 1),
new_index: None,
value,
})
} else {
None
}
}
DiffTag::Insert => {
if self.new_i < self.new_range.end {
let value = self
.diff
.new_side()
.get(self.new_i)
.expect("diff operation new range out of bounds");
self.new_i += 1;
self.new_index += 1;
Some(Change {
tag: ChangeTag::Insert,
old_index: None,
new_index: Some(self.new_index - 1),
value,
})
} else {
None
}
}
DiffTag::Replace => {
if self.old_i < self.old_range.end {
let value = self
.diff
.old_side()
.get(self.old_i)
.expect("diff operation old range out of bounds");
self.old_i += 1;
self.old_index += 1;
Some(Change {
tag: ChangeTag::Delete,
old_index: Some(self.old_index - 1),
new_index: None,
value,
})
} else if self.new_i < self.new_range.end {
let value = self
.diff
.new_side()
.get(self.new_i)
.expect("diff operation new range out of bounds");
self.new_i += 1;
self.new_index += 1;
Some(Change {
tag: ChangeTag::Insert,
old_index: None,
new_index: Some(self.new_index - 1),
value,
})
} else {
None
}
}
}
}
}
#[derive(Clone, Debug, Default)]
pub struct TextDiffConfig {
algorithm: Algorithm,
newline_terminated: Option<bool>,
deadline: Option<Deadline>,
}
impl TextDiffConfig {
pub const fn new() -> Self {
TextDiffConfig {
algorithm: Algorithm::Myers,
newline_terminated: None,
deadline: None,
}
}
pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
self.algorithm = alg;
self
}
pub fn deadline(&mut self, deadline: Instant) -> &mut Self {
self.deadline = Some(Deadline::Absolute(deadline));
self
}
pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
self.deadline = Some(Deadline::Relative(timeout));
self
}
pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
self.newline_terminated = Some(yes);
self
}
pub fn diff_lines<'old, 'new, Old, New, T>(&self, old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
self.diff(
TextDiffSide::from_tokenized(old.into_diff_input(), DiffableStr::tokenize_lines),
TextDiffSide::from_tokenized(new.into_diff_input(), DiffableStr::tokenize_lines),
true,
)
}
pub fn diff_words<'old, 'new, Old, New, T>(&self, old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
self.diff(
TextDiffSide::from_tokenized(old.into_diff_input(), DiffableStr::tokenize_words),
TextDiffSide::from_tokenized(new.into_diff_input(), DiffableStr::tokenize_words),
false,
)
}
pub fn diff_chars<'old, 'new, Old, New, T>(&self, old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
self.diff(
TextDiffSide::from_tokenized(old.into_diff_input(), DiffableStr::tokenize_chars),
TextDiffSide::from_tokenized(new.into_diff_input(), DiffableStr::tokenize_chars),
false,
)
}
#[cfg(feature = "unicode")]
pub fn diff_unicode_words<'old, 'new, Old, New, T>(
&self,
old: Old,
new: New,
) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
self.diff(
TextDiffSide::from_tokenized(
old.into_diff_input(),
DiffableStr::tokenize_unicode_words,
),
TextDiffSide::from_tokenized(
new.into_diff_input(),
DiffableStr::tokenize_unicode_words,
),
false,
)
}
#[cfg(feature = "unicode")]
pub fn diff_graphemes<'old, 'new, Old, New, T>(
&self,
old: Old,
new: New,
) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
self.diff(
TextDiffSide::from_tokenized(old.into_diff_input(), DiffableStr::tokenize_graphemes),
TextDiffSide::from_tokenized(new.into_diff_input(), DiffableStr::tokenize_graphemes),
false,
)
}
pub fn diff_slices<'old, 'new, T: DiffableStr + ?Sized>(
&self,
old: &[&'old T],
new: &[&'new T],
) -> TextDiff<'old, 'new, T> {
self.diff(
TextDiffSide::from_slices(old),
TextDiffSide::from_slices(new),
false,
)
}
fn diff<'old, 'new, T: DiffableStr + ?Sized>(
&self,
old: TextDiffSide<'old, T>,
new: TextDiffSide<'new, T>,
newline_terminated: bool,
) -> TextDiff<'old, 'new, T> {
let deadline = self.deadline.and_then(|x| x.into_instant());
let ops = if old.len() > 100 || new.len() > 100 {
let ih = IdentifyDistinct::<u32>::new(&old, 0..old.len(), &new, 0..new.len());
capture_diff_deadline(
self.algorithm,
ih.old_lookup(),
ih.old_range(),
ih.new_lookup(),
ih.new_range(),
deadline,
)
} else {
capture_diff_deadline(
self.algorithm,
&old,
0..old.len(),
&new,
0..new.len(),
deadline,
)
};
TextDiff {
old,
new,
ops,
newline_terminated: self.newline_terminated.unwrap_or(newline_terminated),
algorithm: self.algorithm,
}
}
}
pub struct TextDiff<'old, 'new, T: DiffableStr + ?Sized> {
old: TextDiffSide<'old, T>,
new: TextDiffSide<'new, T>,
ops: Vec<DiffOp>,
newline_terminated: bool,
algorithm: Algorithm,
}
impl<'old, 'new> TextDiff<'old, 'new, str> {
pub const fn configure() -> TextDiffConfig {
TextDiffConfig::new()
}
pub fn from_lines<Old, New, T>(old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
TextDiff::configure().diff_lines(old, new)
}
pub fn from_words<Old, New, T>(old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
TextDiff::configure().diff_words(old, new)
}
pub fn from_chars<Old, New, T>(old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
TextDiff::configure().diff_chars(old, new)
}
#[cfg(feature = "unicode")]
pub fn from_unicode_words<Old, New, T>(old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
TextDiff::configure().diff_unicode_words(old, new)
}
#[cfg(feature = "unicode")]
pub fn from_graphemes<Old, New, T>(old: Old, new: New) -> TextDiff<'old, 'new, T>
where
Old: IntoDiffInput<'old, Output = T>,
New: IntoDiffInput<'new, Output = T>,
T: DiffableStr + ?Sized,
{
TextDiff::configure().diff_graphemes(old, new)
}
}
impl<'old, 'new, T: DiffableStr + ?Sized> TextDiff<'old, 'new, T> {
pub fn from_slices(old: &[&'old T], new: &[&'new T]) -> TextDiff<'old, 'new, T> {
TextDiff::configure().diff_slices(old, new)
}
pub fn algorithm(&self) -> Algorithm {
self.algorithm
}
pub fn newline_terminated(&self) -> bool {
self.newline_terminated
}
pub fn old_len(&self) -> usize {
self.old.len()
}
pub fn new_len(&self) -> usize {
self.new.len()
}
pub fn old_slice(&self, index: usize) -> Option<&T> {
self.old.get(index)
}
pub fn new_slice(&self, index: usize) -> Option<&T> {
self.new.get(index)
}
pub fn iter_old_slices(&self) -> impl Iterator<Item = &T> {
self.old.iter()
}
pub fn iter_new_slices(&self) -> impl Iterator<Item = &T> {
self.new.iter()
}
pub fn old_lookup(&self) -> &impl Index<usize, Output = T> {
&self.old
}
pub fn new_lookup(&self) -> &impl Index<usize, Output = T> {
&self.new
}
pub(crate) fn old_side(&self) -> &TextDiffSide<'old, T> {
&self.old
}
pub(crate) fn new_side(&self) -> &TextDiffSide<'new, T> {
&self.new
}
pub fn ratio(&self) -> f32 {
diff_ratio(self.ops(), self.old_len(), self.new_len())
}
pub fn iter_changes(&self, op: &DiffOp) -> TextChangesIter<'_, 'old, 'new, T> {
TextChangesIter::new(self, op)
}
pub fn ops(&self) -> &[DiffOp] {
&self.ops
}
pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
group_diff_ops(self.ops().to_vec(), n)
}
pub fn iter_all_changes(&self) -> impl Iterator<Item = Change<&T>> + '_ {
self.ops().iter().flat_map(|op| self.iter_changes(op))
}
#[cfg(feature = "inline")]
pub fn iter_all_inline_changes(&self) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
self.ops()
.iter()
.flat_map(move |op| self.iter_inline_changes(op))
}
#[cfg(feature = "inline")]
pub fn iter_all_inline_changes_deadline(
&self,
deadline: Option<Instant>,
) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
self.ops()
.iter()
.flat_map(move |op| self.iter_inline_changes_deadline(op, deadline))
}
#[cfg(feature = "inline")]
pub fn iter_all_inline_changes_with_options(
&self,
options: InlineChangeOptions,
) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
self.ops()
.iter()
.flat_map(move |op| self.iter_inline_changes_with_options(op, options))
}
#[cfg(feature = "inline")]
pub fn iter_all_inline_changes_with_options_deadline(
&self,
options: InlineChangeOptions,
deadline: Option<Instant>,
) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
self.ops().iter().flat_map(move |op| {
self.iter_inline_changes_with_options_deadline(op, options, deadline)
})
}
pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, T> {
UnifiedDiff::from_text_diff(self)
}
#[cfg(feature = "inline")]
pub fn iter_inline_changes(
&self,
op: &DiffOp,
) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
use crate::deadline_support::duration_to_deadline;
inline::iter_inline_changes(
self,
op,
duration_to_deadline(Duration::from_millis(500)),
InlineChangeOptions::default(),
)
}
#[cfg(feature = "inline")]
pub fn iter_inline_changes_deadline(
&self,
op: &DiffOp,
deadline: Option<Instant>,
) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
inline::iter_inline_changes(self, op, deadline, InlineChangeOptions::default())
}
#[cfg(feature = "inline")]
pub fn iter_inline_changes_with_options(
&self,
op: &DiffOp,
options: InlineChangeOptions,
) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
use crate::deadline_support::duration_to_deadline;
inline::iter_inline_changes(
self,
op,
duration_to_deadline(Duration::from_millis(500)),
options,
)
}
#[cfg(feature = "inline")]
pub fn iter_inline_changes_with_options_deadline(
&self,
op: &DiffOp,
options: InlineChangeOptions,
deadline: Option<Instant>,
) -> impl Iterator<Item = InlineChange<'_, T>> + '_ {
inline::iter_inline_changes(self, op, deadline, options)
}
}
pub fn get_close_matches<'a, T: DiffableStr + ?Sized>(
word: &T,
possibilities: &[&'a T],
n: usize,
cutoff: f32,
) -> Vec<&'a T> {
let mut matches = BinaryHeap::new();
let seq1 = word.tokenize_chars();
let quick_ratio = QuickSeqRatio::new(&seq1);
for &possibility in possibilities {
let seq2 = possibility.tokenize_chars();
if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
continue;
}
let diff = TextDiff::from_slices(&seq1, &seq2);
let ratio = diff.ratio();
if ratio >= cutoff {
matches.push(((ratio * u32::MAX as f32) as u32, Reverse(possibility)));
}
}
let mut rv = vec![];
for _ in 0..n {
if let Some((_, elt)) = matches.pop() {
rv.push(elt.0);
} else {
break;
}
}
rv
}
#[test]
fn test_captured_ops() {
let diff = TextDiff::from_lines(
"Hello World\nsome stuff here\nsome more stuff here\n",
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
);
insta::assert_debug_snapshot!(&diff.ops());
}
#[test]
fn test_captured_word_ops() {
let diff = TextDiff::from_words(
"Hello World\nsome stuff here\nsome more stuff here\n",
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
);
let changes = diff
.ops()
.iter()
.flat_map(|op| diff.iter_changes(op))
.collect::<Vec<_>>();
insta::assert_debug_snapshot!(&changes);
}
#[test]
fn test_unified_diff() {
let diff = TextDiff::from_lines(
"Hello World\nsome stuff here\nsome more stuff here\n",
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
);
assert!(diff.newline_terminated());
insta::assert_snapshot!(
&diff
.unified_diff()
.context_radius(3)
.header("old", "new")
.to_string()
);
}
#[test]
fn test_line_ops() {
let a = "Hello World\nsome stuff here\nsome more stuff here\n";
let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
let diff = TextDiff::from_lines(a, b);
assert!(diff.newline_terminated());
let changes = diff
.ops()
.iter()
.flat_map(|op| diff.iter_changes(op))
.collect::<Vec<_>>();
insta::assert_debug_snapshot!(&changes);
#[cfg(feature = "bytes")]
{
let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
let byte_changes = byte_diff
.ops()
.iter()
.flat_map(|op| byte_diff.iter_changes(op))
.collect::<Vec<_>>();
for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
}
}
}
#[cfg(test)]
fn build_owned_diff() -> TextDiff<'static, 'static, str> {
TextDiff::from_lines("a\nb\n".to_string(), "a\nc\n".to_string())
}
#[test]
fn test_owned_inputs() {
let diff = build_owned_diff();
let changes = diff
.iter_all_changes()
.map(|x| (x.tag(), x.value().to_string_lossy().into_owned()))
.collect::<Vec<_>>();
assert_eq!(
changes,
vec![
(ChangeTag::Equal, "a\n".into()),
(ChangeTag::Delete, "b\n".into()),
(ChangeTag::Insert, "c\n".into()),
]
);
}
#[test]
#[should_panic(expected = "old range out of bounds")]
fn test_iter_changes_panics_on_invalid_ops() {
let diff = TextDiff::from_lines("a\n", "a\n");
let invalid = DiffOp::Delete {
old_index: 99,
old_len: 1,
new_index: 0,
};
let _ = diff.iter_changes(&invalid).next();
}
#[test]
fn test_virtual_newlines() {
let diff = TextDiff::from_lines("a\nb", "a\nc\n");
assert!(diff.newline_terminated());
let changes = diff
.ops()
.iter()
.flat_map(|op| diff.iter_changes(op))
.collect::<Vec<_>>();
insta::assert_debug_snapshot!(&changes);
}
#[test]
fn test_char_diff() {
let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
insta::assert_debug_snapshot!(diff.ops());
#[cfg(feature = "bytes")]
{
let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes());
assert_eq!(diff.ops(), byte_diff.ops());
}
}
#[test]
fn test_ratio() {
let diff = TextDiff::from_chars("abcd", "bcde");
assert_eq!(diff.ratio(), 0.75);
let diff = TextDiff::from_chars("", "");
assert_eq!(diff.ratio(), 1.0);
}
#[test]
fn test_get_close_matches() {
let matches = get_close_matches("appel", &["ape", "apple", "peach", "puppy"][..], 3, 0.6);
assert_eq!(matches, vec!["apple", "ape"]);
let matches = get_close_matches(
"hulo",
&[
"hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo",
][..],
5,
0.7,
);
assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]);
}
#[test]
fn test_lifetimes_on_iter() {
let a = "1\n2\n3\n".to_string();
let b = "1\n99\n3\n".to_string();
let diff = TextDiff::from_lines(&a, &b);
let changes = diff.iter_all_changes().collect::<Vec<_>>();
insta::assert_debug_snapshot!(&changes);
}
#[test]
#[cfg(feature = "serde")]
fn test_serde() {
let diff = TextDiff::from_lines(
"Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
"Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
);
let changes = diff
.ops()
.iter()
.flat_map(|op| diff.iter_changes(op))
.collect::<Vec<_>>();
let json = serde_json::to_string_pretty(&changes).unwrap();
insta::assert_snapshot!(&json);
}
#[test]
#[cfg(feature = "serde")]
fn test_serde_ops() {
let diff = TextDiff::from_lines(
"Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
"Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
);
let changes = diff.ops();
let json = serde_json::to_string_pretty(&changes).unwrap();
insta::assert_snapshot!(&json);
}
#[test]
fn test_regression_issue_37() {
let config = TextDiffConfig::default();
let diff = config.diff_lines("\u{18}\n\n", "\n\n\r");
let mut output = diff.unified_diff();
assert_eq!(
output.context_radius(0).to_string(),
"@@ -1 +1,0 @@\n-\u{18}\n@@ -2,0 +2,2 @@\n+\n+\r"
);
}