use crate::alignment::CaptionBlock;
/// One step of the edit script that relates a left caption track to a right one.
///
/// Every index is a position in the corresponding slice handed to the diff.
#[derive(Debug, Clone, PartialEq)]
pub enum DiffOp {
    /// The blocks at `left_idx` and `right_idx` carry the same normalised text.
    Equal {
        /// Position of the block in the left track.
        left_idx: usize,
        /// Position of the block in the right track.
        right_idx: usize,
        /// Signed start-time offset in milliseconds; `CaptionDiff::diff` stores
        /// the right block's start minus the left block's start here.
        timing_shift_ms: i64,
    },
    /// A left block and a right block paired together, carrying the text of both.
    Replace {
        /// Position of the block in the left track.
        left_idx: usize,
        /// Position of the block in the right track.
        right_idx: usize,
        /// Normalised text of the left block.
        left_text: String,
        /// Normalised text of the right block.
        right_text: String,
    },
    /// A block that exists only in the right track.
    Insert { right_idx: usize, text: String },
    /// A block that exists only in the left track.
    Delete { left_idx: usize, text: String },
}

impl DiffOp {
    /// Returns the single-character marker for this operation.
    ///
    /// `'='` for an unshifted `Equal`, `'~'` for a shifted `Equal` or a
    /// `Replace`, `'+'` for `Insert` and `'-'` for `Delete`.
    pub fn symbol(&self) -> char {
        match self {
            DiffOp::Insert { .. } => '+',
            DiffOp::Delete { .. } => '-',
            DiffOp::Replace { .. } => '~',
            DiffOp::Equal {
                timing_shift_ms, ..
            } => {
                if *timing_shift_ms == 0 {
                    '='
                } else {
                    '~'
                }
            }
        }
    }
}
/// Aggregate counts over a sequence of `DiffOp`s.
#[derive(Debug, Clone, PartialEq)]
pub struct DiffSummary {
    /// Number of `Equal` ops whose timing shift is zero.
    pub equal_count: usize,
    /// Number of `Equal` ops whose timing shift is non-zero.
    pub timing_shifted_count: usize,
    /// Number of `Replace` ops.
    pub replaced_count: usize,
    /// Number of `Insert` ops.
    pub inserted_count: usize,
    /// Number of `Delete` ops.
    pub deleted_count: usize,
    /// Total number of ops that were summarised.
    pub total_ops: usize,
    /// Share of ops that are `Equal` (shifted or not); `1.0` when there are no ops.
    pub similarity: f32,
}

impl DiffSummary {
    /// Tallies `ops` by kind and derives the similarity ratio from the tallies.
    fn from_ops(ops: &[DiffOp]) -> Self {
        // Start from the "nothing to compare" summary and fill it in.
        let mut summary = Self {
            equal_count: 0,
            timing_shifted_count: 0,
            replaced_count: 0,
            inserted_count: 0,
            deleted_count: 0,
            total_ops: ops.len(),
            similarity: 1.0,
        };

        for op in ops {
            // Pick the counter this op belongs to, then bump it once.
            let counter = match op {
                DiffOp::Equal {
                    timing_shift_ms, ..
                } if *timing_shift_ms == 0 => &mut summary.equal_count,
                DiffOp::Equal { .. } => &mut summary.timing_shifted_count,
                DiffOp::Replace { .. } => &mut summary.replaced_count,
                DiffOp::Insert { .. } => &mut summary.inserted_count,
                DiffOp::Delete { .. } => &mut summary.deleted_count,
            };
            *counter += 1;
        }

        if summary.total_ops != 0 {
            let matched = summary.equal_count + summary.timing_shifted_count;
            summary.similarity = matched as f32 / summary.total_ops as f32;
        }
        summary
    }
}
pub struct CaptionDiff;
impl CaptionDiff {
pub fn diff(left: &[CaptionBlock], right: &[CaptionBlock]) -> Vec<DiffOp> {
let left_texts: Vec<String> = left.iter().map(|b| normalise_text(b)).collect();
let right_texts: Vec<String> = right.iter().map(|b| normalise_text(b)).collect();
let edit_ops = lcs_diff(&left_texts, &right_texts);
edit_ops
.into_iter()
.map(|raw| match raw {
RawOp::Equal(li, ri) => {
let shift = right[ri].start_ms as i64 - left[li].start_ms as i64;
DiffOp::Equal {
left_idx: li,
right_idx: ri,
timing_shift_ms: shift,
}
}
RawOp::Replace(li, ri) => DiffOp::Replace {
left_idx: li,
right_idx: ri,
left_text: left_texts[li].clone(),
right_text: right_texts[ri].clone(),
},
RawOp::Insert(ri) => DiffOp::Insert {
right_idx: ri,
text: right_texts[ri].clone(),
},
RawOp::Delete(li) => DiffOp::Delete {
left_idx: li,
text: left_texts[li].clone(),
},
})
.collect()
}
pub fn summarise(left: &[CaptionBlock], right: &[CaptionBlock]) -> DiffSummary {
let ops = Self::diff(left, right);
DiffSummary::from_ops(&ops)
}
pub fn text_changes(left: &[CaptionBlock], right: &[CaptionBlock]) -> Vec<DiffOp> {
Self::diff(left, right)
.into_iter()
.filter(|op| !matches!(op, DiffOp::Equal { .. }))
.collect()
}
pub fn timing_shifts(
left: &[CaptionBlock],
right: &[CaptionBlock],
) -> Vec<(usize, usize, i64)> {
Self::diff(left, right)
.into_iter()
.filter_map(|op| match op {
DiffOp::Equal {
left_idx,
right_idx,
timing_shift_ms,
} if timing_shift_ms != 0 => Some((left_idx, right_idx, timing_shift_ms)),
_ => None,
})
.collect()
}
}
/// Flattens a block's lines into one string: each line is trimmed of
/// surrounding whitespace and the results are joined with single spaces.
fn normalise_text(block: &CaptionBlock) -> String {
    let mut joined = String::new();
    for (position, line) in block.lines.iter().enumerate() {
        // A separator goes before every line except the first.
        if position > 0 {
            joined.push(' ');
        }
        joined.push_str(line.trim());
    }
    joined
}
/// Index-only edit operation produced by `lcs_diff`.
///
/// The first index always refers to the left sequence and the second to the
/// right sequence; single-index variants name the side they belong to.
#[derive(Debug)]
enum RawOp {
    /// `left[.0] == right[.1]`.
    Equal(usize, usize),
    /// `left[.0]` is paired with the differing `right[.1]`.
    Replace(usize, usize),
    /// `right[.0]` has no counterpart on the left.
    Insert(usize),
    /// `left[.0]` has no counterpart on the right.
    Delete(usize),
}

/// Builds an edit script from `left` to `right` using a longest-common-subsequence table.
///
/// The script visits every index of both inputs exactly once, in order. The
/// number of `Equal` ops always equals the LCS length. A differing pair is
/// reported as a single `Replace` whenever pairing the two items does not
/// sacrifice a later match; otherwise it is split into `Insert` / `Delete`.
///
/// Time and memory are both proportional to `left.len() * right.len()`.
fn lcs_diff(left: &[String], right: &[String]) -> Vec<RawOp> {
    let n = left.len();
    let m = right.len();

    // Row-major (n + 1) x (m + 1) table; cell (i, j) holds the LCS length of
    // the suffixes left[i..] and right[j..]. The extra row/column stay zero.
    let stride = m + 1;
    let idx = |i: usize, j: usize| i * stride + j;
    let mut lcs = vec![0u32; (n + 1) * stride];
    for i in (0..n).rev() {
        for j in (0..m).rev() {
            lcs[idx(i, j)] = if left[i] == right[j] {
                lcs[idx(i + 1, j + 1)] + 1
            } else {
                lcs[idx(i + 1, j)].max(lcs[idx(i, j + 1)])
            };
        }
    }

    let mut ops: Vec<RawOp> = Vec::with_capacity(n.max(m));
    let (mut i, mut j) = (0, 0);
    while i < n && j < m {
        if left[i] == right[j] {
            ops.push(RawOp::Equal(i, j));
            i += 1;
            j += 1;
        } else if lcs[idx(i + 1, j + 1)] == lcs[idx(i, j)] {
            // Skipping both items loses no match, so report one substitution.
            // This must be checked before the insert/delete tie-break:
            // previously the insert branch also swallowed ties, which made
            // `Replace` unreachable.
            ops.push(RawOp::Replace(i, j));
            i += 1;
            j += 1;
        } else if lcs[idx(i, j + 1)] >= lcs[idx(i + 1, j)] {
            // Skipping the right item keeps the full remaining LCS.
            ops.push(RawOp::Insert(j));
            j += 1;
        } else {
            ops.push(RawOp::Delete(i));
            i += 1;
        }
    }

    // At most one side still has items; none of them can match anything.
    ops.extend((i..n).map(RawOp::Delete));
    ops.extend((j..m).map(RawOp::Insert));
    ops
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::alignment::CaptionPosition;

    /// Builds a single-line caption block with no speaker, positioned at the bottom.
    fn make_block(id: u32, start_ms: u64, end_ms: u64, text: &str) -> CaptionBlock {
        CaptionBlock {
            id,
            start_ms,
            end_ms,
            lines: vec![text.to_string()],
            speaker_id: None,
            position: CaptionPosition::Bottom,
        }
    }

    #[test]
    fn diff_identical_tracks_all_equal() {
        let a = vec![
            make_block(1, 0, 1000, "Hello"),
            make_block(2, 1000, 2000, "World"),
        ];
        let b = a.clone();
        let ops = CaptionDiff::diff(&a, &b);
        assert_eq!(ops.len(), 2);
        assert!(ops.iter().all(|op| matches!(
            op,
            DiffOp::Equal {
                timing_shift_ms: 0,
                ..
            }
        )));
    }

    #[test]
    fn diff_same_text_different_timing_reports_shift() {
        let a = vec![make_block(1, 0, 1000, "Hello")];
        let b = vec![make_block(1, 500, 1500, "Hello")];
        let ops = CaptionDiff::diff(&a, &b);
        assert_eq!(ops.len(), 1);
        assert!(matches!(
            ops[0],
            DiffOp::Equal {
                timing_shift_ms: 500,
                ..
            }
        ));
    }

    #[test]
    fn diff_extra_block_in_right_is_insert() {
        let a = vec![make_block(1, 0, 1000, "Hello")];
        let b = vec![
            make_block(1, 0, 1000, "Hello"),
            make_block(2, 1000, 2000, "World"),
        ];
        let ops = CaptionDiff::diff(&a, &b);
        assert!(ops.iter().any(|op| matches!(op, DiffOp::Insert { .. })));
    }

    #[test]
    fn diff_missing_block_in_right_is_delete() {
        let a = vec![
            make_block(1, 0, 1000, "Hello"),
            make_block(2, 1000, 2000, "World"),
        ];
        let b = vec![make_block(1, 0, 1000, "Hello")];
        let ops = CaptionDiff::diff(&a, &b);
        assert!(ops.iter().any(|op| matches!(op, DiffOp::Delete { .. })));
    }

    #[test]
    fn diff_changed_text_is_replace_or_delete_insert() {
        let a = vec![make_block(1, 0, 1000, "Hello world")];
        let b = vec![make_block(1, 0, 1000, "Goodbye world")];
        let ops = CaptionDiff::diff(&a, &b);
        assert!(!ops.is_empty());
        assert!(!ops.iter().all(|op| matches!(op, DiffOp::Equal { .. })));
    }

    #[test]
    fn diff_both_empty_returns_empty() {
        let ops = CaptionDiff::diff(&[], &[]);
        assert!(ops.is_empty());
    }

    #[test]
    fn diff_left_empty_all_inserts() {
        let b = vec![make_block(1, 0, 1000, "Hello")];
        let ops = CaptionDiff::diff(&[], &b);
        assert!(ops.iter().all(|op| matches!(op, DiffOp::Insert { .. })));
    }

    #[test]
    fn diff_right_empty_all_deletes() {
        let a = vec![make_block(1, 0, 1000, "Hello")];
        let ops = CaptionDiff::diff(&a, &[]);
        assert!(ops.iter().all(|op| matches!(op, DiffOp::Delete { .. })));
    }

    #[test]
    fn summarise_identical_similarity_one() {
        let a = vec![make_block(1, 0, 1000, "A"), make_block(2, 1000, 2000, "B")];
        let summary = CaptionDiff::summarise(&a, &a);
        assert!((summary.similarity - 1.0).abs() < 1e-5);
        assert_eq!(summary.equal_count, 2);
    }

    #[test]
    fn summarise_both_empty_similarity_one() {
        // With nothing to compare, the tracks are treated as fully similar.
        let summary = CaptionDiff::summarise(&[], &[]);
        assert_eq!(summary.total_ops, 0);
        assert!((summary.similarity - 1.0).abs() < 1e-5);
    }

    #[test]
    fn summarise_all_different_similarity_zero() {
        let a = vec![make_block(1, 0, 1000, "A")];
        let b = vec![make_block(1, 0, 1000, "Z")];
        let summary = CaptionDiff::summarise(&a, &b);
        assert_eq!(summary.equal_count, 0);
        assert_eq!(summary.timing_shifted_count, 0);
        // The test name promises a similarity of zero, so check it explicitly.
        assert!(summary.similarity.abs() < 1e-5);
    }

    #[test]
    fn text_changes_excludes_equal_and_shifted() {
        let a = vec![
            make_block(1, 0, 1000, "Same text"),
            make_block(2, 1000, 2000, "Different"),
        ];
        let b = vec![
            make_block(1, 500, 1500, "Same text"),
            make_block(2, 2000, 3000, "New text"),
        ];
        let changes = CaptionDiff::text_changes(&a, &b);
        // Guard against the `all` below passing vacuously on an empty result.
        assert!(!changes.is_empty());
        assert!(changes.iter().all(|op| !matches!(op, DiffOp::Equal { .. })));
    }

    #[test]
    fn timing_shifts_detects_shifted_blocks() {
        let a = vec![make_block(1, 0, 1000, "Hello")];
        let b = vec![make_block(1, 250, 1250, "Hello")];
        let shifts = CaptionDiff::timing_shifts(&a, &b);
        assert_eq!(shifts.len(), 1);
        assert_eq!(shifts[0].2, 250);
    }

    #[test]
    fn timing_shifts_ignores_unshifted_blocks() {
        let a = vec![make_block(1, 0, 1000, "Hello")];
        let b = a.clone();
        let shifts = CaptionDiff::timing_shifts(&a, &b);
        assert!(shifts.is_empty());
    }

    #[test]
    fn diff_op_symbol() {
        assert_eq!(
            DiffOp::Equal {
                left_idx: 0,
                right_idx: 0,
                timing_shift_ms: 0
            }
            .symbol(),
            '='
        );
        assert_eq!(
            DiffOp::Equal {
                left_idx: 0,
                right_idx: 0,
                timing_shift_ms: 100
            }
            .symbol(),
            '~'
        );
        assert_eq!(
            DiffOp::Replace {
                left_idx: 0,
                right_idx: 0,
                left_text: "a".into(),
                right_text: "b".into()
            }
            .symbol(),
            '~'
        );
        assert_eq!(
            DiffOp::Insert {
                right_idx: 0,
                text: "t".into()
            }
            .symbol(),
            '+'
        );
        assert_eq!(
            DiffOp::Delete {
                left_idx: 0,
                text: "t".into()
            }
            .symbol(),
            '-'
        );
    }

    #[test]
    fn normalise_text_joins_lines() {
        let block = CaptionBlock {
            id: 1,
            start_ms: 0,
            end_ms: 1000,
            lines: vec!["Line one".to_string(), "Line two".to_string()],
            speaker_id: None,
            position: CaptionPosition::Bottom,
        };
        let text = normalise_text(&block);
        assert_eq!(text, "Line one Line two");
    }
}