#![allow(dead_code)]
use std::collections::HashMap;
/// Tuning options passed to `histogram_diff`.
#[derive(Debug, Clone)]
pub struct HistogramDiffConfig {
/// Occurrence-count threshold: `histogram_diff` compares a line's histogram
/// count against this value. The `Default` impl sets it to 64.
pub max_anchor_freq: usize,
/// Presumably the number of unchanged context lines to show around a hunk.
/// NOTE(review): no function visible in this file reads this field — confirm
/// where it is consumed. The `Default` impl sets it to 3.
pub context_lines: usize,
}
impl Default for HistogramDiffConfig {
fn default() -> Self {
Self {
max_anchor_freq: 64,
context_lines: 3,
}
}
}
/// One changed region reported by `histogram_diff`.
#[derive(Debug, Clone)]
pub struct HistogramHunk {
/// `(start, end)` positions in the old sequence covered by this hunk;
/// `histogram_diff_to_string` prints `end - start` as the length.
pub old_range: (usize, usize),
/// `(start, end)` positions in the new sequence covered by this hunk;
/// `histogram_diff_to_string` prints `end - start` as the length.
pub new_range: (usize, usize),
/// Owned copies of the old-sequence lines this hunk removes.
pub removed: Vec<String>,
/// Owned copies of the new-sequence lines this hunk adds.
pub added: Vec<String>,
}
/// Result of `histogram_diff`: the list of hunks describing how the old
/// sequence differs from the new one.
#[derive(Debug, Clone)]
pub struct HistogramDiff {
/// Hunks in the order `histogram_diff` produced them; empty when the inputs
/// are identical.
pub hunks: Vec<HistogramHunk>,
}
impl HistogramDiff {
pub fn new() -> Self {
Self { hunks: Vec::new() }
}
pub fn hunk_count(&self) -> usize {
self.hunks.len()
}
pub fn is_identical(&self) -> bool {
self.hunks.is_empty()
}
pub fn changed_lines(&self) -> usize {
self.hunks
.iter()
.map(|h| h.removed.len() + h.added.len())
.sum()
}
}
impl Default for HistogramDiff {
fn default() -> Self {
Self::new()
}
}
/// Counts how many times each distinct line occurs in `lines`.
///
/// The returned map borrows its keys from the input strings, so no line is
/// copied.
pub fn build_histogram<'a>(lines: &[&'a str]) -> HashMap<&'a str, usize> {
    let mut counts: HashMap<&'a str, usize> = HashMap::new();
    lines.iter().copied().for_each(|line| {
        *counts.entry(line).or_default() += 1;
    });
    counts
}
pub fn histogram_diff(old: &[&str], new: &[&str], cfg: &HistogramDiffConfig) -> HistogramDiff {
let mut result = HistogramDiff::new();
let old_hist = build_histogram(old);
let new_hist = build_histogram(new);
let mut oi = 0usize;
let mut ni = 0usize;
while oi < old.len() || ni < new.len() {
let o = old.get(oi).copied();
let n = new.get(ni).copied();
if o == n {
oi += 1;
ni += 1;
continue;
}
let o_eligible = o
.map(|l| old_hist.get(l).copied().unwrap_or(0) <= cfg.max_anchor_freq)
.unwrap_or(false);
let n_eligible = n
.map(|l| new_hist.get(l).copied().unwrap_or(0) <= cfg.max_anchor_freq)
.unwrap_or(false);
let mut hunk = HistogramHunk {
old_range: (oi, oi),
new_range: (ni, ni),
removed: Vec::new(),
added: Vec::new(),
};
if o_eligible || o.is_none() {
if let Some(ol) = o {
hunk.removed.push(ol.to_string());
oi += 1;
}
} else if let Some(ol) = o {
hunk.removed.push(ol.to_string());
oi += 1;
}
if n_eligible || n.is_none() {
if let Some(nl) = n {
hunk.added.push(nl.to_string());
ni += 1;
}
} else if let Some(nl) = n {
hunk.added.push(nl.to_string());
ni += 1;
}
hunk.old_range.1 = oi;
hunk.new_range.1 = ni;
result.hunks.push(hunk);
}
result
}
/// Renders `diff` as text.
///
/// Every hunk produces a header line `@@ -<old start>,<old len> +<new start>,<new len> @@`
/// (start values are printed exactly as stored in the hunk, lengths are
/// `end - start` clamped at zero), followed by each removed line prefixed with
/// `-` and then each added line prefixed with `+`. An empty diff renders as an
/// empty string.
pub fn histogram_diff_to_string(diff: &HistogramDiff) -> String {
    let mut rendered = String::new();
    for hunk in diff.hunks.iter() {
        let (old_start, old_end) = hunk.old_range;
        let (new_start, new_end) = hunk.new_range;
        let header = format!(
            "@@ -{},{} +{},{} @@\n",
            old_start,
            old_end.saturating_sub(old_start),
            new_start,
            new_end.saturating_sub(new_start),
        );
        rendered.push_str(&header);

        // Removed lines come first, then added ones, each tagged by its marker.
        let removed = hunk.removed.iter().map(|line| ('-', line));
        let added = hunk.added.iter().map(|line| ('+', line));
        for (marker, line) in removed.chain(added) {
            rendered.push(marker);
            rendered.push_str(line);
            rendered.push('\n');
        }
    }
    rendered
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration shared by the tests below.
    fn default_cfg() -> HistogramDiffConfig {
        HistogramDiffConfig::default()
    }

    #[test]
    fn test_identical_empty_hunks() {
        let lines = ["a", "b"];
        let d = histogram_diff(&lines, &lines, &default_cfg());
        assert!(d.is_identical());
        assert_eq!(d.hunk_count(), 0);
        assert_eq!(d.changed_lines(), 0);
    }

    #[test]
    fn test_single_change() {
        let old = ["a", "b"];
        let new = ["a", "c"];
        let d = histogram_diff(&old, &new, &default_cfg());
        assert!(!d.is_identical());
        assert_eq!(d.hunk_count(), 1);
        // The unchanged first line must not be part of the hunk.
        assert_eq!(d.hunks[0].old_range, (1, 2));
        assert_eq!(d.hunks[0].new_range, (1, 2));
        assert_eq!(d.hunks[0].removed, vec!["b".to_string()]);
        assert_eq!(d.hunks[0].added, vec!["c".to_string()]);
    }

    #[test]
    fn test_hunk_count_one() {
        let old = ["x"];
        let new = ["y"];
        let d = histogram_diff(&old, &new, &default_cfg());
        assert_eq!(d.hunk_count(), 1);
        assert_eq!(d.hunks[0].removed, vec!["x".to_string()]);
        assert_eq!(d.hunks[0].added, vec!["y".to_string()]);
    }

    #[test]
    fn test_changed_lines_nonzero() {
        let old = ["p"];
        let new = ["q"];
        let d = histogram_diff(&old, &new, &default_cfg());
        // One removed line plus one added line.
        assert_eq!(d.changed_lines(), 2);
    }

    #[test]
    fn test_build_histogram_counts() {
        let lines = ["a", "b", "a"];
        let h = build_histogram(&lines);
        assert_eq!(h.len(), 2);
        assert_eq!(h[&"a"], 2);
        assert_eq!(h[&"b"], 1);
    }

    #[test]
    fn test_all_added() {
        let old: &[&str] = &[];
        let new = ["x", "y"];
        let d = histogram_diff(old, &new, &default_cfg());
        assert_eq!(d.changed_lines(), 2);
        // These checks hold however the additions are grouped into hunks.
        assert!(d.hunks.iter().all(|h| h.removed.is_empty()));
        let added: Vec<&str> = d
            .hunks
            .iter()
            .flat_map(|h| h.added.iter().map(String::as_str))
            .collect();
        assert_eq!(added, ["x", "y"]);
    }

    #[test]
    fn test_to_string_has_at() {
        let old = ["a"];
        let new = ["b"];
        let d = histogram_diff(&old, &new, &default_cfg());
        let s = histogram_diff_to_string(&d);
        assert!(s.contains("@@"));
        assert_eq!(s, "@@ -0,1 +0,1 @@\n-a\n+b\n");
    }

    #[test]
    fn test_default_config() {
        let cfg = HistogramDiffConfig::default();
        assert_eq!(cfg.context_lines, 3);
        assert_eq!(cfg.max_anchor_freq, 64);
    }

    #[test]
    fn test_default_histogram_diff() {
        let d = HistogramDiff::default();
        assert!(d.is_identical());
        assert_eq!(d.hunk_count(), 0);
    }
}