Skip to main content

oxihuman_core/
histogram_diff.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5//! Histogram diff algorithm stub.
6//!
7//! Builds a frequency histogram of lines and anchors matching on the
8//! least-frequent common line, giving high-quality diffs for structured text.
9
10use std::collections::HashMap;
11
12/// Configuration for histogram diff.
13#[derive(Debug, Clone)]
14pub struct HistogramDiffConfig {
15    /// Maximum line frequency to consider as an anchor candidate.
16    pub max_anchor_freq: usize,
17    pub context_lines: usize,
18}
19
20impl Default for HistogramDiffConfig {
21    fn default() -> Self {
22        Self {
23            max_anchor_freq: 64,
24            context_lines: 3,
25        }
26    }
27}
28
29/// A change hunk produced by histogram diff.
30#[derive(Debug, Clone)]
31pub struct HistogramHunk {
32    pub old_range: (usize, usize),
33    pub new_range: (usize, usize),
34    pub removed: Vec<String>,
35    pub added: Vec<String>,
36}
37
38/// Result of histogram diff.
39#[derive(Debug, Clone)]
40pub struct HistogramDiff {
41    pub hunks: Vec<HistogramHunk>,
42}
43
44impl HistogramDiff {
45    pub fn new() -> Self {
46        Self { hunks: Vec::new() }
47    }
48
49    pub fn hunk_count(&self) -> usize {
50        self.hunks.len()
51    }
52
53    pub fn is_identical(&self) -> bool {
54        self.hunks.is_empty()
55    }
56
57    pub fn changed_lines(&self) -> usize {
58        self.hunks
59            .iter()
60            .map(|h| h.removed.len() + h.added.len())
61            .sum()
62    }
63}
64
65impl Default for HistogramDiff {
66    fn default() -> Self {
67        Self::new()
68    }
69}
70
71/// Build a frequency histogram of the given lines.
72pub fn build_histogram<'a>(lines: &[&'a str]) -> HashMap<&'a str, usize> {
73    let mut map: HashMap<&str, usize> = HashMap::new();
74    for &l in lines {
75        *map.entry(l).or_insert(0) += 1;
76    }
77    map
78}
79
80/// Run histogram diff on two line slices.
81pub fn histogram_diff(old: &[&str], new: &[&str], cfg: &HistogramDiffConfig) -> HistogramDiff {
82    let mut result = HistogramDiff::new();
83    let old_hist = build_histogram(old);
84    let new_hist = build_histogram(new);
85
86    let mut oi = 0usize;
87    let mut ni = 0usize;
88
89    while oi < old.len() || ni < new.len() {
90        let o = old.get(oi).copied();
91        let n = new.get(ni).copied();
92        if o == n {
93            oi += 1;
94            ni += 1;
95            continue;
96        }
97        /* Check anchor eligibility by frequency */
98        let o_eligible = o
99            .map(|l| old_hist.get(l).copied().unwrap_or(0) <= cfg.max_anchor_freq)
100            .unwrap_or(false);
101        let n_eligible = n
102            .map(|l| new_hist.get(l).copied().unwrap_or(0) <= cfg.max_anchor_freq)
103            .unwrap_or(false);
104
105        let mut hunk = HistogramHunk {
106            old_range: (oi, oi),
107            new_range: (ni, ni),
108            removed: Vec::new(),
109            added: Vec::new(),
110        };
111
112        if o_eligible || o.is_none() {
113            if let Some(ol) = o {
114                hunk.removed.push(ol.to_string());
115                oi += 1;
116            }
117        } else if let Some(ol) = o {
118            hunk.removed.push(ol.to_string());
119            oi += 1;
120        }
121        if n_eligible || n.is_none() {
122            if let Some(nl) = n {
123                hunk.added.push(nl.to_string());
124                ni += 1;
125            }
126        } else if let Some(nl) = n {
127            hunk.added.push(nl.to_string());
128            ni += 1;
129        }
130
131        hunk.old_range.1 = oi;
132        hunk.new_range.1 = ni;
133        result.hunks.push(hunk);
134    }
135    result
136}
137
138/// Format histogram diff as a string.
139pub fn histogram_diff_to_string(diff: &HistogramDiff) -> String {
140    let mut out = String::new();
141    for h in &diff.hunks {
142        out.push_str(&format!(
143            "@@ -{},{} +{},{} @@\n",
144            h.old_range.0,
145            h.old_range.1.saturating_sub(h.old_range.0),
146            h.new_range.0,
147            h.new_range.1.saturating_sub(h.new_range.0),
148        ));
149        for l in &h.removed {
150            out.push('-');
151            out.push_str(l);
152            out.push('\n');
153        }
154        for l in &h.added {
155            out.push('+');
156            out.push_str(l);
157            out.push('\n');
158        }
159    }
160    out
161}
162
163#[cfg(test)]
164mod tests {
165    use super::*;
166
167    fn default_cfg() -> HistogramDiffConfig {
168        HistogramDiffConfig::default()
169    }
170
171    #[test]
172    fn test_identical_empty_hunks() {
173        let lines = ["a", "b"];
174        let d = histogram_diff(&lines, &lines, &default_cfg());
175        assert!(d.is_identical());
176    }
177
178    #[test]
179    fn test_single_change() {
180        let old = ["a", "b"];
181        let new = ["a", "c"];
182        let d = histogram_diff(&old, &new, &default_cfg());
183        assert!(!d.is_identical());
184    }
185
186    #[test]
187    fn test_hunk_count_one() {
188        let old = ["x"];
189        let new = ["y"];
190        let d = histogram_diff(&old, &new, &default_cfg());
191        assert_eq!(d.hunk_count(), 1);
192    }
193
194    #[test]
195    fn test_changed_lines_nonzero() {
196        let old = ["p"];
197        let new = ["q"];
198        let d = histogram_diff(&old, &new, &default_cfg());
199        assert!(d.changed_lines() > 0);
200    }
201
202    #[test]
203    fn test_build_histogram_counts() {
204        let lines = ["a", "b", "a"];
205        let h = build_histogram(&lines);
206        assert_eq!(h[&"a"], 2);
207        assert_eq!(h[&"b"], 1);
208    }
209
210    #[test]
211    fn test_all_added() {
212        let old: &[&str] = &[];
213        let new = ["x", "y"];
214        let d = histogram_diff(old, &new, &default_cfg());
215        assert!(d.changed_lines() > 0);
216    }
217
218    #[test]
219    fn test_to_string_has_at() {
220        let old = ["a"];
221        let new = ["b"];
222        let d = histogram_diff(&old, &new, &default_cfg());
223        let s = histogram_diff_to_string(&d);
224        assert!(s.contains("@@"));
225    }
226
227    #[test]
228    fn test_default_config() {
229        let cfg = HistogramDiffConfig::default();
230        assert_eq!(cfg.context_lines, 3);
231    }
232
233    #[test]
234    fn test_default_histogram_diff() {
235        let d = HistogramDiff::default();
236        assert!(d.is_identical());
237    }
238}