oxihuman_core/
histogram_diff.rs1#![allow(dead_code)]
4
5use std::collections::HashMap;
11
12#[derive(Debug, Clone)]
14pub struct HistogramDiffConfig {
15 pub max_anchor_freq: usize,
17 pub context_lines: usize,
18}
19
20impl Default for HistogramDiffConfig {
21 fn default() -> Self {
22 Self {
23 max_anchor_freq: 64,
24 context_lines: 3,
25 }
26 }
27}
28
29#[derive(Debug, Clone)]
31pub struct HistogramHunk {
32 pub old_range: (usize, usize),
33 pub new_range: (usize, usize),
34 pub removed: Vec<String>,
35 pub added: Vec<String>,
36}
37
38#[derive(Debug, Clone)]
40pub struct HistogramDiff {
41 pub hunks: Vec<HistogramHunk>,
42}
43
44impl HistogramDiff {
45 pub fn new() -> Self {
46 Self { hunks: Vec::new() }
47 }
48
49 pub fn hunk_count(&self) -> usize {
50 self.hunks.len()
51 }
52
53 pub fn is_identical(&self) -> bool {
54 self.hunks.is_empty()
55 }
56
57 pub fn changed_lines(&self) -> usize {
58 self.hunks
59 .iter()
60 .map(|h| h.removed.len() + h.added.len())
61 .sum()
62 }
63}
64
65impl Default for HistogramDiff {
66 fn default() -> Self {
67 Self::new()
68 }
69}
70
71pub fn build_histogram<'a>(lines: &[&'a str]) -> HashMap<&'a str, usize> {
73 let mut map: HashMap<&str, usize> = HashMap::new();
74 for &l in lines {
75 *map.entry(l).or_insert(0) += 1;
76 }
77 map
78}
79
80pub fn histogram_diff(old: &[&str], new: &[&str], cfg: &HistogramDiffConfig) -> HistogramDiff {
82 let mut result = HistogramDiff::new();
83 let old_hist = build_histogram(old);
84 let new_hist = build_histogram(new);
85
86 let mut oi = 0usize;
87 let mut ni = 0usize;
88
89 while oi < old.len() || ni < new.len() {
90 let o = old.get(oi).copied();
91 let n = new.get(ni).copied();
92 if o == n {
93 oi += 1;
94 ni += 1;
95 continue;
96 }
97 let o_eligible = o
99 .map(|l| old_hist.get(l).copied().unwrap_or(0) <= cfg.max_anchor_freq)
100 .unwrap_or(false);
101 let n_eligible = n
102 .map(|l| new_hist.get(l).copied().unwrap_or(0) <= cfg.max_anchor_freq)
103 .unwrap_or(false);
104
105 let mut hunk = HistogramHunk {
106 old_range: (oi, oi),
107 new_range: (ni, ni),
108 removed: Vec::new(),
109 added: Vec::new(),
110 };
111
112 if o_eligible || o.is_none() {
113 if let Some(ol) = o {
114 hunk.removed.push(ol.to_string());
115 oi += 1;
116 }
117 } else if let Some(ol) = o {
118 hunk.removed.push(ol.to_string());
119 oi += 1;
120 }
121 if n_eligible || n.is_none() {
122 if let Some(nl) = n {
123 hunk.added.push(nl.to_string());
124 ni += 1;
125 }
126 } else if let Some(nl) = n {
127 hunk.added.push(nl.to_string());
128 ni += 1;
129 }
130
131 hunk.old_range.1 = oi;
132 hunk.new_range.1 = ni;
133 result.hunks.push(hunk);
134 }
135 result
136}
137
138pub fn histogram_diff_to_string(diff: &HistogramDiff) -> String {
140 let mut out = String::new();
141 for h in &diff.hunks {
142 out.push_str(&format!(
143 "@@ -{},{} +{},{} @@\n",
144 h.old_range.0,
145 h.old_range.1.saturating_sub(h.old_range.0),
146 h.new_range.0,
147 h.new_range.1.saturating_sub(h.new_range.0),
148 ));
149 for l in &h.removed {
150 out.push('-');
151 out.push_str(l);
152 out.push('\n');
153 }
154 for l in &h.added {
155 out.push('+');
156 out.push_str(l);
157 out.push('\n');
158 }
159 }
160 out
161}
162
163#[cfg(test)]
164mod tests {
165 use super::*;
166
167 fn default_cfg() -> HistogramDiffConfig {
168 HistogramDiffConfig::default()
169 }
170
171 #[test]
172 fn test_identical_empty_hunks() {
173 let lines = ["a", "b"];
174 let d = histogram_diff(&lines, &lines, &default_cfg());
175 assert!(d.is_identical());
176 }
177
178 #[test]
179 fn test_single_change() {
180 let old = ["a", "b"];
181 let new = ["a", "c"];
182 let d = histogram_diff(&old, &new, &default_cfg());
183 assert!(!d.is_identical());
184 }
185
186 #[test]
187 fn test_hunk_count_one() {
188 let old = ["x"];
189 let new = ["y"];
190 let d = histogram_diff(&old, &new, &default_cfg());
191 assert_eq!(d.hunk_count(), 1);
192 }
193
194 #[test]
195 fn test_changed_lines_nonzero() {
196 let old = ["p"];
197 let new = ["q"];
198 let d = histogram_diff(&old, &new, &default_cfg());
199 assert!(d.changed_lines() > 0);
200 }
201
202 #[test]
203 fn test_build_histogram_counts() {
204 let lines = ["a", "b", "a"];
205 let h = build_histogram(&lines);
206 assert_eq!(h[&"a"], 2);
207 assert_eq!(h[&"b"], 1);
208 }
209
210 #[test]
211 fn test_all_added() {
212 let old: &[&str] = &[];
213 let new = ["x", "y"];
214 let d = histogram_diff(old, &new, &default_cfg());
215 assert!(d.changed_lines() > 0);
216 }
217
218 #[test]
219 fn test_to_string_has_at() {
220 let old = ["a"];
221 let new = ["b"];
222 let d = histogram_diff(&old, &new, &default_cfg());
223 let s = histogram_diff_to_string(&d);
224 assert!(s.contains("@@"));
225 }
226
227 #[test]
228 fn test_default_config() {
229 let cfg = HistogramDiffConfig::default();
230 assert_eq!(cfg.context_lines, 3);
231 }
232
233 #[test]
234 fn test_default_histogram_diff() {
235 let d = HistogramDiff::default();
236 assert!(d.is_identical());
237 }
238}