layout_audit/
diff.rs

1use crate::types::StructLayout;
2use serde::Serialize;
3use std::collections::{BTreeMap, BTreeSet};
4
5/// Penalty for matching structs with mismatched source locations.
6/// Large enough to dominate all other scoring factors, preventing cross-location matching.
7const LOCATION_MISMATCH_PENALTY: i64 = i64::MIN / 4;
8
9// Similarity scoring weights for member matching.
10// Higher values = stronger signal for matching same-name duplicates.
11const SCORE_TYPE_MATCH: i64 = 5; // Type name matches
12const SCORE_SIZE_MATCH: i64 = 2; // Size matches
13const SCORE_OFFSET_MATCH: i64 = 1; // Offset matches
14const SCORE_MEMBER_OVERLAP: i64 = 10; // Per overlapping member name
15
16#[derive(Debug, Clone, Serialize)]
17pub struct DiffResult {
18    pub added: Vec<StructSummary>,
19    pub removed: Vec<StructSummary>,
20    pub changed: Vec<StructChange>,
21    pub unchanged_count: usize,
22}
23
24#[derive(Debug, Clone, Serialize)]
25pub struct StructSummary {
26    pub name: String,
27    pub size: u64,
28    pub padding_bytes: u64,
29}
30
31#[derive(Debug, Clone, Serialize)]
32pub struct StructChange {
33    pub name: String,
34    pub old_size: u64,
35    pub new_size: u64,
36    pub size_delta: i64,
37    pub old_padding: u64,
38    pub new_padding: u64,
39    pub padding_delta: i64,
40    pub member_changes: Vec<MemberChange>,
41}
42
43#[derive(Debug, Clone, Serialize)]
44pub struct MemberChange {
45    pub kind: MemberChangeKind,
46    pub name: String,
47    pub details: String,
48}
49
50#[derive(Debug, Clone, Serialize, PartialEq)]
51pub enum MemberChangeKind {
52    Added,
53    Removed,
54    OffsetChanged,
55    SizeChanged,
56    TypeChanged,
57}
58
59impl DiffResult {
60    pub fn has_changes(&self) -> bool {
61        !self.added.is_empty() || !self.removed.is_empty() || !self.changed.is_empty()
62    }
63
64    pub fn has_regressions(&self) -> bool {
65        self.changed.iter().any(|c| c.size_delta > 0 || c.padding_delta > 0)
66    }
67}
68
69pub fn diff_layouts(old: &[StructLayout], new: &[StructLayout]) -> DiffResult {
70    let mut added = Vec::new();
71    let mut removed = Vec::new();
72    let mut changed = Vec::new();
73    let mut unchanged_count = 0;
74
75    // Group by display name; allow duplicates and match them deterministically.
76    // IMPORTANT: Use BTreeMap (not HashMap) for deterministic iteration order.
77    // This ensures stable JSON output across runs. See AUDIT_FINDINGS.md Finding #1.
78    let mut old_by_name: BTreeMap<String, Vec<&StructLayout>> = BTreeMap::new();
79    let mut new_by_name: BTreeMap<String, Vec<&StructLayout>> = BTreeMap::new();
80
81    for s in old {
82        old_by_name.entry(s.name.clone()).or_default().push(s);
83    }
84    for s in new {
85        new_by_name.entry(s.name.clone()).or_default().push(s);
86    }
87
88    // Iterate names in a deterministic order using BTreeSet for uniqueness.
89    let all_names: BTreeSet<&str> =
90        old_by_name.keys().chain(new_by_name.keys()).map(String::as_str).collect();
91
92    for name in all_names {
93        let name = name.to_string(); // Convert to owned for lookups and output
94        let old_group = old_by_name.get(&name).map(Vec::as_slice).unwrap_or(&[]);
95        let new_group = new_by_name.get(&name).map(Vec::as_slice).unwrap_or(&[]);
96
97        if old_group.is_empty() {
98            for s in new_group {
99                added.push(StructSummary {
100                    name: name.clone(),
101                    size: s.size,
102                    padding_bytes: s.metrics.padding_bytes,
103                });
104            }
105            continue;
106        }
107
108        if new_group.is_empty() {
109            for s in old_group {
110                removed.push(StructSummary {
111                    name: name.clone(),
112                    size: s.size,
113                    padding_bytes: s.metrics.padding_bytes,
114                });
115            }
116            continue;
117        }
118
119        let (pairs, old_unmatched, new_unmatched) = match_structs(old_group, new_group);
120
121        for (old_s, new_s) in pairs {
122            if let Some(change) = diff_struct(old_s, new_s) {
123                changed.push(change);
124            } else {
125                unchanged_count += 1;
126            }
127        }
128
129        for s in old_unmatched {
130            removed.push(StructSummary {
131                name: name.clone(),
132                size: s.size,
133                padding_bytes: s.metrics.padding_bytes,
134            });
135        }
136
137        for s in new_unmatched {
138            added.push(StructSummary {
139                name: name.clone(),
140                size: s.size,
141                padding_bytes: s.metrics.padding_bytes,
142            });
143        }
144    }
145
146    added.sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.size.cmp(&b.size)));
147    removed.sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.size.cmp(&b.size)));
148    changed.sort_by(|a, b| {
149        a.name
150            .cmp(&b.name)
151            .then_with(|| a.old_size.cmp(&b.old_size))
152            .then_with(|| a.new_size.cmp(&b.new_size))
153    });
154
155    DiffResult { added, removed, changed, unchanged_count }
156}
157
158fn location_key(s: &StructLayout) -> Option<(&str, u64)> {
159    s.source_location.as_ref().map(|loc| (loc.file.as_str(), loc.line))
160}
161
162fn member_similarity_score(old: &StructLayout, new: &StructLayout) -> i64 {
163    // If both have source locations and they disagree, heavily penalize to avoid cross-matching.
164    if let (Some(ol), Some(nl)) = (location_key(old), location_key(new)) {
165        if ol != nl {
166            return LOCATION_MISMATCH_PENALTY;
167        }
168    }
169
170    let mut score: i64 = 0;
171
172    // Prefer similar overall sizes/padding.
173    // Use abs_diff() to avoid overflow when converting large u64 to i64.
174    let size_delta = old.size.abs_diff(new.size).min(i64::MAX as u64) as i64;
175    score = score.saturating_sub(size_delta);
176
177    let pad_delta =
178        old.metrics.padding_bytes.abs_diff(new.metrics.padding_bytes).min(i64::MAX as u64) as i64;
179    score = score.saturating_sub(pad_delta / 2);
180
181    // Member-name overlap drives matching for same-name duplicates.
182    // Use BTreeMap for deterministic iteration (consistent with rest of module).
183    let old_members: BTreeMap<&str, _> = old.members.iter().map(|m| (m.name.as_str(), m)).collect();
184    let new_members: BTreeMap<&str, _> = new.members.iter().map(|m| (m.name.as_str(), m)).collect();
185
186    let mut intersection: i64 = 0;
187    for (name, om) in &old_members {
188        if let Some(nm) = new_members.get(name) {
189            intersection += 1;
190            if om.type_name == nm.type_name {
191                score += SCORE_TYPE_MATCH;
192            }
193            if om.size == nm.size {
194                score += SCORE_SIZE_MATCH;
195            }
196            if om.offset == nm.offset {
197                score += SCORE_OFFSET_MATCH;
198            }
199        }
200    }
201
202    score += intersection * SCORE_MEMBER_OVERLAP;
203
204    // Light preference for similar member count.
205    // Use abs_diff to avoid overflow for large member counts.
206    let count_delta = old.members.len().abs_diff(new.members.len()).min(i64::MAX as usize) as i64;
207    score -= count_delta;
208
209    score
210}
211
212fn match_structs<'a>(
213    old_group: &[&'a StructLayout],
214    new_group: &[&'a StructLayout],
215) -> (Vec<(&'a StructLayout, &'a StructLayout)>, Vec<&'a StructLayout>, Vec<&'a StructLayout>) {
216    let mut pairs: Vec<(&StructLayout, &StructLayout)> = Vec::new();
217    let mut old_used = vec![false; old_group.len()];
218    let mut new_used = vec![false; new_group.len()];
219
220    // 1) Exact match by (file,line) when available.
221    let mut new_by_loc: BTreeMap<(&str, u64), Vec<usize>> = BTreeMap::new();
222    for (j, s) in new_group.iter().enumerate() {
223        if let Some(loc) = location_key(s) {
224            new_by_loc.entry(loc).or_default().push(j);
225        }
226    }
227
228    for (i, s) in old_group.iter().enumerate() {
229        let Some(loc) = location_key(s) else { continue };
230        let Some(candidates) = new_by_loc.get(&loc) else { continue };
231
232        if let Some(&j) = candidates.iter().find(|&&j| !new_used[j]) {
233            old_used[i] = true;
234            new_used[j] = true;
235            pairs.push((*s, new_group[j]));
236        }
237    }
238
239    // 2) Deterministic greedy matching by similarity score.
240    // First check if exactly one unpaired remains on each side - if so, pair them directly
241    // to preserve baseline semantics for non-duplicate names (avoids O(n*m) similarity calc).
242    let remaining_old: Vec<usize> =
243        old_used.iter().enumerate().filter_map(|(i, used)| (!*used).then_some(i)).collect();
244    let remaining_new: Vec<usize> =
245        new_used.iter().enumerate().filter_map(|(j, used)| (!*used).then_some(j)).collect();
246
247    if remaining_old.len() == 1 && remaining_new.len() == 1 {
248        let i = remaining_old[0];
249        let j = remaining_new[0];
250        old_used[i] = true;
251        new_used[j] = true;
252        pairs.push((old_group[i], new_group[j]));
253    } else if !remaining_old.is_empty() && !remaining_new.is_empty() {
254        // Build similarity scores only when needed.
255        let mut scored: Vec<(i64, usize, usize)> = Vec::new();
256        for &i in &remaining_old {
257            for &j in &remaining_new {
258                let score = member_similarity_score(old_group[i], new_group[j]);
259                scored.push((score, i, j));
260            }
261        }
262
263        scored
264            .sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1)).then_with(|| a.2.cmp(&b.2)));
265
266        // Require a positive score to avoid pairing completely unrelated duplicates.
267        for (score, i, j) in scored {
268            if score <= 0 {
269                break;
270            }
271            if old_used[i] || new_used[j] {
272                continue;
273            }
274            old_used[i] = true;
275            new_used[j] = true;
276            pairs.push((old_group[i], new_group[j]));
277        }
278    }
279
280    // Deterministic ordering of produced pairs.
281    pairs.sort_by(|(a_old, a_new), (b_old, b_new)| {
282        location_key(a_old)
283            .cmp(&location_key(b_old))
284            .then_with(|| a_old.size.cmp(&b_old.size))
285            .then_with(|| a_new.size.cmp(&b_new.size))
286    });
287
288    let old_unmatched: Vec<&StructLayout> =
289        old_group.iter().enumerate().filter_map(|(i, s)| (!old_used[i]).then_some(*s)).collect();
290    let new_unmatched: Vec<&StructLayout> =
291        new_group.iter().enumerate().filter_map(|(j, s)| (!new_used[j]).then_some(*s)).collect();
292
293    (pairs, old_unmatched, new_unmatched)
294}
295
296fn kind_rank(kind: &MemberChangeKind) -> u8 {
297    match kind {
298        MemberChangeKind::Removed => 0,
299        MemberChangeKind::Added => 1,
300        MemberChangeKind::TypeChanged => 2,
301        MemberChangeKind::SizeChanged => 3,
302        MemberChangeKind::OffsetChanged => 4,
303    }
304}
305
306fn diff_struct(old: &StructLayout, new: &StructLayout) -> Option<StructChange> {
307    // Use saturating signed subtraction to handle large u64 values safely.
308    let size_delta =
309        (new.size as i128 - old.size as i128).clamp(i64::MIN as i128, i64::MAX as i128) as i64;
310    let padding_delta = (new.metrics.padding_bytes as i128 - old.metrics.padding_bytes as i128)
311        .clamp(i64::MIN as i128, i64::MAX as i128) as i64;
312
313    let mut member_changes = Vec::new();
314
315    // Use BTreeMap for deterministic iteration order.
316    let old_members: BTreeMap<&str, _> = old.members.iter().map(|m| (m.name.as_str(), m)).collect();
317    let new_members: BTreeMap<&str, _> = new.members.iter().map(|m| (m.name.as_str(), m)).collect();
318
319    for (name, old_member) in &old_members {
320        if !new_members.contains_key(name) {
321            member_changes.push(MemberChange {
322                kind: MemberChangeKind::Removed,
323                name: name.to_string(),
324                details: format!("offset {:?}, size {:?}", old_member.offset, old_member.size),
325            });
326        }
327    }
328
329    for (name, new_member) in &new_members {
330        match old_members.get(name) {
331            None => {
332                member_changes.push(MemberChange {
333                    kind: MemberChangeKind::Added,
334                    name: name.to_string(),
335                    details: format!("offset {:?}, size {:?}", new_member.offset, new_member.size),
336                });
337            }
338            Some(old_member) => {
339                if old_member.offset != new_member.offset {
340                    member_changes.push(MemberChange {
341                        kind: MemberChangeKind::OffsetChanged,
342                        name: name.to_string(),
343                        details: format!("{:?} -> {:?}", old_member.offset, new_member.offset),
344                    });
345                }
346                if old_member.size != new_member.size {
347                    member_changes.push(MemberChange {
348                        kind: MemberChangeKind::SizeChanged,
349                        name: name.to_string(),
350                        details: format!("{:?} -> {:?}", old_member.size, new_member.size),
351                    });
352                }
353                if old_member.type_name != new_member.type_name {
354                    member_changes.push(MemberChange {
355                        kind: MemberChangeKind::TypeChanged,
356                        name: name.to_string(),
357                        details: format!("{} -> {}", old_member.type_name, new_member.type_name),
358                    });
359                }
360            }
361        }
362    }
363
364    member_changes.sort_by(|a, b| {
365        kind_rank(&a.kind)
366            .cmp(&kind_rank(&b.kind))
367            .then_with(|| a.name.cmp(&b.name))
368            .then_with(|| a.details.cmp(&b.details))
369    });
370
371    if size_delta == 0 && padding_delta == 0 && member_changes.is_empty() {
372        return None;
373    }
374
375    Some(StructChange {
376        name: old.name.clone(),
377        old_size: old.size,
378        new_size: new.size,
379        size_delta,
380        old_padding: old.metrics.padding_bytes,
381        new_padding: new.metrics.padding_bytes,
382        padding_delta,
383        member_changes,
384    })
385}
386
387#[cfg(test)]
388mod tests {
389    use super::*;
390    use crate::types::{LayoutMetrics, MemberLayout, StructLayout};
391
392    fn layout(
393        name: &str,
394        size: u64,
395        padding_bytes: u64,
396        members: Vec<MemberLayout>,
397    ) -> StructLayout {
398        let mut s = StructLayout::new(name.to_string(), size, Some(8));
399        s.members = members;
400        s.metrics = LayoutMetrics { padding_bytes, total_size: size, ..LayoutMetrics::default() };
401        s
402    }
403
404    #[test]
405    fn member_changes_are_sorted_deterministically() {
406        let mut old = layout(
407            "X",
408            16,
409            0,
410            vec![
411                MemberLayout::new("a".to_string(), "u32".to_string(), Some(0), Some(4)),
412                MemberLayout::new("b".to_string(), "u32".to_string(), Some(4), Some(4)),
413            ],
414        );
415        let mut new = layout(
416            "X",
417            20,
418            4,
419            vec![
420                MemberLayout::new("a".to_string(), "u64".to_string(), Some(0), Some(8)), // type/size change
421                MemberLayout::new("c".to_string(), "u32".to_string(), Some(8), Some(4)), // added
422            ],
423        );
424
425        // Ensure metrics exist to make diff meaningful.
426        old.metrics.padding_bytes = 0;
427        new.metrics.padding_bytes = 4;
428
429        let diff = diff_layouts(&[old], &[new]);
430        assert_eq!(diff.changed.len(), 1);
431        let changes = &diff.changed[0].member_changes;
432
433        // Expected ordering: Removed, Added, TypeChanged, SizeChanged, OffsetChanged (then name)
434        let kinds: Vec<_> = changes.iter().map(|c| &c.kind).collect();
435        assert!(kinds.windows(2).all(|w| kind_rank(w[0]) <= kind_rank(w[1])));
436    }
437}