layout_audit/
diff.rs

1use crate::types::{SourceLocation, StructLayout};
2use serde::Serialize;
3use std::collections::{BTreeMap, BTreeSet};
4
5/// Penalty for matching structs with mismatched source locations.
6/// Large enough to dominate all other scoring factors, preventing cross-location matching.
7const LOCATION_MISMATCH_PENALTY: i64 = i64::MIN / 4;
8
9// Similarity scoring weights for member matching.
10// Higher values = stronger signal for matching same-name duplicates.
11const SCORE_TYPE_MATCH: i64 = 5; // Type name matches
12const SCORE_SIZE_MATCH: i64 = 2; // Size matches
13const SCORE_OFFSET_MATCH: i64 = 1; // Offset matches
14const SCORE_MEMBER_OVERLAP: i64 = 10; // Per overlapping member name
15
16#[derive(Debug, Clone, Serialize)]
17pub struct DiffResult {
18    pub added: Vec<StructSummary>,
19    pub removed: Vec<StructSummary>,
20    pub changed: Vec<StructChange>,
21    pub unchanged_count: usize,
22}
23
24#[derive(Debug, Clone, Serialize)]
25pub struct StructSummary {
26    pub name: String,
27    pub size: u64,
28    pub padding_bytes: u64,
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub source_location: Option<SourceLocation>,
31}
32
33#[derive(Debug, Clone, Serialize)]
34pub struct StructChange {
35    pub name: String,
36    pub old_size: u64,
37    pub new_size: u64,
38    pub size_delta: i64,
39    pub old_padding: u64,
40    pub new_padding: u64,
41    pub padding_delta: i64,
42    pub member_changes: Vec<MemberChange>,
43    #[serde(skip_serializing_if = "Option::is_none")]
44    pub source_location: Option<SourceLocation>,
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub old_source_location: Option<SourceLocation>,
47}
48
49#[derive(Debug, Clone, Serialize)]
50pub struct MemberChange {
51    pub kind: MemberChangeKind,
52    pub name: String,
53    pub details: String,
54}
55
56#[derive(Debug, Clone, Serialize, PartialEq)]
57pub enum MemberChangeKind {
58    Added,
59    Removed,
60    OffsetChanged,
61    SizeChanged,
62    TypeChanged,
63}
64
65impl DiffResult {
66    pub fn has_changes(&self) -> bool {
67        !self.added.is_empty() || !self.removed.is_empty() || !self.changed.is_empty()
68    }
69
70    pub fn has_regressions(&self) -> bool {
71        self.changed.iter().any(|c| c.size_delta > 0 || c.padding_delta > 0)
72    }
73}
74
75pub fn diff_layouts(old: &[StructLayout], new: &[StructLayout]) -> DiffResult {
76    let mut added = Vec::new();
77    let mut removed = Vec::new();
78    let mut changed = Vec::new();
79    let mut unchanged_count = 0;
80
81    // Group by display name; allow duplicates and match them deterministically.
82    // IMPORTANT: Use BTreeMap (not HashMap) for deterministic iteration order.
83    // This ensures stable JSON output across runs. See AUDIT_FINDINGS.md Finding #1.
84    let mut old_by_name: BTreeMap<String, Vec<&StructLayout>> = BTreeMap::new();
85    let mut new_by_name: BTreeMap<String, Vec<&StructLayout>> = BTreeMap::new();
86
87    for s in old {
88        old_by_name.entry(s.name.clone()).or_default().push(s);
89    }
90    for s in new {
91        new_by_name.entry(s.name.clone()).or_default().push(s);
92    }
93
94    // Iterate names in a deterministic order using BTreeSet for uniqueness.
95    let all_names: BTreeSet<&str> =
96        old_by_name.keys().chain(new_by_name.keys()).map(String::as_str).collect();
97
98    for name in all_names {
99        let name = name.to_string(); // Convert to owned for lookups and output
100        let old_group = old_by_name.get(&name).map(Vec::as_slice).unwrap_or(&[]);
101        let new_group = new_by_name.get(&name).map(Vec::as_slice).unwrap_or(&[]);
102
103        if old_group.is_empty() {
104            for s in new_group {
105                added.push(StructSummary {
106                    name: name.clone(),
107                    size: s.size,
108                    padding_bytes: s.metrics.padding_bytes,
109                    source_location: s.source_location.clone(),
110                });
111            }
112            continue;
113        }
114
115        if new_group.is_empty() {
116            for s in old_group {
117                removed.push(StructSummary {
118                    name: name.clone(),
119                    size: s.size,
120                    padding_bytes: s.metrics.padding_bytes,
121                    source_location: s.source_location.clone(),
122                });
123            }
124            continue;
125        }
126
127        let (pairs, old_unmatched, new_unmatched) = match_structs(old_group, new_group);
128
129        for (old_s, new_s) in pairs {
130            if let Some(change) = diff_struct(old_s, new_s) {
131                changed.push(change);
132            } else {
133                unchanged_count += 1;
134            }
135        }
136
137        for s in old_unmatched {
138            removed.push(StructSummary {
139                name: name.clone(),
140                size: s.size,
141                padding_bytes: s.metrics.padding_bytes,
142                source_location: s.source_location.clone(),
143            });
144        }
145
146        for s in new_unmatched {
147            added.push(StructSummary {
148                name: name.clone(),
149                size: s.size,
150                padding_bytes: s.metrics.padding_bytes,
151                source_location: s.source_location.clone(),
152            });
153        }
154    }
155
156    added.sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.size.cmp(&b.size)));
157    removed.sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.size.cmp(&b.size)));
158    changed.sort_by(|a, b| {
159        a.name
160            .cmp(&b.name)
161            .then_with(|| a.old_size.cmp(&b.old_size))
162            .then_with(|| a.new_size.cmp(&b.new_size))
163    });
164
165    DiffResult { added, removed, changed, unchanged_count }
166}
167
168fn location_key(s: &StructLayout) -> Option<(&str, u64)> {
169    s.source_location.as_ref().map(|loc| (loc.file.as_str(), loc.line))
170}
171
172fn member_similarity_score(old: &StructLayout, new: &StructLayout) -> i64 {
173    // If both have source locations and they disagree, heavily penalize to avoid cross-matching.
174    if let (Some(ol), Some(nl)) = (location_key(old), location_key(new)) {
175        if ol != nl {
176            return LOCATION_MISMATCH_PENALTY;
177        }
178    }
179
180    let mut score: i64 = 0;
181
182    // Prefer similar overall sizes/padding.
183    // Use abs_diff() to avoid overflow when converting large u64 to i64.
184    let size_delta = old.size.abs_diff(new.size).min(i64::MAX as u64) as i64;
185    score = score.saturating_sub(size_delta);
186
187    let pad_delta =
188        old.metrics.padding_bytes.abs_diff(new.metrics.padding_bytes).min(i64::MAX as u64) as i64;
189    score = score.saturating_sub(pad_delta / 2);
190
191    // Member-name overlap drives matching for same-name duplicates.
192    // Use BTreeMap for deterministic iteration (consistent with rest of module).
193    let old_members: BTreeMap<&str, _> = old.members.iter().map(|m| (m.name.as_str(), m)).collect();
194    let new_members: BTreeMap<&str, _> = new.members.iter().map(|m| (m.name.as_str(), m)).collect();
195
196    let mut intersection: i64 = 0;
197    for (name, om) in &old_members {
198        if let Some(nm) = new_members.get(name) {
199            intersection += 1;
200            if om.type_name == nm.type_name {
201                score += SCORE_TYPE_MATCH;
202            }
203            if om.size == nm.size {
204                score += SCORE_SIZE_MATCH;
205            }
206            if om.offset == nm.offset {
207                score += SCORE_OFFSET_MATCH;
208            }
209        }
210    }
211
212    score += intersection * SCORE_MEMBER_OVERLAP;
213
214    // Light preference for similar member count.
215    // Use abs_diff to avoid overflow for large member counts.
216    let count_delta = old.members.len().abs_diff(new.members.len()).min(i64::MAX as usize) as i64;
217    score -= count_delta;
218
219    score
220}
221
222fn match_structs<'a>(
223    old_group: &[&'a StructLayout],
224    new_group: &[&'a StructLayout],
225) -> (Vec<(&'a StructLayout, &'a StructLayout)>, Vec<&'a StructLayout>, Vec<&'a StructLayout>) {
226    let mut pairs: Vec<(&StructLayout, &StructLayout)> = Vec::new();
227    let mut old_used = vec![false; old_group.len()];
228    let mut new_used = vec![false; new_group.len()];
229
230    // 1) Exact match by (file,line) when available.
231    let mut new_by_loc: BTreeMap<(&str, u64), Vec<usize>> = BTreeMap::new();
232    for (j, s) in new_group.iter().enumerate() {
233        if let Some(loc) = location_key(s) {
234            new_by_loc.entry(loc).or_default().push(j);
235        }
236    }
237
238    for (i, s) in old_group.iter().enumerate() {
239        let Some(loc) = location_key(s) else { continue };
240        let Some(candidates) = new_by_loc.get(&loc) else { continue };
241
242        if let Some(&j) = candidates.iter().find(|&&j| !new_used[j]) {
243            old_used[i] = true;
244            new_used[j] = true;
245            pairs.push((*s, new_group[j]));
246        }
247    }
248
249    // 2) Deterministic greedy matching by similarity score.
250    // First check if exactly one unpaired remains on each side - if so, pair them directly
251    // to preserve baseline semantics for non-duplicate names (avoids O(n*m) similarity calc).
252    let remaining_old: Vec<usize> =
253        old_used.iter().enumerate().filter_map(|(i, used)| (!*used).then_some(i)).collect();
254    let remaining_new: Vec<usize> =
255        new_used.iter().enumerate().filter_map(|(j, used)| (!*used).then_some(j)).collect();
256
257    if remaining_old.len() == 1 && remaining_new.len() == 1 {
258        let i = remaining_old[0];
259        let j = remaining_new[0];
260        old_used[i] = true;
261        new_used[j] = true;
262        pairs.push((old_group[i], new_group[j]));
263    } else if !remaining_old.is_empty() && !remaining_new.is_empty() {
264        // Build similarity scores only when needed.
265        let mut scored: Vec<(i64, usize, usize)> = Vec::new();
266        for &i in &remaining_old {
267            for &j in &remaining_new {
268                let score = member_similarity_score(old_group[i], new_group[j]);
269                scored.push((score, i, j));
270            }
271        }
272
273        scored
274            .sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1)).then_with(|| a.2.cmp(&b.2)));
275
276        // Require a positive score to avoid pairing completely unrelated duplicates.
277        for (score, i, j) in scored {
278            if score <= 0 {
279                break;
280            }
281            if old_used[i] || new_used[j] {
282                continue;
283            }
284            old_used[i] = true;
285            new_used[j] = true;
286            pairs.push((old_group[i], new_group[j]));
287        }
288    }
289
290    // Deterministic ordering of produced pairs.
291    pairs.sort_by(|(a_old, a_new), (b_old, b_new)| {
292        location_key(a_old)
293            .cmp(&location_key(b_old))
294            .then_with(|| a_old.size.cmp(&b_old.size))
295            .then_with(|| a_new.size.cmp(&b_new.size))
296    });
297
298    let old_unmatched: Vec<&StructLayout> =
299        old_group.iter().enumerate().filter_map(|(i, s)| (!old_used[i]).then_some(*s)).collect();
300    let new_unmatched: Vec<&StructLayout> =
301        new_group.iter().enumerate().filter_map(|(j, s)| (!new_used[j]).then_some(*s)).collect();
302
303    (pairs, old_unmatched, new_unmatched)
304}
305
306fn kind_rank(kind: &MemberChangeKind) -> u8 {
307    match kind {
308        MemberChangeKind::Removed => 0,
309        MemberChangeKind::Added => 1,
310        MemberChangeKind::TypeChanged => 2,
311        MemberChangeKind::SizeChanged => 3,
312        MemberChangeKind::OffsetChanged => 4,
313    }
314}
315
316fn diff_struct(old: &StructLayout, new: &StructLayout) -> Option<StructChange> {
317    // Use saturating signed subtraction to handle large u64 values safely.
318    let size_delta =
319        (new.size as i128 - old.size as i128).clamp(i64::MIN as i128, i64::MAX as i128) as i64;
320    let padding_delta = (new.metrics.padding_bytes as i128 - old.metrics.padding_bytes as i128)
321        .clamp(i64::MIN as i128, i64::MAX as i128) as i64;
322
323    let mut member_changes = Vec::new();
324
325    // Use BTreeMap for deterministic iteration order.
326    let old_members: BTreeMap<&str, _> = old.members.iter().map(|m| (m.name.as_str(), m)).collect();
327    let new_members: BTreeMap<&str, _> = new.members.iter().map(|m| (m.name.as_str(), m)).collect();
328
329    for (name, old_member) in &old_members {
330        if !new_members.contains_key(name) {
331            member_changes.push(MemberChange {
332                kind: MemberChangeKind::Removed,
333                name: name.to_string(),
334                details: format!("offset {:?}, size {:?}", old_member.offset, old_member.size),
335            });
336        }
337    }
338
339    for (name, new_member) in &new_members {
340        match old_members.get(name) {
341            None => {
342                member_changes.push(MemberChange {
343                    kind: MemberChangeKind::Added,
344                    name: name.to_string(),
345                    details: format!("offset {:?}, size {:?}", new_member.offset, new_member.size),
346                });
347            }
348            Some(old_member) => {
349                if old_member.offset != new_member.offset {
350                    member_changes.push(MemberChange {
351                        kind: MemberChangeKind::OffsetChanged,
352                        name: name.to_string(),
353                        details: format!("{:?} -> {:?}", old_member.offset, new_member.offset),
354                    });
355                }
356                if old_member.size != new_member.size {
357                    member_changes.push(MemberChange {
358                        kind: MemberChangeKind::SizeChanged,
359                        name: name.to_string(),
360                        details: format!("{:?} -> {:?}", old_member.size, new_member.size),
361                    });
362                }
363                if old_member.type_name != new_member.type_name {
364                    member_changes.push(MemberChange {
365                        kind: MemberChangeKind::TypeChanged,
366                        name: name.to_string(),
367                        details: format!("{} -> {}", old_member.type_name, new_member.type_name),
368                    });
369                }
370            }
371        }
372    }
373
374    member_changes.sort_by(|a, b| {
375        kind_rank(&a.kind)
376            .cmp(&kind_rank(&b.kind))
377            .then_with(|| a.name.cmp(&b.name))
378            .then_with(|| a.details.cmp(&b.details))
379    });
380
381    if size_delta == 0 && padding_delta == 0 && member_changes.is_empty() {
382        return None;
383    }
384
385    Some(StructChange {
386        name: old.name.clone(),
387        old_size: old.size,
388        new_size: new.size,
389        size_delta,
390        old_padding: old.metrics.padding_bytes,
391        new_padding: new.metrics.padding_bytes,
392        padding_delta,
393        member_changes,
394        source_location: new.source_location.clone(),
395        old_source_location: old.source_location.clone(),
396    })
397}
398
399#[cfg(test)]
400mod tests {
401    use super::*;
402    use crate::types::{LayoutMetrics, MemberLayout, SourceLocation, StructLayout};
403
404    fn layout(
405        name: &str,
406        size: u64,
407        padding_bytes: u64,
408        members: Vec<MemberLayout>,
409    ) -> StructLayout {
410        let mut s = StructLayout::new(name.to_string(), size, Some(8));
411        s.members = members;
412        s.metrics = LayoutMetrics { padding_bytes, total_size: size, ..LayoutMetrics::default() };
413        s
414    }
415
416    fn layout_with_loc(name: &str, file: &str, line: u64) -> StructLayout {
417        let mut s = layout(name, 16, 0, Vec::new());
418        s.source_location = Some(SourceLocation { file: file.to_string(), line });
419        s
420    }
421
422    #[test]
423    fn member_changes_are_sorted_deterministically() {
424        let mut old = layout(
425            "X",
426            16,
427            0,
428            vec![
429                MemberLayout::new("a".to_string(), "u32".to_string(), Some(0), Some(4)),
430                MemberLayout::new("b".to_string(), "u32".to_string(), Some(4), Some(4)),
431            ],
432        );
433        let mut new = layout(
434            "X",
435            20,
436            4,
437            vec![
438                MemberLayout::new("a".to_string(), "u64".to_string(), Some(0), Some(8)), // type/size change
439                MemberLayout::new("c".to_string(), "u32".to_string(), Some(8), Some(4)), // added
440            ],
441        );
442
443        // Ensure metrics exist to make diff meaningful.
444        old.metrics.padding_bytes = 0;
445        new.metrics.padding_bytes = 4;
446
447        let diff = diff_layouts(&[old], &[new]);
448        assert_eq!(diff.changed.len(), 1);
449        let changes = &diff.changed[0].member_changes;
450
451        // Expected ordering: Removed, Added, TypeChanged, SizeChanged, OffsetChanged (then name)
452        let kinds: Vec<_> = changes.iter().map(|c| &c.kind).collect();
453        assert!(kinds.windows(2).all(|w| kind_rank(w[0]) <= kind_rank(w[1])));
454    }
455
456    #[test]
457    fn diff_added_and_removed() {
458        let old = layout("A", 8, 0, Vec::new());
459        let new = layout("B", 8, 0, Vec::new());
460        let diff = diff_layouts(&[old], &[new]);
461        assert_eq!(diff.added.len(), 1);
462        assert_eq!(diff.removed.len(), 1);
463    }
464
465    #[test]
466    fn diff_matches_by_location_for_duplicates() {
467        let old1 = layout_with_loc("Dup", "a.c", 1);
468        let old2 = layout_with_loc("Dup", "b.c", 2);
469        let new1 = layout_with_loc("Dup", "b.c", 2);
470        let new2 = layout_with_loc("Dup", "a.c", 1);
471        let diff = diff_layouts(&[old1, old2], &[new1, new2]);
472        assert_eq!(diff.changed.len(), 0);
473        assert_eq!(diff.unchanged_count, 2);
474    }
475
476    #[test]
477    fn diff_detects_member_offset_change() {
478        let old = layout(
479            "X",
480            8,
481            0,
482            vec![MemberLayout::new("a".to_string(), "u32".to_string(), Some(0), Some(4))],
483        );
484        let new = layout(
485            "X",
486            8,
487            0,
488            vec![MemberLayout::new("a".to_string(), "u32".to_string(), Some(4), Some(4))],
489        );
490        let diff = diff_layouts(&[old], &[new]);
491        assert_eq!(diff.changed.len(), 1);
492        assert!(
493            diff.changed[0]
494                .member_changes
495                .iter()
496                .any(|c| c.kind == MemberChangeKind::OffsetChanged)
497        );
498    }
499
500    #[test]
501    fn diff_reports_all_member_change_kinds() {
502        let old = layout(
503            "Y",
504            16,
505            0,
506            vec![
507                MemberLayout::new("a".to_string(), "u32".to_string(), Some(0), Some(4)),
508                MemberLayout::new("b".to_string(), "u16".to_string(), Some(4), Some(2)),
509            ],
510        );
511        let mut new = layout(
512            "Y",
513            20,
514            4,
515            vec![
516                MemberLayout::new("b".to_string(), "u32".to_string(), Some(8), Some(4)),
517                MemberLayout::new("c".to_string(), "u8".to_string(), Some(0), Some(1)),
518            ],
519        );
520        new.metrics.padding_bytes = 4;
521
522        let diff = diff_layouts(&[old], &[new]);
523        let changes = &diff.changed[0].member_changes;
524        let kinds: Vec<_> = changes.iter().map(|c| &c.kind).collect();
525        assert!(kinds.contains(&&MemberChangeKind::Added));
526        assert!(kinds.contains(&&MemberChangeKind::Removed));
527        assert!(kinds.contains(&&MemberChangeKind::TypeChanged));
528        assert!(kinds.contains(&&MemberChangeKind::SizeChanged));
529        assert!(kinds.contains(&&MemberChangeKind::OffsetChanged));
530        assert!(diff.has_changes());
531        assert!(diff.has_regressions());
532    }
533
534    #[test]
535    fn member_similarity_location_penalty() {
536        let mut a = layout_with_loc("T", "a.c", 1);
537        let mut b = layout_with_loc("T", "b.c", 2);
538        a.members.push(MemberLayout::new("x".to_string(), "u8".to_string(), Some(0), Some(1)));
539        b.members.push(MemberLayout::new("x".to_string(), "u8".to_string(), Some(0), Some(1)));
540        let score = member_similarity_score(&a, &b);
541        assert_eq!(score, LOCATION_MISMATCH_PENALTY);
542    }
543
544    #[test]
545    fn match_structs_pairs_by_similarity() {
546        let old1 = layout(
547            "Z",
548            8,
549            0,
550            vec![MemberLayout::new("a".to_string(), "u8".to_string(), Some(0), Some(1))],
551        );
552        let old2 = layout(
553            "Z",
554            16,
555            0,
556            vec![MemberLayout::new("b".to_string(), "u32".to_string(), Some(0), Some(4))],
557        );
558        let new1 = layout(
559            "Z",
560            16,
561            0,
562            vec![MemberLayout::new("b".to_string(), "u32".to_string(), Some(0), Some(4))],
563        );
564        let new2 = layout(
565            "Z",
566            8,
567            0,
568            vec![MemberLayout::new("a".to_string(), "u8".to_string(), Some(0), Some(1))],
569        );
570
571        let (pairs, old_unmatched, new_unmatched) = match_structs(&[&old1, &old2], &[&new1, &new2]);
572        assert_eq!(pairs.len(), 2);
573        assert!(old_unmatched.is_empty());
574        assert!(new_unmatched.is_empty());
575    }
576}