layout_audit/dwarf/
context.rs

1use crate::error::{Error, Result};
2use crate::loader::{DwarfSlice, LoadedDwarf};
3use crate::types::{MemberLayout, SourceLocation, StructLayout};
4use gimli::{AttributeValue, DebuggingInformationEntry, Dwarf, Unit};
5
6use super::TypeResolver;
7use super::expr::{evaluate_member_offset, try_simple_offset};
8use super::{debug_info_ref_to_unit_offset, read_u64_from_attr};
9
/// Reports whether `name` looks like a Go runtime/compiler-internal type.
///
/// Such types are generated by the Go toolchain rather than written by the
/// user, so callers typically hide them from layout analysis.
///
/// A name matches when it:
/// - contains the middle dot (`U+00B7`) Go uses as an internal symbol separator,
/// - is exactly one of the scheduler types `g`, `m` or `p`, or
/// - starts with one of the prefixes listed in `INTERNAL_PREFIXES`.
pub fn is_go_internal_type(name: &str) -> bool {
    /// Name prefixes treated as Go-internal.
    const INTERNAL_PREFIXES: &[&str] = &[
        // Go runtime and standard library internals
        "runtime.",
        "runtime/",
        "internal/",
        "reflect.",
        "sync.",
        "sync/",
        "syscall.",
        "unsafe.",
        // Runtime type descriptors
        "type:",
        "type..",
        // Map/channel implementation types
        "hash<",
        "bucket<",
        "hmap",
        "hchan",
        "waitq",
        "sudog",
        // Goroutine stack bookkeeping
        "stack",
    ];

    if name.contains('\u{00B7}') || matches!(name, "g" | "m" | "p") {
        return true;
    }

    INTERNAL_PREFIXES.iter().any(|&prefix| name.starts_with(prefix))
}
40
/// Borrowed view over loaded DWARF data, used to extract struct layouts.
///
/// All fields are populated from a `LoadedDwarf` by [`DwarfContext::new`].
pub struct DwarfContext<'a> {
    /// Parsed DWARF sections, borrowed from the loader for the lifetime `'a`.
    dwarf: &'a Dwarf<DwarfSlice<'a>>,
    /// Address size copied from the loader; passed to each `TypeResolver`.
    address_size: u8,
    /// Byte order copied from the loader; selects how a DWARF 4
    /// `DW_AT_bit_offset` is converted when processing bitfield members.
    endian: gimli::RunTimeEndian,
}
46
47impl<'a> DwarfContext<'a> {
48    pub fn new(loaded: &'a LoadedDwarf<'a>) -> Self {
49        Self { dwarf: &loaded.dwarf, address_size: loaded.address_size, endian: loaded.endian }
50    }
51
52    /// Find all structs in the binary.
53    ///
54    /// - `filter`: Optional substring filter for struct names
55    /// - `include_go_runtime`: If false, Go runtime internal types are filtered out
56    pub fn find_structs(
57        &self,
58        filter: Option<&str>,
59        include_go_runtime: bool,
60    ) -> Result<Vec<StructLayout>> {
61        let mut structs = Vec::new();
62        let mut units = self.dwarf.units();
63
64        while let Some(header) =
65            units.next().map_err(|e| Error::Dwarf(format!("Failed to read unit header: {}", e)))?
66        {
67            let unit = self
68                .dwarf
69                .unit(header)
70                .map_err(|e| Error::Dwarf(format!("Failed to parse unit: {}", e)))?;
71
72            self.process_unit(&unit, filter, include_go_runtime, &mut structs)?;
73        }
74
75        // DWARF can contain duplicate identical type entries (e.g., across units or due to
76        // language/compiler quirks). Deduplicate exact duplicates to avoid double-counting in
77        // `check` and unstable matching in `diff`.
78        // Use enumerated index as tiebreaker for stable deduplication (Rust's sort_by is unstable).
79        let mut with_fp: Vec<(StructFingerprint, usize, StructLayout)> =
80            structs.into_iter().enumerate().map(|(i, s)| (struct_fingerprint(&s), i, s)).collect();
81        with_fp.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
82        with_fp.dedup_by(|a, b| a.0 == b.0);
83
84        Ok(with_fp.into_iter().map(|(_, _, s)| s).collect())
85    }
86
87    fn process_unit(
88        &self,
89        unit: &Unit<DwarfSlice<'a>>,
90        filter: Option<&str>,
91        include_go_runtime: bool,
92        structs: &mut Vec<StructLayout>,
93    ) -> Result<()> {
94        let mut type_resolver = TypeResolver::new(self.dwarf, unit, self.address_size);
95        let mut entries = unit.entries();
96
97        while let Some((_, entry)) =
98            entries.next_dfs().map_err(|e| Error::Dwarf(format!("Failed to read DIE: {}", e)))?
99        {
100            if !matches!(entry.tag(), gimli::DW_TAG_structure_type | gimli::DW_TAG_class_type) {
101                continue;
102            }
103
104            if let Some(layout) = self.process_struct_entry(
105                unit,
106                entry,
107                filter,
108                include_go_runtime,
109                &mut type_resolver,
110            )? {
111                structs.push(layout);
112            }
113        }
114
115        Ok(())
116    }
117
118    fn process_struct_entry(
119        &self,
120        unit: &Unit<DwarfSlice<'a>>,
121        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
122        filter: Option<&str>,
123        include_go_runtime: bool,
124        type_resolver: &mut TypeResolver<'a, '_>,
125    ) -> Result<Option<StructLayout>> {
126        // Use consolidated helper for attribute extraction (see read_u64_from_attr).
127        let Some(size) =
128            read_u64_from_attr(entry.attr_value(gimli::DW_AT_byte_size).ok().flatten())
129        else {
130            return Ok(None); // Forward declaration or no size
131        };
132
133        let name = self.get_die_name(unit, entry)?;
134        let name = match name {
135            Some(n) if !n.starts_with("__") => n, // Skip compiler-generated
136            None => return Ok(None),              // Anonymous struct
137            _ => return Ok(None),
138        };
139
140        // Filter Go runtime internal types unless explicitly included
141        if !include_go_runtime && is_go_internal_type(&name) {
142            return Ok(None);
143        }
144
145        if filter.is_some_and(|f| !name.contains(f)) {
146            return Ok(None);
147        }
148
149        let alignment = read_u64_from_attr(entry.attr_value(gimli::DW_AT_alignment).ok().flatten());
150
151        let mut layout = StructLayout::new(name, size, alignment);
152        layout.source_location = self.get_source_location(unit, entry)?;
153        layout.members = self.extract_members(unit, entry, type_resolver)?;
154
155        Ok(Some(layout))
156    }
157
158    fn extract_members(
159        &self,
160        unit: &Unit<DwarfSlice<'a>>,
161        struct_entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
162        type_resolver: &mut TypeResolver<'a, '_>,
163    ) -> Result<Vec<MemberLayout>> {
164        let mut members = Vec::new();
165        let mut tree = unit
166            .entries_tree(Some(struct_entry.offset()))
167            .map_err(|e| Error::Dwarf(format!("Failed to create entries tree: {}", e)))?;
168
169        let root =
170            tree.root().map_err(|e| Error::Dwarf(format!("Failed to get tree root: {}", e)))?;
171
172        let mut children = root.children();
173        while let Some(child) = children
174            .next()
175            .map_err(|e| Error::Dwarf(format!("Failed to iterate children: {}", e)))?
176        {
177            let entry = child.entry();
178            match entry.tag() {
179                gimli::DW_TAG_member => {
180                    if let Some(member) = self.process_member(unit, entry, type_resolver)? {
181                        members.push(member);
182                    }
183                }
184                gimli::DW_TAG_inheritance => {
185                    if let Some(member) = self.process_inheritance(unit, entry, type_resolver)? {
186                        members.push(member);
187                    }
188                }
189                _ => {}
190            }
191        }
192
193        members.sort_by_key(|m| m.offset.unwrap_or(u64::MAX));
194        Ok(members)
195    }
196
197    /// Resolve type information from a DW_AT_type attribute.
198    /// Returns (type_name, size, is_atomic) or a default for unknown types.
199    fn resolve_type_attr(
200        &self,
201        unit: &Unit<DwarfSlice<'a>>,
202        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
203        type_resolver: &mut TypeResolver<'a, '_>,
204    ) -> Result<(String, Option<u64>, bool)> {
205        match entry.attr_value(gimli::DW_AT_type) {
206            Ok(Some(AttributeValue::UnitRef(type_offset))) => {
207                type_resolver.resolve_type(type_offset)
208            }
209            Ok(Some(AttributeValue::DebugInfoRef(debug_info_offset))) => {
210                // Use shared helper for cross-unit reference conversion.
211                if let Some(unit_offset) =
212                    debug_info_ref_to_unit_offset(debug_info_offset, &unit.header)
213                {
214                    type_resolver.resolve_type(unit_offset)
215                } else {
216                    Ok(("unknown".to_string(), None, false))
217                }
218            }
219            _ => Ok(("unknown".to_string(), None, false)),
220        }
221    }
222
223    fn process_inheritance(
224        &self,
225        unit: &Unit<DwarfSlice<'a>>,
226        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
227        type_resolver: &mut TypeResolver<'a, '_>,
228    ) -> Result<Option<MemberLayout>> {
229        let offset = self.get_member_offset(unit, entry)?;
230        let (type_name, size, is_atomic) = self.resolve_type_attr(unit, entry, type_resolver)?;
231
232        let name = format!("<base: {}>", type_name);
233        Ok(Some(MemberLayout::new(name, type_name, offset, size).with_atomic(is_atomic)))
234    }
235
236    fn process_member(
237        &self,
238        unit: &Unit<DwarfSlice<'a>>,
239        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
240        type_resolver: &mut TypeResolver<'a, '_>,
241    ) -> Result<Option<MemberLayout>> {
242        let name = self.get_die_name(unit, entry)?.unwrap_or_else(|| "<anonymous>".to_string());
243        let (type_name, size, is_atomic) = self.resolve_type_attr(unit, entry, type_resolver)?;
244
245        let offset = self.get_member_offset(unit, entry)?;
246
247        let mut member = MemberLayout::new(name, type_name, offset, size).with_atomic(is_atomic);
248
249        let bit_size = read_u64_from_attr(entry.attr_value(gimli::DW_AT_bit_size).ok().flatten());
250        let dwarf5_data_bit_offset =
251            read_u64_from_attr(entry.attr_value(gimli::DW_AT_data_bit_offset).ok().flatten());
252        let dwarf4_bit_offset =
253            read_u64_from_attr(entry.attr_value(gimli::DW_AT_bit_offset).ok().flatten());
254
255        member.bit_size = bit_size;
256
257        if let Some(bit_size) = bit_size
258            && let Some(storage_bytes) = member.size
259            && storage_bytes > 0
260        {
261            let storage_bits = storage_bytes.saturating_mul(8);
262
263            // Determine the containing storage unit byte offset for this bitfield.
264            // Prefer DW_AT_data_member_location when present. If absent, infer the
265            // storage unit start by aligning the absolute DW_AT_data_bit_offset down
266            // to the storage unit size.
267            let container_offset = member.offset.or_else(|| {
268                let data_bit_offset = dwarf5_data_bit_offset?;
269                let start_byte = data_bit_offset.checked_div(8)?;
270                // storage_bytes > 0 is guaranteed by the outer if-let guard
271                start_byte.checked_div(storage_bytes)?.checked_mul(storage_bytes)
272            });
273
274            if member.offset.is_none() {
275                member.offset = container_offset;
276            }
277
278            // Compute bit offset within the containing storage unit.
279            if let Some(container_offset) = container_offset {
280                if let Some(data_bit_offset) = dwarf5_data_bit_offset {
281                    // Use checked_mul to avoid overflow for large container offsets.
282                    if let Some(container_bits) = container_offset.checked_mul(8) {
283                        member.bit_offset = Some(data_bit_offset.saturating_sub(container_bits));
284                    }
285                } else if let Some(raw_bit_offset) = dwarf4_bit_offset {
286                    // Use checked_add to avoid overflow in boundary check.
287                    if let Some(end_bit) = raw_bit_offset.checked_add(bit_size) {
288                        if end_bit <= storage_bits {
289                            let bit_offset = match self.endian {
290                                gimli::RunTimeEndian::Little => {
291                                    storage_bits - raw_bit_offset - bit_size
292                                }
293                                gimli::RunTimeEndian::Big => raw_bit_offset,
294                            };
295                            member.bit_offset = Some(bit_offset);
296                        }
297                    }
298                }
299            }
300        }
301
302        Ok(Some(member))
303    }
304
305    fn get_member_offset(
306        &self,
307        unit: &Unit<DwarfSlice<'a>>,
308        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
309    ) -> Result<Option<u64>> {
310        match entry.attr_value(gimli::DW_AT_data_member_location) {
311            Ok(Some(AttributeValue::Udata(offset))) => Ok(Some(offset)),
312            Ok(Some(AttributeValue::Data1(offset))) => Ok(Some(offset as u64)),
313            Ok(Some(AttributeValue::Data2(offset))) => Ok(Some(offset as u64)),
314            Ok(Some(AttributeValue::Data4(offset))) => Ok(Some(offset as u64)),
315            Ok(Some(AttributeValue::Data8(offset))) => Ok(Some(offset)),
316            Ok(Some(AttributeValue::Sdata(offset))) if offset >= 0 => Ok(Some(offset as u64)),
317            Ok(Some(AttributeValue::Exprloc(expr))) => {
318                // Try simple constant extraction first (fast path)
319                if let Some(offset) = try_simple_offset(expr, unit.encoding()) {
320                    return Ok(Some(offset));
321                }
322                // Fall back to full expression evaluation
323                evaluate_member_offset(expr, unit.encoding())
324            }
325            Ok(None) => Ok(None), // Missing offset - don't assume 0 (bitfields, packed structs)
326            _ => Ok(None),
327        }
328    }
329
330    fn get_die_name(
331        &self,
332        unit: &Unit<DwarfSlice<'a>>,
333        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
334    ) -> Result<Option<String>> {
335        match entry.attr_value(gimli::DW_AT_name) {
336            Ok(Some(attr)) => {
337                let name = self
338                    .dwarf
339                    .attr_string(unit, attr)
340                    .map_err(|e| Error::Dwarf(format!("Failed to read name: {}", e)))?;
341                Ok(Some(name.to_string_lossy().into_owned()))
342            }
343            Ok(None) => Ok(None),
344            Err(e) => Err(Error::Dwarf(format!("Failed to read name attribute: {}", e))),
345        }
346    }
347
348    fn get_source_location(
349        &self,
350        unit: &Unit<DwarfSlice<'a>>,
351        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
352    ) -> Result<Option<SourceLocation>> {
353        let Some(file_index) =
354            read_u64_from_attr(entry.attr_value(gimli::DW_AT_decl_file).ok().flatten())
355        else {
356            return Ok(None);
357        };
358        let Some(line) =
359            read_u64_from_attr(entry.attr_value(gimli::DW_AT_decl_line).ok().flatten())
360        else {
361            return Ok(None);
362        };
363
364        // Try to resolve the file name from the line program header
365        let file_name = self.resolve_file_name(unit, file_index).unwrap_or_else(|| {
366            // Fall back to file index if resolution fails
367            format!("file#{}", file_index)
368        });
369
370        Ok(Some(SourceLocation { file: file_name, line }))
371    }
372
373    /// Resolve a file index to an actual file path using the .debug_line section.
374    fn resolve_file_name(&self, unit: &Unit<DwarfSlice<'a>>, file_index: u64) -> Option<String> {
375        // Get the line program for this unit (borrow instead of clone for efficiency)
376        let line_program = unit.line_program.as_ref()?;
377
378        let header = line_program.header();
379
380        // File indices in DWARF are 1-based (0 means no file in DWARF 4, or the compilation
381        // directory in DWARF 5). We need to handle both cases.
382        let file = header.file(file_index)?;
383
384        // Get the file name
385        let file_name =
386            self.dwarf.attr_string(unit, file.path_name()).ok()?.to_string_lossy().into_owned();
387
388        // Try to get the directory
389        if let Some(dir) = file.directory(header) {
390            if let Ok(dir_str) = self.dwarf.attr_string(unit, dir) {
391                let dir_name = dir_str.to_string_lossy();
392                if !dir_name.is_empty() {
393                    // Combine directory and file name
394                    return Some(format!("{}/{}", dir_name, file_name));
395                }
396            }
397        }
398
399        Some(file_name)
400    }
401}
402
/// Orderable, comparable summary of a `StructLayout`, used by
/// `find_structs` to sort layouts and drop exact duplicates.
///
/// The derived ordering compares fields in declaration order, so layouts
/// sort by name first.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct StructFingerprint {
    /// Struct name.
    name: String,
    /// Total size, copied from the layout's `size`.
    size: u64,
    /// Alignment, if the layout records one.
    alignment: Option<u64>,
    /// Declaration site as `(file, line)`, if known.
    source: Option<(String, u64)>,
    /// One fingerprint per member, in the layout's member order.
    members: Vec<MemberFingerprint>,
}
411
/// Orderable, comparable summary of a single `MemberLayout`; part of a
/// `StructFingerprint`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct MemberFingerprint {
    /// Member name.
    name: String,
    /// Name of the member's type.
    type_name: String,
    /// Offset copied from the member, if known.
    offset: Option<u64>,
    /// Size copied from the member, if known.
    size: Option<u64>,
    /// Bitfield offset copied from the member, if any.
    bit_offset: Option<u64>,
    /// Bitfield width copied from the member, if any.
    bit_size: Option<u64>,
    /// Atomic flag copied from the member.
    is_atomic: bool,
}
422
423fn struct_fingerprint(s: &StructLayout) -> StructFingerprint {
424    StructFingerprint {
425        name: s.name.clone(),
426        size: s.size,
427        alignment: s.alignment,
428        source: s.source_location.as_ref().map(|l| (l.file.clone(), l.line)),
429        members: s
430            .members
431            .iter()
432            .map(|m| MemberFingerprint {
433                name: m.name.clone(),
434                type_name: m.type_name.clone(),
435                offset: m.offset,
436                size: m.size,
437                bit_offset: m.bit_offset,
438                bit_size: m.bit_size,
439                is_atomic: m.is_atomic,
440            })
441            .collect(),
442    }
443}
444
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the Go-internal name filter against known internal names and
    /// ordinary user type names.
    #[test]
    fn test_is_go_internal_type() {
        // Should be filtered
        const INTERNAL: &[&str] = &[
            "runtime.g",
            "runtime.m",
            "runtime.stack",
            "runtime/internal/atomic.Uint64",
            "internal/abi.Type",
            "reflect.Value",
            "sync.Mutex",
            "sync/atomic.Int64",
            "syscall.Stat_t",
            "unsafe.Pointer",
            "type:main.MyStruct",
            "type..hash.main.MyStruct",
            "hmap",
            "hchan",
            "g",
            "m",
            "p",
            "stackObject",
        ];
        // Should NOT be filtered (user types)
        const USER: &[&str] = &[
            "main.Order",
            "main.Config",
            "mypackage.MyStruct",
            "github.com/user/pkg.Type",
            "Order",
            "Config",
        ];

        for name in INTERNAL {
            assert!(is_go_internal_type(name), "expected {name:?} to be filtered");
        }
        for name in USER {
            assert!(!is_go_internal_type(name), "expected {name:?} to be kept");
        }
    }
}