layout_audit/dwarf/
context.rs

1use crate::error::{Error, Result};
2use crate::loader::{DwarfSlice, LoadedDwarf};
3use crate::types::{MemberLayout, SourceLocation, StructLayout};
4use gimli::{AttributeValue, DebuggingInformationEntry, Dwarf, Unit};
5
6use super::TypeResolver;
7use super::expr::{evaluate_member_offset, try_simple_offset};
8use super::{debug_info_ref_to_unit_offset, read_u64_from_attr};
9
10pub struct DwarfContext<'a> {
11    dwarf: &'a Dwarf<DwarfSlice<'a>>,
12    address_size: u8,
13    endian: gimli::RunTimeEndian,
14}
15
16impl<'a> DwarfContext<'a> {
17    pub fn new(loaded: &'a LoadedDwarf<'a>) -> Self {
18        Self { dwarf: &loaded.dwarf, address_size: loaded.address_size, endian: loaded.endian }
19    }
20
21    pub fn find_structs(&self, filter: Option<&str>) -> Result<Vec<StructLayout>> {
22        let mut structs = Vec::new();
23        let mut units = self.dwarf.units();
24
25        while let Some(header) =
26            units.next().map_err(|e| Error::Dwarf(format!("Failed to read unit header: {}", e)))?
27        {
28            let unit = self
29                .dwarf
30                .unit(header)
31                .map_err(|e| Error::Dwarf(format!("Failed to parse unit: {}", e)))?;
32
33            self.process_unit(&unit, filter, &mut structs)?;
34        }
35
36        // DWARF can contain duplicate identical type entries (e.g., across units or due to
37        // language/compiler quirks). Deduplicate exact duplicates to avoid double-counting in
38        // `check` and unstable matching in `diff`.
39        // Use enumerated index as tiebreaker for stable deduplication (Rust's sort_by is unstable).
40        let mut with_fp: Vec<(StructFingerprint, usize, StructLayout)> =
41            structs.into_iter().enumerate().map(|(i, s)| (struct_fingerprint(&s), i, s)).collect();
42        with_fp.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
43        with_fp.dedup_by(|a, b| a.0 == b.0);
44
45        Ok(with_fp.into_iter().map(|(_, _, s)| s).collect())
46    }
47
48    fn process_unit(
49        &self,
50        unit: &Unit<DwarfSlice<'a>>,
51        filter: Option<&str>,
52        structs: &mut Vec<StructLayout>,
53    ) -> Result<()> {
54        let mut type_resolver = TypeResolver::new(self.dwarf, unit, self.address_size);
55        let mut entries = unit.entries();
56
57        while let Some((_, entry)) =
58            entries.next_dfs().map_err(|e| Error::Dwarf(format!("Failed to read DIE: {}", e)))?
59        {
60            if !matches!(entry.tag(), gimli::DW_TAG_structure_type | gimli::DW_TAG_class_type) {
61                continue;
62            }
63
64            if let Some(layout) =
65                self.process_struct_entry(unit, entry, filter, &mut type_resolver)?
66            {
67                structs.push(layout);
68            }
69        }
70
71        Ok(())
72    }
73
74    fn process_struct_entry(
75        &self,
76        unit: &Unit<DwarfSlice<'a>>,
77        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
78        filter: Option<&str>,
79        type_resolver: &mut TypeResolver<'a, '_>,
80    ) -> Result<Option<StructLayout>> {
81        // Use consolidated helper for attribute extraction (see read_u64_from_attr).
82        let Some(size) =
83            read_u64_from_attr(entry.attr_value(gimli::DW_AT_byte_size).ok().flatten())
84        else {
85            return Ok(None); // Forward declaration or no size
86        };
87
88        let name = self.get_die_name(unit, entry)?;
89        let name = match name {
90            Some(n) if !n.starts_with("__") => n, // Skip compiler-generated
91            None => return Ok(None),              // Anonymous struct
92            _ => return Ok(None),
93        };
94
95        if filter.is_some_and(|f| !name.contains(f)) {
96            return Ok(None);
97        }
98
99        let alignment = read_u64_from_attr(entry.attr_value(gimli::DW_AT_alignment).ok().flatten());
100
101        let mut layout = StructLayout::new(name, size, alignment);
102        layout.source_location = self.get_source_location(unit, entry)?;
103        layout.members = self.extract_members(unit, entry, type_resolver)?;
104
105        Ok(Some(layout))
106    }
107
108    fn extract_members(
109        &self,
110        unit: &Unit<DwarfSlice<'a>>,
111        struct_entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
112        type_resolver: &mut TypeResolver<'a, '_>,
113    ) -> Result<Vec<MemberLayout>> {
114        let mut members = Vec::new();
115        let mut tree = unit
116            .entries_tree(Some(struct_entry.offset()))
117            .map_err(|e| Error::Dwarf(format!("Failed to create entries tree: {}", e)))?;
118
119        let root =
120            tree.root().map_err(|e| Error::Dwarf(format!("Failed to get tree root: {}", e)))?;
121
122        let mut children = root.children();
123        while let Some(child) = children
124            .next()
125            .map_err(|e| Error::Dwarf(format!("Failed to iterate children: {}", e)))?
126        {
127            let entry = child.entry();
128            match entry.tag() {
129                gimli::DW_TAG_member => {
130                    if let Some(member) = self.process_member(unit, entry, type_resolver)? {
131                        members.push(member);
132                    }
133                }
134                gimli::DW_TAG_inheritance => {
135                    if let Some(member) = self.process_inheritance(unit, entry, type_resolver)? {
136                        members.push(member);
137                    }
138                }
139                _ => {}
140            }
141        }
142
143        members.sort_by_key(|m| m.offset.unwrap_or(u64::MAX));
144        Ok(members)
145    }
146
147    /// Resolve type information from a DW_AT_type attribute.
148    /// Returns (type_name, size, is_atomic) or a default for unknown types.
149    fn resolve_type_attr(
150        &self,
151        unit: &Unit<DwarfSlice<'a>>,
152        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
153        type_resolver: &mut TypeResolver<'a, '_>,
154    ) -> Result<(String, Option<u64>, bool)> {
155        match entry.attr_value(gimli::DW_AT_type) {
156            Ok(Some(AttributeValue::UnitRef(type_offset))) => {
157                type_resolver.resolve_type(type_offset)
158            }
159            Ok(Some(AttributeValue::DebugInfoRef(debug_info_offset))) => {
160                // Use shared helper for cross-unit reference conversion.
161                if let Some(unit_offset) =
162                    debug_info_ref_to_unit_offset(debug_info_offset, &unit.header)
163                {
164                    type_resolver.resolve_type(unit_offset)
165                } else {
166                    Ok(("unknown".to_string(), None, false))
167                }
168            }
169            _ => Ok(("unknown".to_string(), None, false)),
170        }
171    }
172
173    fn process_inheritance(
174        &self,
175        unit: &Unit<DwarfSlice<'a>>,
176        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
177        type_resolver: &mut TypeResolver<'a, '_>,
178    ) -> Result<Option<MemberLayout>> {
179        let offset = self.get_member_offset(unit, entry)?;
180        let (type_name, size, is_atomic) = self.resolve_type_attr(unit, entry, type_resolver)?;
181
182        let name = format!("<base: {}>", type_name);
183        Ok(Some(MemberLayout::new(name, type_name, offset, size).with_atomic(is_atomic)))
184    }
185
186    fn process_member(
187        &self,
188        unit: &Unit<DwarfSlice<'a>>,
189        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
190        type_resolver: &mut TypeResolver<'a, '_>,
191    ) -> Result<Option<MemberLayout>> {
192        let name = self.get_die_name(unit, entry)?.unwrap_or_else(|| "<anonymous>".to_string());
193        let (type_name, size, is_atomic) = self.resolve_type_attr(unit, entry, type_resolver)?;
194
195        let offset = self.get_member_offset(unit, entry)?;
196
197        let mut member = MemberLayout::new(name, type_name, offset, size).with_atomic(is_atomic);
198
199        let bit_size = read_u64_from_attr(entry.attr_value(gimli::DW_AT_bit_size).ok().flatten());
200        let dwarf5_data_bit_offset =
201            read_u64_from_attr(entry.attr_value(gimli::DW_AT_data_bit_offset).ok().flatten());
202        let dwarf4_bit_offset =
203            read_u64_from_attr(entry.attr_value(gimli::DW_AT_bit_offset).ok().flatten());
204
205        member.bit_size = bit_size;
206
207        if let Some(bit_size) = bit_size
208            && let Some(storage_bytes) = member.size
209            && storage_bytes > 0
210        {
211            let storage_bits = storage_bytes.saturating_mul(8);
212
213            // Determine the containing storage unit byte offset for this bitfield.
214            // Prefer DW_AT_data_member_location when present. If absent, infer the
215            // storage unit start by aligning the absolute DW_AT_data_bit_offset down
216            // to the storage unit size.
217            let container_offset = member.offset.or_else(|| {
218                let data_bit_offset = dwarf5_data_bit_offset?;
219                let start_byte = data_bit_offset.checked_div(8)?;
220                // storage_bytes > 0 is guaranteed by the outer if-let guard
221                start_byte.checked_div(storage_bytes)?.checked_mul(storage_bytes)
222            });
223
224            if member.offset.is_none() {
225                member.offset = container_offset;
226            }
227
228            // Compute bit offset within the containing storage unit.
229            if let Some(container_offset) = container_offset {
230                if let Some(data_bit_offset) = dwarf5_data_bit_offset {
231                    // Use checked_mul to avoid overflow for large container offsets.
232                    if let Some(container_bits) = container_offset.checked_mul(8) {
233                        member.bit_offset = Some(data_bit_offset.saturating_sub(container_bits));
234                    }
235                } else if let Some(raw_bit_offset) = dwarf4_bit_offset {
236                    // Use checked_add to avoid overflow in boundary check.
237                    if let Some(end_bit) = raw_bit_offset.checked_add(bit_size) {
238                        if end_bit <= storage_bits {
239                            let bit_offset = match self.endian {
240                                gimli::RunTimeEndian::Little => {
241                                    storage_bits - raw_bit_offset - bit_size
242                                }
243                                gimli::RunTimeEndian::Big => raw_bit_offset,
244                            };
245                            member.bit_offset = Some(bit_offset);
246                        }
247                    }
248                }
249            }
250        }
251
252        Ok(Some(member))
253    }
254
255    fn get_member_offset(
256        &self,
257        unit: &Unit<DwarfSlice<'a>>,
258        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
259    ) -> Result<Option<u64>> {
260        match entry.attr_value(gimli::DW_AT_data_member_location) {
261            Ok(Some(AttributeValue::Udata(offset))) => Ok(Some(offset)),
262            Ok(Some(AttributeValue::Data1(offset))) => Ok(Some(offset as u64)),
263            Ok(Some(AttributeValue::Data2(offset))) => Ok(Some(offset as u64)),
264            Ok(Some(AttributeValue::Data4(offset))) => Ok(Some(offset as u64)),
265            Ok(Some(AttributeValue::Data8(offset))) => Ok(Some(offset)),
266            Ok(Some(AttributeValue::Sdata(offset))) if offset >= 0 => Ok(Some(offset as u64)),
267            Ok(Some(AttributeValue::Exprloc(expr))) => {
268                // Try simple constant extraction first (fast path)
269                if let Some(offset) = try_simple_offset(expr, unit.encoding()) {
270                    return Ok(Some(offset));
271                }
272                // Fall back to full expression evaluation
273                evaluate_member_offset(expr, unit.encoding())
274            }
275            Ok(None) => Ok(None), // Missing offset - don't assume 0 (bitfields, packed structs)
276            _ => Ok(None),
277        }
278    }
279
280    fn get_die_name(
281        &self,
282        unit: &Unit<DwarfSlice<'a>>,
283        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
284    ) -> Result<Option<String>> {
285        match entry.attr_value(gimli::DW_AT_name) {
286            Ok(Some(attr)) => {
287                let name = self
288                    .dwarf
289                    .attr_string(unit, attr)
290                    .map_err(|e| Error::Dwarf(format!("Failed to read name: {}", e)))?;
291                Ok(Some(name.to_string_lossy().into_owned()))
292            }
293            Ok(None) => Ok(None),
294            Err(e) => Err(Error::Dwarf(format!("Failed to read name attribute: {}", e))),
295        }
296    }
297
298    fn get_source_location(
299        &self,
300        unit: &Unit<DwarfSlice<'a>>,
301        entry: &DebuggingInformationEntry<DwarfSlice<'a>>,
302    ) -> Result<Option<SourceLocation>> {
303        let Some(file_index) =
304            read_u64_from_attr(entry.attr_value(gimli::DW_AT_decl_file).ok().flatten())
305        else {
306            return Ok(None);
307        };
308        let Some(line) =
309            read_u64_from_attr(entry.attr_value(gimli::DW_AT_decl_line).ok().flatten())
310        else {
311            return Ok(None);
312        };
313
314        // Try to resolve the file name from the line program header
315        let file_name = self.resolve_file_name(unit, file_index).unwrap_or_else(|| {
316            // Fall back to file index if resolution fails
317            format!("file#{}", file_index)
318        });
319
320        Ok(Some(SourceLocation { file: file_name, line }))
321    }
322
323    /// Resolve a file index to an actual file path using the .debug_line section.
324    fn resolve_file_name(&self, unit: &Unit<DwarfSlice<'a>>, file_index: u64) -> Option<String> {
325        // Get the line program for this unit (borrow instead of clone for efficiency)
326        let line_program = unit.line_program.as_ref()?;
327
328        let header = line_program.header();
329
330        // File indices in DWARF are 1-based (0 means no file in DWARF 4, or the compilation
331        // directory in DWARF 5). We need to handle both cases.
332        let file = header.file(file_index)?;
333
334        // Get the file name
335        let file_name =
336            self.dwarf.attr_string(unit, file.path_name()).ok()?.to_string_lossy().into_owned();
337
338        // Try to get the directory
339        if let Some(dir) = file.directory(header) {
340            if let Ok(dir_str) = self.dwarf.attr_string(unit, dir) {
341                let dir_name = dir_str.to_string_lossy();
342                if !dir_name.is_empty() {
343                    // Combine directory and file name
344                    return Some(format!("{}/{}", dir_name, file_name));
345                }
346            }
347        }
348
349        Some(file_name)
350    }
351}
352
353#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
354struct StructFingerprint {
355    name: String,
356    size: u64,
357    alignment: Option<u64>,
358    source: Option<(String, u64)>,
359    members: Vec<MemberFingerprint>,
360}
361
/// Totally ordered, comparable summary of one struct member.
///
/// Field order is significant: the derived `Ord` compares fields in declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct MemberFingerprint {
    /// Member name.
    name: String,
    /// Name of the member's type.
    type_name: String,
    /// Byte offset, when known.
    offset: Option<u64>,
    /// Size of the member, when known.
    size: Option<u64>,
    /// Bit offset, when one was computed (bitfields).
    bit_offset: Option<u64>,
    /// Bit width, when the member is a bitfield.
    bit_size: Option<u64>,
    /// Whether the member's type was reported as atomic.
    is_atomic: bool,
}
372
373fn struct_fingerprint(s: &StructLayout) -> StructFingerprint {
374    StructFingerprint {
375        name: s.name.clone(),
376        size: s.size,
377        alignment: s.alignment,
378        source: s.source_location.as_ref().map(|l| (l.file.clone(), l.line)),
379        members: s
380            .members
381            .iter()
382            .map(|m| MemberFingerprint {
383                name: m.name.clone(),
384                type_name: m.type_name.clone(),
385                offset: m.offset,
386                size: m.size,
387                bit_offset: m.bit_offset,
388                bit_size: m.bit_size,
389                is_atomic: m.is_atomic,
390            })
391            .collect(),
392    }
393}