layout_audit/dwarf/
types.rs

1use crate::error::{Error, Result};
2use crate::loader::DwarfSlice;
3use gimli::{AttributeValue, Dwarf, Unit, UnitOffset};
4use std::collections::HashMap;
5
6use super::{debug_info_ref_to_unit_offset, read_u64_from_attr};
7
/// Result of resolving a type: `(type_name, size, is_atomic)`.
///
/// * `type_name` - human-readable rendering of the type.
/// * `size` - size in bytes, or `None` when it could not be determined.
/// * `is_atomic` - `true` when a `DW_TAG_atomic_type` qualifier applies to the type.
pub type TypeInfo = (String, Option<u64>, bool);
10
/// Resolves DWARF type entries of a single compilation unit into [`TypeInfo`] tuples.
///
/// Results of top-level lookups are memoized per unit offset.
pub struct TypeResolver<'a, 'b> {
    /// DWARF sections; used to read string attributes such as type names.
    dwarf: &'b Dwarf<DwarfSlice<'a>>,
    /// Compilation unit whose entries are looked up by offset.
    unit: &'b Unit<DwarfSlice<'a>>,
    /// Size in bytes reported for pointer, reference and subroutine types.
    address_size: u8,
    /// Memoized results of `resolve_type`, keyed by the entry's unit offset.
    cache: HashMap<UnitOffset, TypeInfo>,
}
17
18impl<'a, 'b> TypeResolver<'a, 'b> {
19    pub fn new(
20        dwarf: &'b Dwarf<DwarfSlice<'a>>,
21        unit: &'b Unit<DwarfSlice<'a>>,
22        address_size: u8,
23    ) -> Self {
24        Self { dwarf, unit, address_size, cache: HashMap::new() }
25    }
26
27    pub fn resolve_type(&mut self, offset: UnitOffset) -> Result<TypeInfo> {
28        if let Some(cached) = self.cache.get(&offset) {
29            return Ok(cached.clone());
30        }
31
32        let result = self.resolve_type_inner(offset, 0, false)?;
33        self.cache.insert(offset, result.clone());
34        Ok(result)
35    }
36
37    fn resolve_type_inner(
38        &mut self,
39        offset: UnitOffset,
40        depth: usize,
41        is_atomic: bool,
42    ) -> Result<TypeInfo> {
43        if depth > 20 {
44            return Ok(("...".to_string(), None, is_atomic));
45        }
46
47        let entry = self
48            .unit
49            .entry(offset)
50            .map_err(|e| Error::Dwarf(format!("Failed to get type entry: {}", e)))?;
51
52        let tag = entry.tag();
53
54        match tag {
55            gimli::DW_TAG_base_type => {
56                let name = self.get_type_name(&entry)?.unwrap_or_else(|| "?".to_string());
57                let size = self.get_byte_size(&entry)?;
58                Ok((name, size, is_atomic))
59            }
60
61            gimli::DW_TAG_pointer_type => {
62                let pointee = if let Some(type_offset) = self.get_type_ref(&entry)? {
63                    let (pointee_name, _, _) =
64                        self.resolve_type_inner(type_offset, depth + 1, false)?;
65                    pointee_name
66                } else {
67                    "void".to_string()
68                };
69                Ok((format!("*{}", pointee), Some(self.address_size as u64), is_atomic))
70            }
71
72            gimli::DW_TAG_reference_type => {
73                let referee = if let Some(type_offset) = self.get_type_ref(&entry)? {
74                    let (referee_name, _, _) =
75                        self.resolve_type_inner(type_offset, depth + 1, false)?;
76                    referee_name
77                } else {
78                    "void".to_string()
79                };
80                Ok((format!("&{}", referee), Some(self.address_size as u64), is_atomic))
81            }
82
83            gimli::DW_TAG_const_type
84            | gimli::DW_TAG_volatile_type
85            | gimli::DW_TAG_restrict_type => {
86                // All three tags are matched in the outer arm, so this is exhaustive.
87                let prefix = match tag {
88                    gimli::DW_TAG_const_type => "const ",
89                    gimli::DW_TAG_volatile_type => "volatile ",
90                    _ => "restrict ", // DW_TAG_restrict_type
91                };
92                if let Some(type_offset) = self.get_type_ref(&entry)? {
93                    let (inner_name, size, inner_atomic) =
94                        self.resolve_type_inner(type_offset, depth + 1, is_atomic)?;
95                    Ok((format!("{}{}", prefix, inner_name), size, inner_atomic))
96                } else {
97                    Ok((format!("{}void", prefix), None, is_atomic))
98                }
99            }
100
101            gimli::DW_TAG_atomic_type => {
102                // Mark as atomic and propagate through the type chain
103                if let Some(type_offset) = self.get_type_ref(&entry)? {
104                    let (inner_name, size, _) =
105                        self.resolve_type_inner(type_offset, depth + 1, true)?;
106                    Ok((format!("_Atomic {}", inner_name), size, true))
107                } else {
108                    Ok(("_Atomic void".to_string(), None, true))
109                }
110            }
111
112            gimli::DW_TAG_typedef => {
113                let name = self.get_type_name(&entry)?;
114                if let Some(type_offset) = self.get_type_ref(&entry)? {
115                    let (_, size, inner_atomic) =
116                        self.resolve_type_inner(type_offset, depth + 1, is_atomic)?;
117                    // Propagate atomic flag through typedefs
118                    Ok((
119                        name.unwrap_or_else(|| "typedef".to_string()),
120                        size,
121                        inner_atomic || is_atomic,
122                    ))
123                } else {
124                    Ok((name.unwrap_or_else(|| "typedef".to_string()), None, is_atomic))
125                }
126            }
127
128            gimli::DW_TAG_array_type => {
129                let element_type = if let Some(type_offset) = self.get_type_ref(&entry)? {
130                    self.resolve_type_inner(type_offset, depth + 1, is_atomic)?
131                } else {
132                    ("?".to_string(), None, is_atomic)
133                };
134
135                let count = self.get_array_count(&entry)?;
136                let size = match (element_type.1, count) {
137                    // Use checked_mul to prevent overflow for very large arrays.
138                    // Fall back to DW_AT_byte_size if multiplication overflows.
139                    (Some(elem_size), Some(c)) => elem_size
140                        .checked_mul(c)
141                        .or_else(|| self.get_byte_size(&entry).ok().flatten()),
142                    _ => self.get_byte_size(&entry)?,
143                };
144
145                let count_str = count.map(|c| c.to_string()).unwrap_or_else(|| "?".to_string());
146                Ok((format!("[{}; {}]", element_type.0, count_str), size, element_type.2))
147            }
148
149            gimli::DW_TAG_structure_type | gimli::DW_TAG_class_type | gimli::DW_TAG_union_type => {
150                let name = self.get_type_name(&entry)?.unwrap_or_else(|| "<anonymous>".to_string());
151                let size = self.get_byte_size(&entry)?;
152                Ok((name, size, is_atomic))
153            }
154
155            gimli::DW_TAG_enumeration_type => {
156                let name = self.get_type_name(&entry)?.unwrap_or_else(|| "enum".to_string());
157                let size = self.get_byte_size(&entry)?;
158                Ok((name, size, is_atomic))
159            }
160
161            gimli::DW_TAG_subroutine_type => {
162                Ok(("fn(...)".to_string(), Some(self.address_size as u64), is_atomic))
163            }
164
165            _ => {
166                let name = self.get_type_name(&entry)?.unwrap_or_else(|| format!("?<{:?}>", tag));
167                let size = self.get_byte_size(&entry)?;
168                Ok((name, size, is_atomic))
169            }
170        }
171    }
172
173    fn get_type_name(
174        &self,
175        entry: &gimli::DebuggingInformationEntry<DwarfSlice<'a>>,
176    ) -> Result<Option<String>> {
177        match entry.attr_value(gimli::DW_AT_name) {
178            Ok(Some(attr)) => {
179                let name = self
180                    .dwarf
181                    .attr_string(self.unit, attr)
182                    .map_err(|e| Error::Dwarf(format!("Failed to read type name: {}", e)))?;
183                Ok(Some(name.to_string_lossy().into_owned()))
184            }
185            Ok(None) => Ok(None),
186            Err(e) => Err(Error::Dwarf(format!("Failed to read name attr: {}", e))),
187        }
188    }
189
190    fn get_byte_size(
191        &self,
192        entry: &gimli::DebuggingInformationEntry<DwarfSlice<'a>>,
193    ) -> Result<Option<u64>> {
194        // Use shared helper for consistent attribute extraction.
195        Ok(read_u64_from_attr(entry.attr_value(gimli::DW_AT_byte_size).ok().flatten()))
196    }
197
198    fn get_type_ref(
199        &self,
200        entry: &gimli::DebuggingInformationEntry<DwarfSlice<'a>>,
201    ) -> Result<Option<UnitOffset>> {
202        match entry.attr_value(gimli::DW_AT_type) {
203            Ok(Some(AttributeValue::UnitRef(offset))) => Ok(Some(offset)),
204            Ok(Some(AttributeValue::DebugInfoRef(debug_info_offset))) => {
205                // Use shared helper for cross-unit reference conversion.
206                Ok(debug_info_ref_to_unit_offset(debug_info_offset, &self.unit.header))
207            }
208            _ => Ok(None),
209        }
210    }
211
212    fn get_array_count(
213        &self,
214        entry: &gimli::DebuggingInformationEntry<DwarfSlice<'a>>,
215    ) -> Result<Option<u64>> {
216        let mut tree = self
217            .unit
218            .entries_tree(Some(entry.offset()))
219            .map_err(|e| Error::Dwarf(format!("Failed to create tree: {}", e)))?;
220
221        let root = tree.root().map_err(|e| Error::Dwarf(format!("Failed to get root: {}", e)))?;
222
223        let mut children = root.children();
224        while let Some(child) =
225            children.next().map_err(|e| Error::Dwarf(format!("Failed to iterate: {}", e)))?
226        {
227            let child_entry = child.entry();
228            if child_entry.tag() == gimli::DW_TAG_subrange_type {
229                // Try DW_AT_count first (can be various data encodings)
230                if let Some(count) = self.extract_count_attr(child_entry, gimli::DW_AT_count)? {
231                    return Ok(Some(count));
232                }
233                // Fall back to DW_AT_upper_bound (0-indexed, so add 1).
234                // Use checked_add to handle corrupted DWARF with upper == u64::MAX.
235                if let Some(upper) =
236                    self.extract_count_attr(child_entry, gimli::DW_AT_upper_bound)?
237                {
238                    return Ok(upper.checked_add(1));
239                }
240            }
241        }
242
243        Ok(None)
244    }
245
246    fn extract_count_attr(
247        &self,
248        entry: &gimli::DebuggingInformationEntry<DwarfSlice<'a>>,
249        attr: gimli::DwAt,
250    ) -> Result<Option<u64>> {
251        // Use shared helper for consistent attribute extraction.
252        Ok(read_u64_from_attr(entry.attr_value(attr).ok().flatten()))
253    }
254}
255
#[cfg(test)]
mod tests {
    use super::*;
    use crate::loader::BinaryData;
    use gimli::DwTag;
    use std::path::{Path, PathBuf};

    /// Looks for the fixture `name` under `tests/fixtures/bin`.
    ///
    /// Candidates are probed in order: the DWARF file inside a `.dSYM` bundle,
    /// a `.exe` file, then a file named exactly `name`. Returns the first one
    /// that exists.
    fn find_fixture_path(name: &str) -> Option<PathBuf> {
        let base = Path::new("tests/fixtures/bin");
        let candidates = [
            base.join(format!("{}.dSYM/Contents/Resources/DWARF/{}", name, name)),
            base.join(format!("{}.exe", name)),
            base.join(name),
        ];

        for candidate in candidates {
            if candidate.exists() {
                return Some(candidate);
            }
        }
        None
    }

    /// Returns the offset of the first entry in `unit` (depth-first order) with `tag`.
    ///
    /// A traversal error ends the search and yields `None`.
    fn find_type_offset(unit: &Unit<DwarfSlice<'_>>, tag: DwTag) -> Option<UnitOffset> {
        let mut cursor = unit.entries();
        loop {
            match cursor.next_dfs() {
                Ok(Some((_, die))) if die.tag() == tag => return Some(die.offset()),
                Ok(Some(_)) => continue,
                Ok(None) | Err(_) => return None,
            }
        }
    }

    /// Loads `fixture`, builds a resolver for its first compilation unit and runs `body`.
    ///
    /// Returns without calling `body` when the fixture is missing or any loading
    /// step fails, so tests are skipped silently on machines without fixtures.
    fn with_first_unit<F>(fixture: &str, body: F)
    where
        F: FnOnce(&mut TypeResolver<'_, '_>, &Unit<DwarfSlice<'_>>),
    {
        let path = match find_fixture_path(fixture) {
            Some(found) => found,
            None => return,
        };
        let binary = match BinaryData::load(&path) {
            Ok(data) => data,
            Err(_) => return,
        };
        let loaded = match binary.load_dwarf() {
            Ok(sections) => sections,
            Err(_) => return,
        };
        let header = match loaded.dwarf.units().next() {
            Ok(Some(first)) => first,
            _ => return,
        };
        let unit = match loaded.dwarf.unit(header) {
            Ok(parsed) => parsed,
            Err(_) => return,
        };

        let mut resolver = TypeResolver::new(&loaded.dwarf, &unit, loaded.address_size);
        body(&mut resolver, &unit);
    }

    #[test]
    fn resolve_common_c_types() {
        with_first_unit("test_simple", |resolver, unit| {
            let tags = [
                gimli::DW_TAG_base_type,
                gimli::DW_TAG_pointer_type,
                gimli::DW_TAG_const_type,
                gimli::DW_TAG_volatile_type,
                gimli::DW_TAG_restrict_type,
                gimli::DW_TAG_typedef,
                gimli::DW_TAG_array_type,
                gimli::DW_TAG_enumeration_type,
                gimli::DW_TAG_subroutine_type,
                gimli::DW_TAG_atomic_type,
                gimli::DW_TAG_structure_type,
            ];

            // Tags that do not occur in the fixture are skipped.
            for tag in tags {
                if let Some(offset) = find_type_offset(unit, tag) {
                    let _ = resolver.resolve_type(offset).expect("resolve type");
                }
            }
        });
    }

    #[test]
    fn resolve_reference_type_from_cpp() {
        with_first_unit("test_cpp_templates", |resolver, unit| {
            if let Some(offset) = find_type_offset(unit, gimli::DW_TAG_reference_type) {
                let _ = resolver.resolve_type(offset).expect("resolve reference type");
            }
        });
    }

    #[test]
    fn resolve_cpp_class_and_union_types() {
        with_first_unit("test_cpp_templates", |resolver, unit| {
            for tag in [gimli::DW_TAG_class_type, gimli::DW_TAG_union_type] {
                if let Some(offset) = find_type_offset(unit, tag) {
                    let _ = resolver.resolve_type(offset).expect("resolve cpp type");
                }
            }
        });
    }

    #[test]
    fn resolve_all_type_entries_best_effort() {
        with_first_unit("test_simple", |resolver, unit| {
            let resolvable = [
                gimli::DW_TAG_base_type,
                gimli::DW_TAG_pointer_type,
                gimli::DW_TAG_reference_type,
                gimli::DW_TAG_const_type,
                gimli::DW_TAG_volatile_type,
                gimli::DW_TAG_restrict_type,
                gimli::DW_TAG_atomic_type,
                gimli::DW_TAG_typedef,
                gimli::DW_TAG_array_type,
                gimli::DW_TAG_structure_type,
                gimli::DW_TAG_class_type,
                gimli::DW_TAG_union_type,
                gimli::DW_TAG_enumeration_type,
                gimli::DW_TAG_subroutine_type,
            ];

            // Best effort: resolution failures are deliberately ignored here.
            let mut entries = unit.entries();
            while let Some((_, entry)) = entries.next_dfs().ok().flatten() {
                if resolvable.contains(&entry.tag()) {
                    let _ = resolver.resolve_type(entry.offset());
                }
            }
        });
    }
}