Skip to main content

padlock_dwarf/
extractor.rs

1// padlock-dwarf/src/extractor.rs
2
3use std::collections::HashMap;
4
5use gimli::{DebuggingInformationEntry, Reader, Unit, UnitOffset};
6use padlock_core::arch::ArchConfig;
7use padlock_core::ir::{AccessPattern, Field, StructLayout};
8
9pub struct Extractor<'a, R: Reader> {
10    pub(crate) dwarf: &'a gimli::Dwarf<R>,
11    pub(crate) arch: &'static ArchConfig,
12}
13
14impl<'a, R: Reader> Extractor<'a, R> {
15    pub fn new(dwarf: &'a gimli::Dwarf<R>, arch: &'static ArchConfig) -> Self {
16        Self { dwarf, arch }
17    }
18
19    pub fn extract_all(&self) -> anyhow::Result<Vec<StructLayout>> {
20        let mut layouts = Vec::new();
21
22        let mut iter = self.dwarf.units();
23        while let Some(header) = iter.next()? {
24            let unit = self.dwarf.unit(header)?;
25            self.extract_from_unit(&unit, &mut layouts)?;
26        }
27
28        Ok(layouts)
29    }
30
31    fn extract_from_unit(&self, unit: &Unit<R>, out: &mut Vec<StructLayout>) -> anyhow::Result<()> {
32        // First pass: build a map from struct offset → typedef name.
33        // Handles `typedef struct { ... } Foo` where the struct has no tag name.
34        let typedef_names = self.collect_typedef_names(unit)?;
35
36        let mut entries = unit.entries();
37        while let Some((_, entry)) = entries.next_dfs()? {
38            if entry.tag() == gimli::DW_TAG_structure_type
39                && let Some(mut layout) = self.extract_struct(unit, entry)?
40            {
41                if layout.name == "<anonymous>"
42                    && let Some(name) = typedef_names.get(&entry.offset())
43                {
44                    layout.name = name.clone();
45                }
46                out.push(layout);
47            }
48        }
49        Ok(())
50    }
51
52    /// Walk all top-level DIEs and collect DW_TAG_typedef entries that point
53    /// directly to a DW_TAG_structure_type, returning struct_offset → typedef_name.
54    fn collect_typedef_names(
55        &self,
56        unit: &Unit<R>,
57    ) -> anyhow::Result<HashMap<UnitOffset<R::Offset>, String>> {
58        let mut map = HashMap::new();
59        let mut entries = unit.entries();
60        while let Some((_, entry)) = entries.next_dfs()? {
61            if entry.tag() != gimli::DW_TAG_typedef {
62                continue;
63            }
64            let typedef_name = match self.attr_string(unit, entry, gimli::DW_AT_name)? {
65                Some(n) => n,
66                None => continue,
67            };
68            let struct_offset = match entry.attr_value(gimli::DW_AT_type)? {
69                Some(gimli::AttributeValue::UnitRef(off)) => off,
70                _ => continue,
71            };
72            map.insert(struct_offset, typedef_name);
73        }
74        Ok(map)
75    }
76
77    pub(crate) fn extract_struct(
78        &self,
79        unit: &Unit<R>,
80        entry: &DebuggingInformationEntry<R>,
81    ) -> anyhow::Result<Option<StructLayout>> {
82        if entry.attr(gimli::DW_AT_declaration)?.is_some() {
83            return Ok(None);
84        }
85
86        let name = self
87            .attr_string(unit, entry, gimli::DW_AT_name)?
88            .unwrap_or_else(|| "<anonymous>".to_string());
89
90        let total_size = match entry.attr_value(gimli::DW_AT_byte_size)? {
91            Some(gimli::AttributeValue::Udata(s)) => s as usize,
92            _ => return Ok(None),
93        };
94
95        let source_file = self.attr_string(unit, entry, gimli::DW_AT_decl_file)?;
96        let source_line = entry.attr_value(gimli::DW_AT_decl_line)?.and_then(|v| {
97            if let gimli::AttributeValue::Udata(n) = v {
98                Some(n as u32)
99            } else {
100                None
101            }
102        });
103
104        let mut fields = Vec::new();
105        let mut children = unit.entries_tree(Some(entry.offset()))?;
106        let root = children.root()?;
107        let mut child_iter = root.children();
108
109        while let Some(child) = child_iter.next()? {
110            let child_entry = child.entry();
111            if child_entry.tag() == gimli::DW_TAG_member
112                && let Some(field) = self.extract_field(unit, child_entry)?
113            {
114                fields.push(field);
115            }
116        }
117
118        fields.sort_by_key(|f| f.offset);
119
120        Ok(Some(StructLayout {
121            name,
122            total_size,
123            align: fields.iter().map(|f| f.align).max().unwrap_or(1),
124            fields,
125            source_file,
126            source_line,
127            arch: self.arch,
128            is_packed: false,
129            is_union: false,
130        }))
131    }
132
133    fn extract_field(
134        &self,
135        unit: &Unit<R>,
136        entry: &DebuggingInformationEntry<R>,
137    ) -> anyhow::Result<Option<Field>> {
138        let name = self
139            .attr_string(unit, entry, gimli::DW_AT_name)?
140            .unwrap_or_else(|| "<unnamed>".to_string());
141
142        let offset = match entry.attr_value(gimli::DW_AT_data_member_location)? {
143            Some(gimli::AttributeValue::Udata(n)) => n as usize,
144            Some(gimli::AttributeValue::Sdata(n)) => n as usize,
145            _ => return Ok(None),
146        };
147
148        // Bit-field members carry DW_AT_bit_size. They share byte offsets with
149        // adjacent fields and cannot be represented in the byte-level IR without
150        // losing accuracy. Skip them entirely — use source analysis for structs
151        // that contain bit-fields.
152        if entry.attr(gimli::DW_AT_bit_size)?.is_some() {
153            return Ok(None);
154        }
155
156        let type_offset = match entry.attr_value(gimli::DW_AT_type)? {
157            Some(gimli::AttributeValue::UnitRef(off)) => off,
158            _ => return Ok(None),
159        };
160
161        let (size, align, ty) = self.resolve_type(unit, type_offset)?;
162
163        Ok(Some(Field {
164            name,
165            ty,
166            offset,
167            size,
168            align,
169            source_file: None,
170            source_line: entry.attr_value(gimli::DW_AT_decl_line)?.and_then(|v| {
171                if let gimli::AttributeValue::Udata(n) = v {
172                    Some(n as u32)
173                } else {
174                    None
175                }
176            }),
177            access: AccessPattern::Unknown,
178        }))
179    }
180
181    pub(crate) fn attr_string(
182        &self,
183        unit: &Unit<R>,
184        entry: &DebuggingInformationEntry<R>,
185        attr: gimli::DwAt,
186    ) -> anyhow::Result<Option<String>> {
187        match entry.attr(attr)? {
188            Some(a) => match self.dwarf.attr_string(unit, a.value()) {
189                Ok(s) => Ok(Some(s.to_string_lossy()?.into_owned())),
190                Err(_) => Ok(None),
191            },
192            None => Ok(None),
193        }
194    }
195
196    pub(crate) fn attr_usize(
197        &self,
198        entry: &DebuggingInformationEntry<R>,
199        attr: gimli::DwAt,
200    ) -> anyhow::Result<Option<usize>> {
201        match entry.attr_value(attr)? {
202            Some(gimli::AttributeValue::Udata(n)) => Ok(Some(n as usize)),
203            Some(gimli::AttributeValue::Data1(n)) => Ok(Some(n as usize)),
204            Some(gimli::AttributeValue::Data2(n)) => Ok(Some(n as usize)),
205            Some(gimli::AttributeValue::Data4(n)) => Ok(Some(n as usize)),
206            Some(gimli::AttributeValue::Data8(n)) => Ok(Some(n as usize)),
207            _ => Ok(None),
208        }
209    }
210
211    pub(crate) fn extract_array_count(
212        &self,
213        unit: &Unit<R>,
214        entry: &DebuggingInformationEntry<R>,
215    ) -> anyhow::Result<usize> {
216        let mut children = unit.entries_tree(Some(entry.offset()))?;
217        let root = children.root()?;
218        let mut child_iter = root.children();
219
220        while let Some(child) = child_iter.next()? {
221            let child_entry = child.entry();
222            if child_entry.tag() == gimli::DW_TAG_subrange_type {
223                if let Some(count) = self.attr_usize(child_entry, gimli::DW_AT_count)? {
224                    return Ok(count);
225                }
226                if let Some(upper) = self.attr_usize(child_entry, gimli::DW_AT_upper_bound)? {
227                    return Ok(upper + 1);
228                }
229            }
230        }
231
232        Ok(0)
233    }
234}