Skip to main content

padlock_dwarf/
extractor.rs

1// padlock-dwarf/src/extractor.rs
2
3use std::collections::HashMap;
4
5use gimli::{DebuggingInformationEntry, Reader, Unit, UnitOffset};
6use padlock_core::arch::ArchConfig;
7use padlock_core::ir::{AccessPattern, Field, StructLayout};
8
9pub struct Extractor<'a, R: Reader> {
10    pub(crate) dwarf: &'a gimli::Dwarf<R>,
11    pub(crate) arch: &'static ArchConfig,
12}
13
14impl<'a, R: Reader> Extractor<'a, R> {
15    pub fn new(dwarf: &'a gimli::Dwarf<R>, arch: &'static ArchConfig) -> Self {
16        Self { dwarf, arch }
17    }
18
19    pub fn extract_all(&self) -> anyhow::Result<Vec<StructLayout>> {
20        let mut layouts = Vec::new();
21
22        let mut iter = self.dwarf.units();
23        while let Some(header) = iter.next()? {
24            let unit = self.dwarf.unit(header)?;
25            self.extract_from_unit(&unit, &mut layouts)?;
26        }
27
28        Ok(layouts)
29    }
30
31    fn extract_from_unit(&self, unit: &Unit<R>, out: &mut Vec<StructLayout>) -> anyhow::Result<()> {
32        // First pass: build a map from struct offset → typedef name.
33        // Handles `typedef struct { ... } Foo` where the struct has no tag name.
34        let typedef_names = self.collect_typedef_names(unit)?;
35
36        let mut entries = unit.entries();
37        while let Some((_, entry)) = entries.next_dfs()? {
38            if entry.tag() == gimli::DW_TAG_structure_type
39                && let Some(mut layout) = self.extract_struct(unit, entry)?
40            {
41                if layout.name == "<anonymous>"
42                    && let Some(name) = typedef_names.get(&entry.offset())
43                {
44                    layout.name = name.clone();
45                }
46                out.push(layout);
47            }
48        }
49        Ok(())
50    }
51
52    /// Walk all top-level DIEs and collect DW_TAG_typedef entries that point
53    /// directly to a DW_TAG_structure_type, returning struct_offset → typedef_name.
54    fn collect_typedef_names(
55        &self,
56        unit: &Unit<R>,
57    ) -> anyhow::Result<HashMap<UnitOffset<R::Offset>, String>> {
58        let mut map = HashMap::new();
59        let mut entries = unit.entries();
60        while let Some((_, entry)) = entries.next_dfs()? {
61            if entry.tag() != gimli::DW_TAG_typedef {
62                continue;
63            }
64            let typedef_name = match self.attr_string(unit, entry, gimli::DW_AT_name)? {
65                Some(n) => n,
66                None => continue,
67            };
68            let struct_offset = match entry.attr_value(gimli::DW_AT_type)? {
69                Some(gimli::AttributeValue::UnitRef(off)) => off,
70                _ => continue,
71            };
72            map.insert(struct_offset, typedef_name);
73        }
74        Ok(map)
75    }
76
77    pub(crate) fn extract_struct(
78        &self,
79        unit: &Unit<R>,
80        entry: &DebuggingInformationEntry<R>,
81    ) -> anyhow::Result<Option<StructLayout>> {
82        if entry.attr(gimli::DW_AT_declaration)?.is_some() {
83            return Ok(None);
84        }
85
86        let name = self
87            .attr_string(unit, entry, gimli::DW_AT_name)?
88            .unwrap_or_else(|| "<anonymous>".to_string());
89
90        let total_size = match entry.attr_value(gimli::DW_AT_byte_size)? {
91            Some(gimli::AttributeValue::Udata(s)) => s as usize,
92            _ => return Ok(None),
93        };
94
95        let source_file = self.attr_string(unit, entry, gimli::DW_AT_decl_file)?;
96        let source_line = entry.attr_value(gimli::DW_AT_decl_line)?.and_then(|v| {
97            if let gimli::AttributeValue::Udata(n) = v {
98                Some(n as u32)
99            } else {
100                None
101            }
102        });
103
104        let mut fields = Vec::new();
105        let mut uncertain_fields: Vec<String> = Vec::new();
106
107        // Accumulates consecutive bitfield members at the same byte offset before
108        // flushing them as a single synthetic storage-unit field.
109        struct BitfieldGroup {
110            parts: Vec<String>, // "name:bits" labels
111            byte_offset: usize,
112            storage_bytes: usize, // from DW_AT_byte_size on member; 0 = unknown
113        }
114        let mut pending_bf: Option<BitfieldGroup> = None;
115
116        let flush_bf =
117            |group: BitfieldGroup, fields: &mut Vec<Field>, uncertain: &mut Vec<String>| {
118                if group.storage_bytes == 0 {
119                    // Storage unit size unknown; flag as uncertain so the user knows.
120                    uncertain.push(format!("[bf@{}]", group.byte_offset));
121                    return;
122                }
123                let field_name = if group.parts.is_empty() {
124                    "[__pad]".to_string()
125                } else {
126                    format!("[{}]", group.parts.join("|"))
127                };
128                use padlock_core::ir::TypeInfo;
129                fields.push(Field {
130                    name: field_name,
131                    ty: TypeInfo::Primitive {
132                        name: format!("uint{}_t", group.storage_bytes * 8),
133                        size: group.storage_bytes,
134                        align: group.storage_bytes,
135                    },
136                    offset: group.byte_offset,
137                    size: group.storage_bytes,
138                    align: group.storage_bytes,
139                    source_file: None,
140                    source_line: None,
141                    access: AccessPattern::Unknown,
142                });
143            };
144
145        let mut children = unit.entries_tree(Some(entry.offset()))?;
146        let root = children.root()?;
147        let mut child_iter = root.children();
148
149        while let Some(child) = child_iter.next()? {
150            let child_entry = child.entry();
151            if child_entry.tag() != gimli::DW_TAG_member {
152                continue;
153            }
154
155            let is_bitfield = child_entry.attr(gimli::DW_AT_bit_size)?.is_some();
156
157            if is_bitfield {
158                let byte_offset = match child_entry.attr_value(gimli::DW_AT_data_member_location)? {
159                    Some(gimli::AttributeValue::Udata(n)) => n as usize,
160                    Some(gimli::AttributeValue::Sdata(n)) => n as usize,
161                    _ => {
162                        // No byte offset — flush pending group and skip this member.
163                        if let Some(g) = pending_bf.take() {
164                            flush_bf(g, &mut fields, &mut uncertain_fields);
165                        }
166                        continue;
167                    }
168                };
169
170                let bit_size = match child_entry.attr_value(gimli::DW_AT_bit_size)? {
171                    Some(gimli::AttributeValue::Udata(n)) => n as usize,
172                    _ => 0,
173                };
174
175                // DW_AT_byte_size on a bitfield member gives the storage unit size.
176                let storage_bytes = match child_entry.attr_value(gimli::DW_AT_byte_size)? {
177                    Some(gimli::AttributeValue::Udata(n)) => n as usize,
178                    Some(gimli::AttributeValue::Data1(n)) => n as usize,
179                    Some(gimli::AttributeValue::Data2(n)) => n as usize,
180                    Some(gimli::AttributeValue::Data4(n)) => n as usize,
181                    _ => 0,
182                };
183
184                let member_name = self
185                    .attr_string(unit, child_entry, gimli::DW_AT_name)?
186                    .unwrap_or_default();
187
188                // If the pending group is at a different byte offset, flush it first.
189                if let Some(ref g) = pending_bf
190                    && g.byte_offset != byte_offset
191                {
192                    let g = pending_bf.take().unwrap();
193                    flush_bf(g, &mut fields, &mut uncertain_fields);
194                }
195
196                let group = pending_bf.get_or_insert(BitfieldGroup {
197                    parts: Vec::new(),
198                    byte_offset,
199                    storage_bytes: 0,
200                });
201                if !member_name.is_empty() && bit_size > 0 {
202                    group.parts.push(format!("{member_name}:{bit_size}"));
203                }
204                if storage_bytes > group.storage_bytes {
205                    group.storage_bytes = storage_bytes;
206                }
207            } else {
208                // Non-bitfield member — flush any pending bitfield group first.
209                if let Some(g) = pending_bf.take() {
210                    flush_bf(g, &mut fields, &mut uncertain_fields);
211                }
212                if let Some(field) = self.extract_field(unit, child_entry)? {
213                    fields.push(field);
214                }
215            }
216        }
217
218        // Flush any remaining bitfield group.
219        if let Some(g) = pending_bf.take() {
220            flush_bf(g, &mut fields, &mut uncertain_fields);
221        }
222
223        fields.sort_by_key(|f| f.offset);
224
225        Ok(Some(StructLayout {
226            name,
227            total_size,
228            align: fields.iter().map(|f| f.align).max().unwrap_or(1),
229            fields,
230            source_file,
231            source_line,
232            arch: self.arch,
233            is_packed: false,
234            is_union: false,
235            is_repr_rust: false,
236            suppressed_findings: Vec::new(),
237            uncertain_fields,
238        }))
239    }
240
241    fn extract_field(
242        &self,
243        unit: &Unit<R>,
244        entry: &DebuggingInformationEntry<R>,
245    ) -> anyhow::Result<Option<Field>> {
246        let name = self
247            .attr_string(unit, entry, gimli::DW_AT_name)?
248            .unwrap_or_else(|| "<unnamed>".to_string());
249
250        let offset = match entry.attr_value(gimli::DW_AT_data_member_location)? {
251            Some(gimli::AttributeValue::Udata(n)) => n as usize,
252            Some(gimli::AttributeValue::Sdata(n)) => n as usize,
253            _ => return Ok(None),
254        };
255
256        let type_offset = match entry.attr_value(gimli::DW_AT_type)? {
257            Some(gimli::AttributeValue::UnitRef(off)) => off,
258            _ => return Ok(None),
259        };
260
261        let (size, align, ty) = self.resolve_type(unit, type_offset)?;
262
263        Ok(Some(Field {
264            name,
265            ty,
266            offset,
267            size,
268            align,
269            source_file: None,
270            source_line: entry.attr_value(gimli::DW_AT_decl_line)?.and_then(|v| {
271                if let gimli::AttributeValue::Udata(n) = v {
272                    Some(n as u32)
273                } else {
274                    None
275                }
276            }),
277            access: AccessPattern::Unknown,
278        }))
279    }
280
281    pub(crate) fn attr_string(
282        &self,
283        unit: &Unit<R>,
284        entry: &DebuggingInformationEntry<R>,
285        attr: gimli::DwAt,
286    ) -> anyhow::Result<Option<String>> {
287        match entry.attr(attr)? {
288            Some(a) => match self.dwarf.attr_string(unit, a.value()) {
289                Ok(s) => Ok(Some(s.to_string_lossy()?.into_owned())),
290                Err(_) => Ok(None),
291            },
292            None => Ok(None),
293        }
294    }
295
296    pub(crate) fn attr_usize(
297        &self,
298        entry: &DebuggingInformationEntry<R>,
299        attr: gimli::DwAt,
300    ) -> anyhow::Result<Option<usize>> {
301        match entry.attr_value(attr)? {
302            Some(gimli::AttributeValue::Udata(n)) => Ok(Some(n as usize)),
303            Some(gimli::AttributeValue::Data1(n)) => Ok(Some(n as usize)),
304            Some(gimli::AttributeValue::Data2(n)) => Ok(Some(n as usize)),
305            Some(gimli::AttributeValue::Data4(n)) => Ok(Some(n as usize)),
306            Some(gimli::AttributeValue::Data8(n)) => Ok(Some(n as usize)),
307            _ => Ok(None),
308        }
309    }
310
311    pub(crate) fn extract_array_count(
312        &self,
313        unit: &Unit<R>,
314        entry: &DebuggingInformationEntry<R>,
315    ) -> anyhow::Result<usize> {
316        let mut children = unit.entries_tree(Some(entry.offset()))?;
317        let root = children.root()?;
318        let mut child_iter = root.children();
319
320        while let Some(child) = child_iter.next()? {
321            let child_entry = child.entry();
322            if child_entry.tag() == gimli::DW_TAG_subrange_type {
323                if let Some(count) = self.attr_usize(child_entry, gimli::DW_AT_count)? {
324                    return Ok(count);
325                }
326                if let Some(upper) = self.attr_usize(child_entry, gimli::DW_AT_upper_bound)? {
327                    return Ok(upper + 1);
328                }
329            }
330        }
331
332        Ok(0)
333    }
334}