1use std::collections::HashMap;
4
5use gimli::{DebuggingInformationEntry, Reader, Unit, UnitOffset};
6use padlock_core::arch::ArchConfig;
7use padlock_core::ir::{AccessPattern, Field, StructLayout};
8
9pub struct Extractor<'a, R: Reader> {
10 pub(crate) dwarf: &'a gimli::Dwarf<R>,
11 pub(crate) arch: &'static ArchConfig,
12}
13
14impl<'a, R: Reader> Extractor<'a, R> {
15 pub fn new(dwarf: &'a gimli::Dwarf<R>, arch: &'static ArchConfig) -> Self {
16 Self { dwarf, arch }
17 }
18
19 pub fn extract_all(&self) -> anyhow::Result<Vec<StructLayout>> {
20 let mut layouts = Vec::new();
21
22 let mut iter = self.dwarf.units();
23 while let Some(header) = iter.next()? {
24 let unit = self.dwarf.unit(header)?;
25 self.extract_from_unit(&unit, &mut layouts)?;
26 }
27
28 Ok(layouts)
29 }
30
31 fn extract_from_unit(&self, unit: &Unit<R>, out: &mut Vec<StructLayout>) -> anyhow::Result<()> {
32 let typedef_names = self.collect_typedef_names(unit)?;
35
36 let mut entries = unit.entries();
37 while let Some((_, entry)) = entries.next_dfs()? {
38 if entry.tag() == gimli::DW_TAG_structure_type
39 && let Some(mut layout) = self.extract_struct(unit, entry)?
40 {
41 if layout.name == "<anonymous>"
42 && let Some(name) = typedef_names.get(&entry.offset())
43 {
44 layout.name = name.clone();
45 }
46 out.push(layout);
47 }
48 }
49 Ok(())
50 }
51
52 fn collect_typedef_names(
55 &self,
56 unit: &Unit<R>,
57 ) -> anyhow::Result<HashMap<UnitOffset<R::Offset>, String>> {
58 let mut map = HashMap::new();
59 let mut entries = unit.entries();
60 while let Some((_, entry)) = entries.next_dfs()? {
61 if entry.tag() != gimli::DW_TAG_typedef {
62 continue;
63 }
64 let typedef_name = match self.attr_string(unit, entry, gimli::DW_AT_name)? {
65 Some(n) => n,
66 None => continue,
67 };
68 let struct_offset = match entry.attr_value(gimli::DW_AT_type)? {
69 Some(gimli::AttributeValue::UnitRef(off)) => off,
70 _ => continue,
71 };
72 map.insert(struct_offset, typedef_name);
73 }
74 Ok(map)
75 }
76
77 pub(crate) fn extract_struct(
78 &self,
79 unit: &Unit<R>,
80 entry: &DebuggingInformationEntry<R>,
81 ) -> anyhow::Result<Option<StructLayout>> {
82 if entry.attr(gimli::DW_AT_declaration)?.is_some() {
83 return Ok(None);
84 }
85
86 let name = self
87 .attr_string(unit, entry, gimli::DW_AT_name)?
88 .unwrap_or_else(|| "<anonymous>".to_string());
89
90 let total_size = match entry.attr_value(gimli::DW_AT_byte_size)? {
91 Some(gimli::AttributeValue::Udata(s)) => s as usize,
92 _ => return Ok(None),
93 };
94
95 let source_file = self.attr_string(unit, entry, gimli::DW_AT_decl_file)?;
96 let source_line = entry.attr_value(gimli::DW_AT_decl_line)?.and_then(|v| {
97 if let gimli::AttributeValue::Udata(n) = v {
98 Some(n as u32)
99 } else {
100 None
101 }
102 });
103
104 let mut fields = Vec::new();
105 let mut children = unit.entries_tree(Some(entry.offset()))?;
106 let root = children.root()?;
107 let mut child_iter = root.children();
108
109 while let Some(child) = child_iter.next()? {
110 let child_entry = child.entry();
111 if child_entry.tag() == gimli::DW_TAG_member
112 && let Some(field) = self.extract_field(unit, child_entry)?
113 {
114 fields.push(field);
115 }
116 }
117
118 fields.sort_by_key(|f| f.offset);
119
120 Ok(Some(StructLayout {
121 name,
122 total_size,
123 align: fields.iter().map(|f| f.align).max().unwrap_or(1),
124 fields,
125 source_file,
126 source_line,
127 arch: self.arch,
128 is_packed: false,
129 is_union: false,
130 }))
131 }
132
133 fn extract_field(
134 &self,
135 unit: &Unit<R>,
136 entry: &DebuggingInformationEntry<R>,
137 ) -> anyhow::Result<Option<Field>> {
138 let name = self
139 .attr_string(unit, entry, gimli::DW_AT_name)?
140 .unwrap_or_else(|| "<unnamed>".to_string());
141
142 let offset = match entry.attr_value(gimli::DW_AT_data_member_location)? {
143 Some(gimli::AttributeValue::Udata(n)) => n as usize,
144 Some(gimli::AttributeValue::Sdata(n)) => n as usize,
145 _ => return Ok(None),
146 };
147
148 if entry.attr(gimli::DW_AT_bit_size)?.is_some() {
153 return Ok(None);
154 }
155
156 let type_offset = match entry.attr_value(gimli::DW_AT_type)? {
157 Some(gimli::AttributeValue::UnitRef(off)) => off,
158 _ => return Ok(None),
159 };
160
161 let (size, align, ty) = self.resolve_type(unit, type_offset)?;
162
163 Ok(Some(Field {
164 name,
165 ty,
166 offset,
167 size,
168 align,
169 source_file: None,
170 source_line: entry.attr_value(gimli::DW_AT_decl_line)?.and_then(|v| {
171 if let gimli::AttributeValue::Udata(n) = v {
172 Some(n as u32)
173 } else {
174 None
175 }
176 }),
177 access: AccessPattern::Unknown,
178 }))
179 }
180
181 pub(crate) fn attr_string(
182 &self,
183 unit: &Unit<R>,
184 entry: &DebuggingInformationEntry<R>,
185 attr: gimli::DwAt,
186 ) -> anyhow::Result<Option<String>> {
187 match entry.attr(attr)? {
188 Some(a) => match self.dwarf.attr_string(unit, a.value()) {
189 Ok(s) => Ok(Some(s.to_string_lossy()?.into_owned())),
190 Err(_) => Ok(None),
191 },
192 None => Ok(None),
193 }
194 }
195
196 pub(crate) fn attr_usize(
197 &self,
198 entry: &DebuggingInformationEntry<R>,
199 attr: gimli::DwAt,
200 ) -> anyhow::Result<Option<usize>> {
201 match entry.attr_value(attr)? {
202 Some(gimli::AttributeValue::Udata(n)) => Ok(Some(n as usize)),
203 Some(gimli::AttributeValue::Data1(n)) => Ok(Some(n as usize)),
204 Some(gimli::AttributeValue::Data2(n)) => Ok(Some(n as usize)),
205 Some(gimli::AttributeValue::Data4(n)) => Ok(Some(n as usize)),
206 Some(gimli::AttributeValue::Data8(n)) => Ok(Some(n as usize)),
207 _ => Ok(None),
208 }
209 }
210
211 pub(crate) fn extract_array_count(
212 &self,
213 unit: &Unit<R>,
214 entry: &DebuggingInformationEntry<R>,
215 ) -> anyhow::Result<usize> {
216 let mut children = unit.entries_tree(Some(entry.offset()))?;
217 let root = children.root()?;
218 let mut child_iter = root.children();
219
220 while let Some(child) = child_iter.next()? {
221 let child_entry = child.entry();
222 if child_entry.tag() == gimli::DW_TAG_subrange_type {
223 if let Some(count) = self.attr_usize(child_entry, gimli::DW_AT_count)? {
224 return Ok(count);
225 }
226 if let Some(upper) = self.attr_usize(child_entry, gimli::DW_AT_upper_bound)? {
227 return Ok(upper + 1);
228 }
229 }
230 }
231
232 Ok(0)
233 }
234}