1use std::collections::HashMap;
13
14use gimli::constants;
15use gimli::{
16 AttributeValue, DebuggingInformationEntry, EndianSlice, LittleEndian, Reader, UnitOffset,
17};
18use ud_ast::{Param, Type};
19use ud_format::elf::Elf64File;
20
21#[derive(Debug, thiserror::Error)]
22pub enum DebugError {
23 #[error("DWARF parser rejected the input: {0}")]
24 Gimli(#[source] gimli::Error),
25}
26
27impl From<gimli::Error> for DebugError {
28 fn from(e: gimli::Error) -> Self {
29 Self::Gimli(e)
30 }
31}
32
/// One function recovered from a `DW_TAG_subprogram` debugging entry.
#[derive(Debug, Clone)]
pub struct DebugFunction {
    /// Address read from the entry's `DW_AT_low_pc` attribute.
    pub addr: u64,
    /// Name read from the entry's `DW_AT_name` attribute.
    pub name: String,
    /// Return type; `Type::Void` when the entry has no `DW_AT_type` attribute.
    pub return_type: Type,
    /// One element per `DW_TAG_formal_parameter` child, in the order the
    /// children appear under the subprogram entry.
    pub params: Vec<Param>,
}
41
42pub fn read_subprograms(elf: &Elf64File) -> Result<Vec<DebugFunction>, DebugError> {
45 let Some(dwarf) = load_dwarf(elf) else {
46 return Ok(Vec::new());
47 };
48
49 let mut out = Vec::new();
50 let mut units = dwarf.units();
51 while let Some(header) = units.next()? {
52 let unit = dwarf.unit(header)?;
53 let mut tree = unit.entries_tree(None)?;
54 let root = tree.root()?;
55 walk_for_subprograms(&dwarf, &unit, root, &mut out)?;
56 }
57 Ok(out)
58}
59
60fn walk_for_subprograms<R>(
61 dwarf: &gimli::Dwarf<R>,
62 unit: &gimli::Unit<R>,
63 node: gimli::EntriesTreeNode<R>,
64 out: &mut Vec<DebugFunction>,
65) -> Result<(), DebugError>
66where
67 R: Reader<Offset = usize>,
68{
69 let entry_offset = node.entry().offset();
70 let entry_tag = node.entry().tag();
71 if entry_tag == constants::DW_TAG_subprogram {
72 if let Some(func) = read_subprogram(dwarf, unit, entry_offset)? {
75 out.push(func);
76 }
77 }
78 let mut children = node.children();
79 while let Some(child) = children.next()? {
80 walk_for_subprograms(dwarf, unit, child, out)?;
81 }
82 Ok(())
83}
84
85type SectionData<'a> = EndianSlice<'a, LittleEndian>;
86
87fn load_dwarf(elf: &Elf64File) -> Option<gimli::Dwarf<SectionData<'_>>> {
88 let load_section = |id: gimli::SectionId| -> Result<SectionData<'_>, gimli::Error> {
89 let name = id.name();
90 let bytes = elf.section_by_name(name).map_or(&[][..], |(_, _, b)| b);
91 Ok(EndianSlice::new(bytes, LittleEndian))
92 };
93 gimli::Dwarf::load(load_section).ok()
94}
95
96fn read_subprogram<R>(
97 dwarf: &gimli::Dwarf<R>,
98 unit: &gimli::Unit<R>,
99 offset: UnitOffset,
100) -> Result<Option<DebugFunction>, DebugError>
101where
102 R: Reader<Offset = usize>,
103{
104 let mut tree = unit.entries_tree(Some(offset))?;
106 let root = tree.root()?;
107
108 let entry = root.entry();
109 let Some(addr) = read_low_pc(entry) else {
110 return Ok(None);
111 };
112 let Some(name) = read_name(dwarf, unit, entry)? else {
113 return Ok(None);
114 };
115 let return_type = match attr_unit_ref(entry, constants::DW_AT_type) {
116 Some(off) => resolve_type_at(unit, off)?,
117 None => Type::Void,
118 };
119
120 let mut params = Vec::new();
121 let mut children = root.children();
122 while let Some(child) = children.next()? {
123 let centry = child.entry();
124 if centry.tag() == constants::DW_TAG_formal_parameter {
125 let pname = read_name(dwarf, unit, centry)?.unwrap_or_default();
126 let pty = match attr_unit_ref(centry, constants::DW_AT_type) {
127 Some(off) => resolve_type_at(unit, off)?,
128 None => Type::Unknown,
129 };
130 params.push(Param {
131 name: pname,
132 ty: pty,
133 location: None,
134 });
135 }
136 }
137
138 Ok(Some(DebugFunction {
139 addr,
140 name,
141 return_type,
142 params,
143 }))
144}
145
146fn attr_unit_ref<R: Reader<Offset = usize>>(
147 entry: &DebuggingInformationEntry<R>,
148 name: constants::DwAt,
149) -> Option<UnitOffset> {
150 match entry.attr_value(name) {
151 Some(AttributeValue::UnitRef(off)) => Some(off),
152 _ => None,
153 }
154}
155
156fn read_low_pc<R: Reader>(entry: &DebuggingInformationEntry<R>) -> Option<u64> {
157 if let Some(AttributeValue::Addr(a)) = entry.attr_value(constants::DW_AT_low_pc) {
158 return Some(a);
159 }
160 None
161}
162
163fn read_name<R>(
164 dwarf: &gimli::Dwarf<R>,
165 unit: &gimli::Unit<R>,
166 entry: &DebuggingInformationEntry<R>,
167) -> Result<Option<String>, DebugError>
168where
169 R: Reader<Offset = usize>,
170{
171 let Some(value) = entry.attr_value(constants::DW_AT_name) else {
172 return Ok(None);
173 };
174 attr_string(dwarf, unit, value)
175}
176
177fn attr_string<R>(
178 dwarf: &gimli::Dwarf<R>,
179 unit: &gimli::Unit<R>,
180 value: AttributeValue<R>,
181) -> Result<Option<String>, DebugError>
182where
183 R: Reader<Offset = usize>,
184{
185 let s = dwarf.attr_string(unit, value)?;
186 let bytes = s.to_slice()?;
187 Ok(std::str::from_utf8(&bytes).ok().map(str::to_owned))
188}
189
190fn resolve_type_at<R>(unit: &gimli::Unit<R>, off: UnitOffset) -> Result<Type, DebugError>
191where
192 R: Reader<Offset = usize>,
193{
194 resolve_type_inner(unit, off, 0, &mut HashMap::new())
196}
197
198fn resolve_type_inner<R>(
199 unit: &gimli::Unit<R>,
200 off: UnitOffset,
201 depth: u32,
202 cache: &mut HashMap<UnitOffset, Type>,
203) -> Result<Type, DebugError>
204where
205 R: Reader<Offset = usize>,
206{
207 if depth > 32 {
208 return Ok(Type::Unknown);
209 }
210 if let Some(t) = cache.get(&off) {
211 return Ok(t.clone());
212 }
213 let mut tree = unit.entries_tree(Some(off))?;
214 let root = tree.root()?;
215 let entry = root.entry();
216
217 let resolved = match entry.tag() {
218 constants::DW_TAG_base_type => resolve_base_type(entry),
219 constants::DW_TAG_pointer_type => {
220 let inner = match attr_unit_ref(entry, constants::DW_AT_type) {
221 Some(o) => resolve_type_inner(unit, o, depth + 1, cache)?,
222 None => Type::Void,
223 };
224 Type::Pointer(Box::new(inner))
225 }
226 constants::DW_TAG_const_type
227 | constants::DW_TAG_volatile_type
228 | constants::DW_TAG_restrict_type
229 | constants::DW_TAG_typedef => match attr_unit_ref(entry, constants::DW_AT_type) {
230 Some(o) => resolve_type_inner(unit, o, depth + 1, cache)?,
231 None => Type::Unknown,
232 },
233 _ => Type::Unknown,
234 };
235
236 cache.insert(off, resolved.clone());
237 Ok(resolved)
238}
239
240fn resolve_base_type<R: Reader>(entry: &DebuggingInformationEntry<R>) -> Type {
241 let size = match entry.attr_value(constants::DW_AT_byte_size) {
242 Some(AttributeValue::Udata(n) | AttributeValue::Data8(n)) => n,
243 Some(AttributeValue::Data1(n)) => u64::from(n),
244 Some(AttributeValue::Data2(n)) => u64::from(n),
245 Some(AttributeValue::Data4(n)) => u64::from(n),
246 _ => return Type::Unknown,
247 };
248 let Some(AttributeValue::Encoding(encoding)) = entry.attr_value(constants::DW_AT_encoding)
249 else {
250 return Type::Unknown;
251 };
252
253 match (encoding, size) {
254 (constants::DW_ATE_boolean, _) => Type::Bool,
255 (constants::DW_ATE_signed_char | constants::DW_ATE_unsigned_char, _) => Type::Char,
256 (constants::DW_ATE_signed, 1) => Type::I8,
257 (constants::DW_ATE_signed, 2) => Type::I16,
258 (constants::DW_ATE_signed, 4) => Type::I32,
259 (constants::DW_ATE_signed, 8) => Type::I64,
260 (constants::DW_ATE_unsigned, 1) => Type::U8,
261 (constants::DW_ATE_unsigned, 2) => Type::U16,
262 (constants::DW_ATE_unsigned, 4) => Type::U32,
263 (constants::DW_ATE_unsigned, 8) => Type::U64,
264 (constants::DW_ATE_float, 4) => Type::F32,
265 (constants::DW_ATE_float, 8) => Type::F64,
266 _ => Type::Unknown,
267 }
268}