Skip to main content

ud_debug/
dwarf.rs

//! DWARF reader: walks `.debug_info` for `DW_TAG_subprogram` DIEs and
//! produces structured function-signature records.
//!
//! Type resolution is intentionally narrow at v0: `DW_TAG_base_type`
//! becomes one of the [`ud_ast::Type`] primitives based on its
//! `DW_AT_byte_size` + `DW_AT_encoding` (signed/unsigned/float/bool/
//! char), and `DW_TAG_pointer_type` recurses to its pointee. Type
//! qualifiers (`const`, `volatile`, `restrict`) and `typedef`s
//! transparently unwrap to the underlying type. Anything else (other
//! base-type encodings such as UTF, composite types, function
//! pointers) falls through to [`ud_ast::Type::Unknown`].
11
12use std::collections::HashMap;
13
14use gimli::constants;
15use gimli::{
16    AttributeValue, DebuggingInformationEntry, EndianSlice, LittleEndian, Reader, UnitOffset,
17};
18use ud_ast::{Param, Type};
19use ud_format::elf::Elf64File;
20
21#[derive(Debug, thiserror::Error)]
22pub enum DebugError {
23    #[error("DWARF parser rejected the input: {0}")]
24    Gimli(#[source] gimli::Error),
25}
26
27impl From<gimli::Error> for DebugError {
28    fn from(e: gimli::Error) -> Self {
29        Self::Gimli(e)
30    }
31}
32
33/// One function's DWARF-recovered signature.
34#[derive(Debug, Clone)]
35pub struct DebugFunction {
36    pub addr: u64,
37    pub name: String,
38    pub return_type: Type,
39    pub params: Vec<Param>,
40}
41
42/// Walk every `DW_TAG_subprogram` in `.debug_info` and produce a
43/// [`DebugFunction`] for each one with a known address.
44pub fn read_subprograms(elf: &Elf64File) -> Result<Vec<DebugFunction>, DebugError> {
45    let Some(dwarf) = load_dwarf(elf) else {
46        return Ok(Vec::new());
47    };
48
49    let mut out = Vec::new();
50    let mut units = dwarf.units();
51    while let Some(header) = units.next()? {
52        let unit = dwarf.unit(header)?;
53        let mut tree = unit.entries_tree(None)?;
54        let root = tree.root()?;
55        walk_for_subprograms(&dwarf, &unit, root, &mut out)?;
56    }
57    Ok(out)
58}
59
60fn walk_for_subprograms<R>(
61    dwarf: &gimli::Dwarf<R>,
62    unit: &gimli::Unit<R>,
63    node: gimli::EntriesTreeNode<R>,
64    out: &mut Vec<DebugFunction>,
65) -> Result<(), DebugError>
66where
67    R: Reader<Offset = usize>,
68{
69    let entry_offset = node.entry().offset();
70    let entry_tag = node.entry().tag();
71    if entry_tag == constants::DW_TAG_subprogram {
72        // Need to re-fetch via entries_at_offset to read params from
73        // children — the node is consumed by `children()`.
74        if let Some(func) = read_subprogram(dwarf, unit, entry_offset)? {
75            out.push(func);
76        }
77    }
78    let mut children = node.children();
79    while let Some(child) = children.next()? {
80        walk_for_subprograms(dwarf, unit, child, out)?;
81    }
82    Ok(())
83}
84
85type SectionData<'a> = EndianSlice<'a, LittleEndian>;
86
87fn load_dwarf(elf: &Elf64File) -> Option<gimli::Dwarf<SectionData<'_>>> {
88    let load_section = |id: gimli::SectionId| -> Result<SectionData<'_>, gimli::Error> {
89        let name = id.name();
90        let bytes = elf.section_by_name(name).map_or(&[][..], |(_, _, b)| b);
91        Ok(EndianSlice::new(bytes, LittleEndian))
92    };
93    gimli::Dwarf::load(load_section).ok()
94}
95
96fn read_subprogram<R>(
97    dwarf: &gimli::Dwarf<R>,
98    unit: &gimli::Unit<R>,
99    offset: UnitOffset,
100) -> Result<Option<DebugFunction>, DebugError>
101where
102    R: Reader<Offset = usize>,
103{
104    // entries_tree lets us re-walk the subprogram's children.
105    let mut tree = unit.entries_tree(Some(offset))?;
106    let root = tree.root()?;
107
108    let entry = root.entry();
109    let Some(addr) = read_low_pc(entry) else {
110        return Ok(None);
111    };
112    let Some(name) = read_name(dwarf, unit, entry)? else {
113        return Ok(None);
114    };
115    let return_type = match attr_unit_ref(entry, constants::DW_AT_type) {
116        Some(off) => resolve_type_at(unit, off)?,
117        None => Type::Void,
118    };
119
120    let mut params = Vec::new();
121    let mut children = root.children();
122    while let Some(child) = children.next()? {
123        let centry = child.entry();
124        if centry.tag() == constants::DW_TAG_formal_parameter {
125            let pname = read_name(dwarf, unit, centry)?.unwrap_or_default();
126            let pty = match attr_unit_ref(centry, constants::DW_AT_type) {
127                Some(off) => resolve_type_at(unit, off)?,
128                None => Type::Unknown,
129            };
130            params.push(Param {
131                name: pname,
132                ty: pty,
133                location: None,
134            });
135        }
136    }
137
138    Ok(Some(DebugFunction {
139        addr,
140        name,
141        return_type,
142        params,
143    }))
144}
145
146fn attr_unit_ref<R: Reader<Offset = usize>>(
147    entry: &DebuggingInformationEntry<R>,
148    name: constants::DwAt,
149) -> Option<UnitOffset> {
150    match entry.attr_value(name) {
151        Some(AttributeValue::UnitRef(off)) => Some(off),
152        _ => None,
153    }
154}
155
156fn read_low_pc<R: Reader>(entry: &DebuggingInformationEntry<R>) -> Option<u64> {
157    if let Some(AttributeValue::Addr(a)) = entry.attr_value(constants::DW_AT_low_pc) {
158        return Some(a);
159    }
160    None
161}
162
163fn read_name<R>(
164    dwarf: &gimli::Dwarf<R>,
165    unit: &gimli::Unit<R>,
166    entry: &DebuggingInformationEntry<R>,
167) -> Result<Option<String>, DebugError>
168where
169    R: Reader<Offset = usize>,
170{
171    let Some(value) = entry.attr_value(constants::DW_AT_name) else {
172        return Ok(None);
173    };
174    attr_string(dwarf, unit, value)
175}
176
177fn attr_string<R>(
178    dwarf: &gimli::Dwarf<R>,
179    unit: &gimli::Unit<R>,
180    value: AttributeValue<R>,
181) -> Result<Option<String>, DebugError>
182where
183    R: Reader<Offset = usize>,
184{
185    let s = dwarf.attr_string(unit, value)?;
186    let bytes = s.to_slice()?;
187    Ok(std::str::from_utf8(&bytes).ok().map(str::to_owned))
188}
189
190fn resolve_type_at<R>(unit: &gimli::Unit<R>, off: UnitOffset) -> Result<Type, DebugError>
191where
192    R: Reader<Offset = usize>,
193{
194    // Bound recursion in case of malformed cycles (typedef → typedef → …).
195    resolve_type_inner(unit, off, 0, &mut HashMap::new())
196}
197
198fn resolve_type_inner<R>(
199    unit: &gimli::Unit<R>,
200    off: UnitOffset,
201    depth: u32,
202    cache: &mut HashMap<UnitOffset, Type>,
203) -> Result<Type, DebugError>
204where
205    R: Reader<Offset = usize>,
206{
207    if depth > 32 {
208        return Ok(Type::Unknown);
209    }
210    if let Some(t) = cache.get(&off) {
211        return Ok(t.clone());
212    }
213    let mut tree = unit.entries_tree(Some(off))?;
214    let root = tree.root()?;
215    let entry = root.entry();
216
217    let resolved = match entry.tag() {
218        constants::DW_TAG_base_type => resolve_base_type(entry),
219        constants::DW_TAG_pointer_type => {
220            let inner = match attr_unit_ref(entry, constants::DW_AT_type) {
221                Some(o) => resolve_type_inner(unit, o, depth + 1, cache)?,
222                None => Type::Void,
223            };
224            Type::Pointer(Box::new(inner))
225        }
226        constants::DW_TAG_const_type
227        | constants::DW_TAG_volatile_type
228        | constants::DW_TAG_restrict_type
229        | constants::DW_TAG_typedef => match attr_unit_ref(entry, constants::DW_AT_type) {
230            Some(o) => resolve_type_inner(unit, o, depth + 1, cache)?,
231            None => Type::Unknown,
232        },
233        _ => Type::Unknown,
234    };
235
236    cache.insert(off, resolved.clone());
237    Ok(resolved)
238}
239
240fn resolve_base_type<R: Reader>(entry: &DebuggingInformationEntry<R>) -> Type {
241    let size = match entry.attr_value(constants::DW_AT_byte_size) {
242        Some(AttributeValue::Udata(n) | AttributeValue::Data8(n)) => n,
243        Some(AttributeValue::Data1(n)) => u64::from(n),
244        Some(AttributeValue::Data2(n)) => u64::from(n),
245        Some(AttributeValue::Data4(n)) => u64::from(n),
246        _ => return Type::Unknown,
247    };
248    let Some(AttributeValue::Encoding(encoding)) = entry.attr_value(constants::DW_AT_encoding)
249    else {
250        return Type::Unknown;
251    };
252
253    match (encoding, size) {
254        (constants::DW_ATE_boolean, _) => Type::Bool,
255        (constants::DW_ATE_signed_char | constants::DW_ATE_unsigned_char, _) => Type::Char,
256        (constants::DW_ATE_signed, 1) => Type::I8,
257        (constants::DW_ATE_signed, 2) => Type::I16,
258        (constants::DW_ATE_signed, 4) => Type::I32,
259        (constants::DW_ATE_signed, 8) => Type::I64,
260        (constants::DW_ATE_unsigned, 1) => Type::U8,
261        (constants::DW_ATE_unsigned, 2) => Type::U16,
262        (constants::DW_ATE_unsigned, 4) => Type::U32,
263        (constants::DW_ATE_unsigned, 8) => Type::U64,
264        (constants::DW_ATE_float, 4) => Type::F32,
265        (constants::DW_ATE_float, 8) => Type::F64,
266        _ => Type::Unknown,
267    }
268}