rudy_dwarf/
modules.rs

1use std::{collections::BTreeMap, fmt};
2
3use crate::{
4    die::{cu::is_rust_cu, utils::get_string_attr},
5    visitor::{walk_file, DieVisitor, DieWalker, VisitorNode},
6    DebugFile, Die, DwarfDb,
7};
8
9#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, salsa::Update)]
10pub struct Module {
11    pub(crate) modules: BTreeMap<String, Module>,
12    pub entries: Vec<Die>,
13}
14
15#[salsa::tracked(debug)]
16pub struct ModuleIndex<'db> {
17    #[returns(ref)]
18    pub by_offset: Vec<ModuleRange>,
19    #[returns(ref)]
20    pub by_name: Module,
21}
22
23impl<'db> ModuleIndex<'db> {
24    /// Find the module at a specific offset
25    pub fn find_by_offset(&self, db: &'db dyn DwarfDb, offset: usize) -> Option<&'db ModuleRange> {
26        let ranges = self.by_offset(db);
27        // Find the most specific (deepest) namespace that contains this offset
28        let pos = ranges
29            // find the first point at which the range starts _after_ the offset
30            .partition_point(|range| range.start_offset <= offset);
31
32        tracing::trace!("Partition point: {:?}", ranges.get(pos));
33
34        // then, work backwards to find the first range that contains the offset
35        ranges[..pos]
36            .iter()
37            .rev()
38            .find(|range| offset >= range.start_offset && offset < range.end_offset)
39    }
40
41    /// Find a module by its name
42    pub fn find_by_path(&self, db: &'db dyn DwarfDb, module_path: &[String]) -> Option<&Module> {
43        let mut module = self.by_name(db);
44
45        for segment in module_path {
46            module = module.modules.get(segment)?;
47        }
48
49        Some(module)
50    }
51}
52
53/// Namespace range representing a module's DIE offset coverage
54#[derive(Clone, PartialEq, Eq, Hash, salsa::Update)]
55pub struct ModuleRange {
56    pub(crate) module_path: Vec<String>,
57    pub(crate) die: Die,
58    pub(crate) start_offset: usize,
59    pub(crate) end_offset: usize,
60}
61
62impl fmt::Debug for ModuleRange {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        salsa::with_attached_database(|db| {
65            f.debug_struct("ModuleRange")
66                .field("module_path", &self.module_path.join("::"))
67                .field("die", &self.die.location(db))
68                .field("start_offset", &format!("{:#010x}", self.start_offset))
69                .field("end_offset", &format!("{:#010x}", self.end_offset))
70                .finish()
71        })
72        .unwrap_or_else(|| {
73            f.debug_struct("ModuleRange")
74                .field("module_path", &self.module_path.join("::"))
75                .field("start_offset", &format!("{:#010x}", self.start_offset))
76                .field("end_offset", &format!("{:#010x}", self.end_offset))
77                .finish()
78        })
79    }
80}
81
82/// Visitor for building namespace ranges efficiently
83/// Only visits namespaces and uses depth-first traversal to capture ranges
84#[derive(Default)]
85struct ModuleRangeBuilder {
86    module_ranges: Vec<ModuleRange>,
87    modules: Module,
88    last_seen_offset: usize, // Last offset seen during traversal
89    namespace_stack: Vec<(String, Die, usize)>, // (path segment, die, start_offset)
90}
91
92impl<'db> DieVisitor<'db> for ModuleRangeBuilder {
93    fn visit_cu<'a>(
94        walker: &mut DieWalker<'a, 'db, Self>,
95        node: VisitorNode<'a>,
96    ) -> anyhow::Result<()> {
97        if is_rust_cu(&node.die, &node.unit_ref) {
98            walker.visitor.namespace_stack.clear();
99            let die = walker.get_die(node.die);
100            let start_offset = die.offset();
101            walker.visitor.last_seen_offset = start_offset;
102
103            walker
104                .visitor
105                .namespace_stack
106                .push((String::new(), die, start_offset));
107
108            walker.walk_cu()?;
109
110            // for the last range, we need to take the most recently inserted range
111            // and extend it to the end of the CU
112            let module_path = walker
113                .visitor
114                .namespace_stack
115                .iter()
116                .skip(1)
117                .map(|(segment, _, _)| segment.clone())
118                .collect::<Vec<_>>();
119
120            let end_offset = start_offset + node.unit_ref.unit.header.unit_length();
121            walker.visitor.module_ranges.push(ModuleRange {
122                module_path,
123                die,
124                start_offset: walker.visitor.last_seen_offset,
125                end_offset,
126            });
127        }
128        Ok(())
129    }
130
131    fn visit_die<'a>(
132        walker: &mut DieWalker<'a, 'db, Self>,
133        node: VisitorNode<'a>,
134    ) -> anyhow::Result<()> {
135        if node.die.tag() == gimli::DW_TAG_namespace {
136            // Only visit namespaces, skip other DIEs
137            Self::visit_namespace(walker, node)?;
138        }
139        Ok(())
140    }
141
142    fn visit_namespace<'a>(
143        walker: &mut DieWalker<'a, 'db, Self>,
144        node: VisitorNode<'a>,
145    ) -> anyhow::Result<()> {
146        let module_name = get_string_attr(&node.die, gimli::DW_AT_name, &node.unit_ref)
147            .unwrap()
148            .unwrap();
149
150        let die = walker.get_die(node.die);
151        let start_offset = die.offset();
152
153        if let Some((_, last_die, _)) = walker.visitor.namespace_stack.last() {
154            if walker.visitor.last_seen_offset < start_offset {
155                // If the last segment's start offset is before this one,
156                // we need to close the previous namespace range
157                let module_path = walker
158                    .visitor
159                    .namespace_stack
160                    .iter()
161                    .skip(1)
162                    .map(|(segment, _, _)| segment.clone())
163                    .collect::<Vec<_>>();
164
165                let range = ModuleRange {
166                    module_path,
167                    die: *last_die,
168                    start_offset: walker.visitor.last_seen_offset,
169                    end_offset: start_offset,
170                };
171
172                tracing::trace!("Closing module range for {range:#?}",);
173                walker.visitor.last_seen_offset = start_offset;
174
175                // Use the current offset as the end (we'll update this with the next entry)
176                walker.visitor.module_ranges.push(range);
177            }
178        }
179
180        walker
181            .visitor
182            .namespace_stack
183            .push((module_name.clone(), die, start_offset));
184
185        let mut module = &mut walker.visitor.modules;
186        // Traverse or create the module path
187        // (skipping the first segment which is always empty)
188        for (segment, _, _) in walker.visitor.namespace_stack.iter().skip(1) {
189            module = module.modules.entry(segment.clone()).or_default();
190        }
191
192        // push this namespace to the Die entries
193        module.entries.push(die);
194
195        tracing::trace!("Visiting namespace: {module_name} at offset {start_offset:#010x}",);
196        walker.walk_namespace()?;
197
198        // When we're done walking this namespace, record its range
199        if let Some(end_offset) = walker.peek_next_offset() {
200            if let Some((path, _, _)) = walker.visitor.namespace_stack.pop() {
201                let start = walker.visitor.last_seen_offset;
202                if end_offset > start {
203                    walker.visitor.last_seen_offset = end_offset;
204
205                    let mut module_path = walker
206                        .visitor
207                        .namespace_stack
208                        .iter()
209                        .skip(1)
210                        .map(|(segment, _, _)| segment.clone())
211                        .collect::<Vec<_>>();
212
213                    module_path.push(path);
214
215                    let range = ModuleRange {
216                        module_path,
217                        start_offset: start,
218                        end_offset,
219                        die,
220                    };
221                    tracing::trace!("Walked module range: {range:#?}");
222                    // Use the current offset as the end (we'll update this with the next entry)
223                    walker.visitor.module_ranges.push(range);
224                }
225            }
226        }
227
228        Ok(())
229    }
230}
231
232/// Build namespace ranges for efficient offset-based lookups
233/// This is used to efficiently resolve type contexts without full indexing
234#[salsa::tracked(returns(ref))]
235pub fn module_index<'db>(db: &'db dyn DwarfDb, debug_file: DebugFile) -> ModuleIndex<'db> {
236    let mut builder = ModuleRangeBuilder::default();
237    if let Err(e) = walk_file(db, debug_file, &mut builder) {
238        tracing::error!("Failed to walk file: {e}");
239    }
240
241    let ModuleRangeBuilder {
242        module_ranges,
243        modules,
244        namespace_stack: _,
245        last_seen_offset: _,
246    } = builder;
247    // Sort ranges by start offset and fix overlapping ranges
248    let mut ranges = module_ranges;
249    ranges.sort_by_key(|r| r.start_offset);
250
251    ModuleIndex::new(db, ranges, modules)
252}
253
254pub fn get_containing_module(db: &dyn DwarfDb, die: Die) -> Option<Vec<String>> {
255    let module_index = module_index(db, die.file);
256    let die_offset = die.offset();
257    module_index
258        .find_by_offset(db, die_offset)
259        .map(|range| range.module_path.clone())
260}
261
262#[cfg(test)]
263mod tests {
264    use super::*;
265    use crate::test_utils;
266
267    #[test]
268    fn test_module_index() {
269        let _guard = test_utils::init_tracing_and_insta();
270
271        let platform = "aarch64-unknown-linux-gnu";
272        let example_name = "examples/enums";
273
274        let artifacts = test_utils::artifacts_dir(Some(platform));
275        let db = test_utils::test_db(Some(platform));
276        let db = &db;
277        let binary = test_utils::load_binary(db, artifacts.join(example_name));
278
279        let (debug_files, _) = crate::symbols::index_symbol_map(db, binary).unwrap();
280
281        let mut output = String::new();
282
283        for (_, file) in debug_files {
284            let module_index = module_index(db, file);
285            let file_name = file.name(db);
286
287            let by_offset = module_index.by_offset(db);
288            let by_name = module_index.by_name(db);
289            output.push_str(&format!("\n=== {file_name} ===\n"));
290            salsa::attach(db, || {
291                output.push_str("===== By Offset =====\n\n");
292                for range in by_offset {
293                    output.push_str(&format!(
294                        "{:#010x} - {:#010x}: {}\n",
295                        range.start_offset,
296                        range.end_offset,
297                        range.module_path.join("::")
298                    ));
299                }
300
301                fn print_module(module: &Module, indent: usize, output: &mut String) {
302                    let indent_str = " ".repeat(indent);
303
304                    for (name, submodule) in &module.modules {
305                        output.push_str(&format!("{indent_str}{name}:\n"));
306                        print_module(submodule, indent + 2, output);
307                    }
308                }
309
310                output.push_str("\n===== By Name =====\n\n");
311
312                // for modules, just print the nested names
313                print_module(by_name, 0, &mut output);
314            });
315
316            // break;
317        }
318
319        insta::assert_snapshot!(output);
320    }
321}