Skip to main content

profile_bee/
probe_resolver.rs

1//! Symbol discovery engine for resolving probe specs to concrete uprobe targets.
2//!
3//! Scans ELF binaries (via /proc/pid/maps or system library paths) to find
4//! symbols matching a `ProbeSpec`. Supports exact, glob, regex, and demangled
5//! name matching, as well as source-location resolution via DWARF debug info.
6
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10
11use addr2line::demangle;
12use gimli::{self, EndianSlice, NativeEndian};
13use object::{Object, ObjectSymbol};
14use procfs::process::{MMPermissions, MMapPath, Process};
15
16use crate::probe_spec::{ProbeSpec, SymbolPattern};
17
/// A resolved probe target — a concrete (library, symbol, offset) triple
/// that can be passed to `UProbe::attach()`.
///
/// Values are produced either by symbol-table scanning or by DWARF
/// source-location resolution; the per-field notes below call out where the
/// two producers differ.
#[derive(Debug, Clone)]
pub struct ResolvedProbe {
    /// Path to the ELF binary or shared library that was scanned. For
    /// PID-based resolution this may be routed through `/proc/<pid>/root`.
    pub library_path: PathBuf,
    /// The raw (mangled) symbol name as it appears in the ELF. For
    /// source-location probes this is the DWARF function name, or the
    /// address formatted as `0x<hex>` when no enclosing function was found.
    pub symbol_name: String,
    /// Byte offset from the start of the symbol (from the spec's +offset).
    /// Always 0 for source-location probes.
    pub offset: u64,
    /// Virtual address of the symbol in the ELF file. For source-location
    /// probes this is the address of the selected line-table row.
    pub address: u64,
    /// Symbol size (if available from ELF). 0 for source-location probes.
    pub size: u64,
    /// Demangled name (if different from symbol_name). Always `None` for
    /// source-location probes.
    pub demangled: Option<String>,
    /// Whether this should be a return probe (copied from the probe spec).
    pub is_ret: bool,
}
37
/// Resolves probe specifications to concrete uprobe attach targets.
///
/// The type carries no state: every resolution re-reads the process maps or
/// the system library list. `Default` is provided alongside `new()` so the
/// resolver can be embedded in types that derive `Default`.
#[derive(Debug, Default, Clone)]
pub struct ProbeResolver;
40
41impl ProbeResolver {
42    pub fn new() -> Self {
43        Self
44    }
45
46    /// Resolve a probe spec for a running process.
47    ///
48    /// Reads /proc/<pid>/maps, scans each mapped executable ELF for matching symbols.
49    pub fn resolve_for_pid(
50        &self,
51        spec: &ProbeSpec,
52        pid: u32,
53    ) -> Result<Vec<ResolvedProbe>, String> {
54        match spec {
55            ProbeSpec::Symbol {
56                library,
57                pattern,
58                offset,
59                is_ret,
60            } => {
61                let maps = self.get_executable_maps(pid)?;
62                let mut results = Vec::new();
63
64                for (path, _start, _end) in &maps {
65                    // If library filter is set, check it
66                    if let Some(lib_filter) = library {
67                        if !library_matches(lib_filter, path) {
68                            continue;
69                        }
70                    }
71
72                    match self.scan_elf_for_symbols(path, pattern, *offset, *is_ret) {
73                        Ok(mut probes) => results.append(&mut probes),
74                        Err(e) => {
75                            // Non-fatal: some mapped files may not be readable
76                            tracing::debug!("skipping {}: {}", path.display(), e);
77                        }
78                    }
79                }
80
81                Ok(results)
82            }
83            ProbeSpec::SourceLocation { file, line, is_ret } => {
84                let maps = self.get_executable_maps(pid)?;
85                let mut results = Vec::new();
86
87                for (path, _start, _end) in &maps {
88                    match self.resolve_source_location(path, file, *line, *is_ret) {
89                        Ok(mut probes) => results.append(&mut probes),
90                        Err(_) => continue,
91                    }
92                }
93
94                Ok(results)
95            }
96        }
97    }
98
99    /// Resolve a probe spec by scanning system library paths.
100    ///
101    /// Used when no --pid is specified. Scans well-known library directories
102    /// and ldconfig cache.
103    pub fn resolve_system_wide(&self, spec: &ProbeSpec) -> Result<Vec<ResolvedProbe>, String> {
104        match spec {
105            ProbeSpec::Symbol {
106                library,
107                pattern,
108                offset,
109                is_ret,
110            } => {
111                let lib_paths = if let Some(lib_filter) = library {
112                    // If a specific library is named, find it
113                    self.find_library_paths(lib_filter)?
114                } else {
115                    // Scan common library directories
116                    self.get_system_libraries()?
117                };
118
119                let mut results = Vec::new();
120                for path in &lib_paths {
121                    match self.scan_elf_for_symbols(path, pattern, *offset, *is_ret) {
122                        Ok(mut probes) => results.append(&mut probes),
123                        Err(_) => continue,
124                    }
125                }
126
127                Ok(results)
128            }
129            ProbeSpec::SourceLocation { file, line, is_ret } => {
130                // For source locations without a PID, there's not much we can do
131                // besides scanning system libraries for DWARF info
132                let lib_paths = self.get_system_libraries()?;
133                let mut results = Vec::new();
134
135                for path in &lib_paths {
136                    match self.resolve_source_location(path, file, *line, *is_ret) {
137                        Ok(mut probes) => results.append(&mut probes),
138                        Err(_) => continue,
139                    }
140                }
141
142                Ok(results)
143            }
144        }
145    }
146
147    /// Get all executable memory-mapped files for a process.
148    fn get_executable_maps(&self, pid: u32) -> Result<Vec<(PathBuf, u64, u64)>, String> {
149        let process =
150            Process::new(pid as i32).map_err(|e| format!("failed to open /proc/{}: {}", pid, e))?;
151
152        let maps = process
153            .maps()
154            .map_err(|e| format!("failed to read /proc/{}/maps: {}", pid, e))?;
155
156        let root_path = format!("/proc/{}/root", pid);
157        let mut seen = HashMap::new();
158        let mut result = Vec::new();
159
160        for map in maps.iter() {
161            if !map.perms.contains(MMPermissions::EXECUTE)
162                || !map.perms.contains(MMPermissions::READ)
163            {
164                continue;
165            }
166
167            let file_path = match &map.pathname {
168                MMapPath::Path(p) => p.to_path_buf(),
169                _ => continue,
170            };
171
172            // Deduplicate: same binary may be mapped multiple times
173            if seen.contains_key(&file_path) {
174                continue;
175            }
176            seen.insert(file_path.clone(), ());
177
178            // Resolve through /proc/<pid>/root for container/namespace support
179            let resolved = if file_path.is_absolute() {
180                let ns_path = PathBuf::from(&root_path)
181                    .join(file_path.strip_prefix("/").unwrap_or(&file_path));
182                if ns_path.exists() {
183                    ns_path
184                } else {
185                    file_path
186                }
187            } else {
188                file_path
189            };
190
191            result.push((resolved, map.address.0, map.address.1));
192        }
193
194        Ok(result)
195    }
196
197    /// Scan an ELF binary for symbols matching the given pattern.
198    fn scan_elf_for_symbols(
199        &self,
200        path: &Path,
201        pattern: &SymbolPattern,
202        spec_offset: u64,
203        is_ret: bool,
204    ) -> Result<Vec<ResolvedProbe>, String> {
205        let data = fs::read(path).map_err(|e| format!("cannot read {}: {}", path.display(), e))?;
206
207        let obj = object::File::parse(&*data)
208            .map_err(|e| format!("cannot parse ELF {}: {}", path.display(), e))?;
209
210        let mut results = Vec::new();
211        let mut seen_names = HashMap::new();
212
213        // Iterate both .symtab and .dynsym
214        for symbol in obj.symbols().chain(obj.dynamic_symbols()) {
215            let name = match symbol.name() {
216                Ok(n) if !n.is_empty() => n,
217                _ => continue,
218            };
219
220            // Skip non-function symbols
221            if symbol.kind() != object::SymbolKind::Text {
222                continue;
223            }
224
225            // Skip undefined (imported) symbols
226            if symbol.is_undefined() {
227                continue;
228            }
229
230            // Deduplicate
231            if seen_names.contains_key(name) {
232                continue;
233            }
234
235            let matched = match pattern {
236                SymbolPattern::Exact(_) | SymbolPattern::Glob(_) | SymbolPattern::Regex(_) => {
237                    pattern.matches(name)
238                }
239                SymbolPattern::Demangled(_) => {
240                    // Demangle and check
241                    let demangled = try_demangle(name);
242                    if let Some(ref dm) = demangled {
243                        pattern.matches_demangled(dm)
244                    } else {
245                        // No demangling possible, try raw name
246                        pattern.matches_demangled(name)
247                    }
248                }
249            };
250
251            if matched {
252                seen_names.insert(name.to_string(), ());
253                let demangled = try_demangle(name);
254
255                results.push(ResolvedProbe {
256                    library_path: path.to_path_buf(),
257                    symbol_name: name.to_string(),
258                    offset: spec_offset,
259                    address: symbol.address(),
260                    size: symbol.size(),
261                    demangled,
262                    is_ret,
263                });
264            }
265        }
266
267        Ok(results)
268    }
269
270    /// Resolve a source file:line to a probe target using DWARF debug info.
271    fn resolve_source_location(
272        &self,
273        elf_path: &Path,
274        target_file: &str,
275        target_line: u32,
276        is_ret: bool,
277    ) -> Result<Vec<ResolvedProbe>, String> {
278        let data =
279            fs::read(elf_path).map_err(|e| format!("cannot read {}: {}", elf_path.display(), e))?;
280
281        let obj = object::File::parse(&*data)
282            .map_err(|e| format!("cannot parse ELF {}: {}", elf_path.display(), e))?;
283
284        // Load DWARF sections as EndianSlice references into `data`
285        let load_section =
286            |id: gimli::SectionId| -> Result<EndianSlice<'_, NativeEndian>, gimli::Error> {
287                let slice = obj
288                    .section_by_name(id.name())
289                    .and_then(|s| {
290                        use object::ObjectSection;
291                        s.data().ok()
292                    })
293                    .unwrap_or(&[]);
294                Ok(EndianSlice::new(slice, NativeEndian))
295            };
296
297        let dwarf = gimli::Dwarf::load(load_section)
298            .map_err(|e| format!("failed to load DWARF from {}: {}", elf_path.display(), e))?;
299
300        let mut results = Vec::new();
301        let mut units = dwarf.units();
302
303        while let Ok(Some(header)) = units.next() {
304            let unit = match dwarf.unit(header) {
305                Ok(u) => u,
306                Err(_) => continue,
307            };
308
309            let line_program = match unit.line_program.clone() {
310                Some(lp) => lp,
311                None => continue,
312            };
313
314            let mut rows = line_program.rows();
315            let mut best_match: Option<(u64, u32)> = None; // (address, actual_line)
316
317            while let Ok(Some((header, row))) = rows.next_row() {
318                if row.end_sequence() {
319                    continue;
320                }
321
322                if let Some(file_entry) = row.file(header) {
323                    let file_name: Option<String> = dwarf
324                        .attr_string(&unit, file_entry.path_name())
325                        .ok()
326                        .and_then(|s| s.to_string().ok().map(|s| s.to_string()));
327
328                    if let Some(ref fname) = file_name {
329                        // Match: the file name ends with the target file name
330                        // This handles both "main.c" matching "/home/user/src/main.c"
331                        // and exact matches
332                        if fname.ends_with(target_file) || target_file.ends_with(fname.as_str()) {
333                            if let Some(line) = row.line() {
334                                let line_num = line.get() as u32;
335                                // Find the closest line >= target_line
336                                if line_num >= target_line {
337                                    match best_match {
338                                        None => best_match = Some((row.address(), line_num)),
339                                        Some((_, best_line)) if line_num < best_line => {
340                                            best_match = Some((row.address(), line_num));
341                                        }
342                                        _ => {}
343                                    }
344                                }
345                            }
346                        }
347                    }
348                }
349            }
350
351            if let Some((address, actual_line)) = best_match {
352                // Try to find the function name at this address
353                let fn_name = find_function_at_address(&dwarf, &unit, address);
354
355                results.push(ResolvedProbe {
356                    library_path: elf_path.to_path_buf(),
357                    symbol_name: fn_name.unwrap_or_else(|| format!("0x{:x}", address)),
358                    offset: 0, // Source location resolves to exact address
359                    address,
360                    size: 0,
361                    demangled: None,
362                    is_ret,
363                });
364
365                if actual_line != target_line {
366                    tracing::info!(
367                        "{}:{} resolved to line {} at 0x{:x}",
368                        target_file,
369                        target_line,
370                        actual_line,
371                        address,
372                    );
373                }
374            }
375        }
376
377        Ok(results)
378    }
379
380    /// Find library paths matching a name (e.g. "libc" -> "/usr/lib/x86_64-linux-gnu/libc.so.6").
381    fn find_library_paths(&self, name: &str) -> Result<Vec<PathBuf>, String> {
382        // If it's an absolute path, use directly
383        if name.starts_with('/') {
384            if Path::new(name).exists() {
385                return Ok(vec![PathBuf::from(name)]);
386            } else {
387                return Err(format!("library not found: {}", name));
388            }
389        }
390
391        // Try ldconfig cache
392        let mut paths = Vec::new();
393
394        if let Ok(output) = std::process::Command::new("ldconfig").arg("-p").output() {
395            let stdout = String::from_utf8_lossy(&output.stdout);
396            for line in stdout.lines() {
397                // Format: "    libfoo.so.1 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libfoo.so.1"
398                if let Some(arrow_pos) = line.find("=>") {
399                    let lib_part = line[..arrow_pos].trim();
400                    let path_part = line[arrow_pos + 2..].trim();
401
402                    // Match: library name starts with the filter
403                    // e.g. "libc" matches "libc.so.6", "libc.so", etc.
404                    let lib_name = lib_part.split_whitespace().next().unwrap_or("");
405                    if lib_name.starts_with(name) || lib_name.starts_with(&format!("lib{}", name)) {
406                        let path = PathBuf::from(path_part);
407                        if path.exists() && !paths.contains(&path) {
408                            paths.push(path);
409                        }
410                    }
411                }
412            }
413        }
414
415        // Fallback: scan common directories
416        if paths.is_empty() {
417            let search_dirs = [
418                "/usr/lib",
419                "/usr/lib64",
420                "/lib",
421                "/lib64",
422                "/usr/lib/x86_64-linux-gnu",
423                "/usr/lib/aarch64-linux-gnu",
424            ];
425
426            for dir in &search_dirs {
427                if let Ok(entries) = fs::read_dir(dir) {
428                    for entry in entries.flatten() {
429                        let fname = entry.file_name();
430                        let fname_str = fname.to_string_lossy();
431                        if fname_str.starts_with(name)
432                            || fname_str.starts_with(&format!("lib{}", name))
433                        {
434                            if fname_str.contains(".so") {
435                                let path = entry.path();
436                                if !paths.contains(&path) {
437                                    paths.push(path);
438                                }
439                            }
440                        }
441                    }
442                }
443            }
444        }
445
446        if paths.is_empty() {
447            Err(format!(
448                "library '{}' not found in ldconfig cache or standard paths",
449                name
450            ))
451        } else {
452            Ok(paths)
453        }
454    }
455
456    /// Get a list of commonly-used system libraries for system-wide scanning.
457    fn get_system_libraries(&self) -> Result<Vec<PathBuf>, String> {
458        let mut paths = Vec::new();
459
460        // Parse ldconfig cache for all libraries
461        if let Ok(output) = std::process::Command::new("ldconfig").arg("-p").output() {
462            let stdout = String::from_utf8_lossy(&output.stdout);
463            for line in stdout.lines() {
464                if let Some(arrow_pos) = line.find("=>") {
465                    let path_str = line[arrow_pos + 2..].trim();
466                    let path = PathBuf::from(path_str);
467                    if path.exists() && !paths.contains(&path) {
468                        paths.push(path);
469                    }
470                }
471            }
472        }
473
474        Ok(paths)
475    }
476}
477
478/// Try to demangle a symbol name using both Rust and C++ demanglers.
479fn try_demangle(name: &str) -> Option<String> {
480    // Try Rust demangling first
481    let rust_result = demangle(name, gimli::DW_LANG_Rust);
482    if let Some(ref demangled) = rust_result {
483        if demangled != name {
484            return rust_result;
485        }
486    }
487
488    // Try C++ demangling
489    let cpp_result = demangle(name, gimli::DW_LANG_C_plus_plus);
490    if let Some(ref demangled) = cpp_result {
491        if demangled != name {
492            return cpp_result;
493        }
494    }
495
496    None
497}
498
/// Check if a library filter matches a given path.
///
/// Rules, in order:
/// 1. The filter equals the path.
/// 2. A filter starting with `/` is a path prefix. It is compared against the
///    path as given and, when the path has the form
///    `/proc/<pid>/root/<rest>`, also against `/<rest>` — mapped files are
///    resolved through `/proc/<pid>/root`, and without this an absolute
///    filter such as `/usr/lib/libc.so.6` would never match them.
/// 3. Otherwise the filter is compared with the file name only: the name
///    starts with `filter` or `lib<filter>`, or contains `<filter>.so`.
fn library_matches(filter: &str, path: &Path) -> bool {
    let path_str = path.to_string_lossy();

    // Exact path match
    if path_str == filter {
        return true;
    }

    // Absolute path prefix match
    if filter.starts_with('/') {
        // The same file as named from inside the target's root, if the path
        // was routed through /proc/<numeric pid>/root.
        let inner_path: Option<&str> = path_str
            .strip_prefix("/proc/")
            .and_then(|rest| rest.split_once('/'))
            .filter(|(pid, _)| !pid.is_empty() && pid.bytes().all(|b| b.is_ascii_digit()))
            .and_then(|(_, tail)| tail.strip_prefix("root"))
            .filter(|tail| tail.starts_with('/'));

        return path_str.starts_with(filter)
            || inner_path.map_or(false, |inner| inner.starts_with(filter));
    }

    // Library name match: "libc" matches "/usr/lib/x86_64-linux-gnu/libc.so.6"
    let file_name = path
        .file_name()
        .map(|f| f.to_string_lossy())
        .unwrap_or_default();

    let lib_prefixed = format!("lib{}", filter);
    let with_so = format!("{}.so", filter);

    file_name.starts_with(filter)
        || file_name.starts_with(lib_prefixed.as_str())
        // Also match "<filter>.so" anywhere in the name. This covers
        // "lib<filter>.so" as well, since that string contains "<filter>.so".
        || file_name.contains(with_so.as_str())
}
525
526/// Find the function name at a given address using DWARF debug info.
527fn find_function_at_address<R: gimli::Reader>(
528    dwarf: &gimli::Dwarf<R>,
529    unit: &gimli::Unit<R>,
530    target_address: u64,
531) -> Option<String> {
532    let mut entries = unit.entries();
533    while let Ok(Some((_, entry))) = entries.next_dfs() {
534        if entry.tag() == gimli::DW_TAG_subprogram {
535            // Check if this function contains the target address
536            let low_pc = entry
537                .attr_value(gimli::DW_AT_low_pc)
538                .ok()
539                .flatten()
540                .and_then(|v| match v {
541                    gimli::AttributeValue::Addr(addr) => Some(addr),
542                    _ => None,
543                });
544
545            if let Some(low) = low_pc {
546                let high_pc = entry
547                    .attr_value(gimli::DW_AT_high_pc)
548                    .ok()
549                    .flatten()
550                    .and_then(|v| match v {
551                        gimli::AttributeValue::Addr(addr) => Some(addr),
552                        gimli::AttributeValue::Udata(size) => Some(low + size),
553                        _ => None,
554                    });
555
556                let contains = match high_pc {
557                    Some(high) => target_address >= low && target_address < high,
558                    None => target_address == low,
559                };
560
561                if contains {
562                    if let Some(name) = entry
563                        .attr_value(gimli::DW_AT_name)
564                        .ok()
565                        .flatten()
566                        .and_then(|v| dwarf.attr_string(unit, v).ok())
567                        .and_then(|s| s.to_string().ok().map(|s| s.to_string()))
568                    {
569                        return Some(name);
570                    }
571                }
572            }
573        }
574    }
575    None
576}
577
578/// Format resolved probes for display (used by --list-probes).
579pub fn format_resolved_probes(probes: &[ResolvedProbe]) -> String {
580    if probes.is_empty() {
581        return "No matching symbols found.".to_string();
582    }
583
584    // Group by library path
585    let mut by_library: HashMap<&Path, Vec<&ResolvedProbe>> = HashMap::new();
586    for probe in probes {
587        by_library
588            .entry(&probe.library_path)
589            .or_default()
590            .push(probe);
591    }
592
593    let mut output = String::new();
594    let mut libs: Vec<_> = by_library.keys().collect();
595    libs.sort();
596
597    for lib in libs {
598        let probes = &by_library[lib];
599        output.push_str(&format!("\n{}:\n", lib.display()));
600
601        let mut sorted_probes = probes.to_vec();
602        sorted_probes.sort_by_key(|p| p.address);
603
604        for probe in sorted_probes {
605            let name_display = if let Some(ref dm) = probe.demangled {
606                format!("{} ({})", probe.symbol_name, dm)
607            } else {
608                probe.symbol_name.clone()
609            };
610
611            if probe.size > 0 {
612                output.push_str(&format!(
613                    "  {:<50} 0x{:08x}  ({} bytes)\n",
614                    name_display, probe.address, probe.size
615                ));
616            } else {
617                output.push_str(&format!("  {:<50} 0x{:08x}\n", name_display, probe.address));
618            }
619        }
620    }
621
622    let total = probes.len();
623    let lib_count = by_library.len();
624    output.push_str(&format!(
625        "\nTotal: {} match{} across {} librar{}\n",
626        total,
627        if total == 1 { "" } else { "es" },
628        lib_count,
629        if lib_count == 1 { "y" } else { "ies" },
630    ));
631
632    output
633}
634
#[cfg(test)]
mod tests {
    use super::*;

    /// Name, full file name and full path filters all select libc; an
    /// unrelated library name does not.
    #[test]
    fn test_library_matches_exact() {
        let libc = Path::new("/usr/lib/x86_64-linux-gnu/libc.so.6");
        let accepted = ["libc", "libc.so.6", "/usr/lib/x86_64-linux-gnu/libc.so.6"];

        for filter in accepted.iter() {
            assert!(library_matches(filter, libc));
        }
        assert!(!library_matches("libpthread", libc));
    }

    /// The `lib` prefix is optional in a filter.
    #[test]
    fn test_library_matches_lib_prefix() {
        let pthread = Path::new("/usr/lib/libpthread.so.0");

        for filter in ["pthread", "libpthread"].iter() {
            assert!(library_matches(filter, pthread));
        }
    }

    /// A typical Rust mangled symbol is demangled to something.
    #[test]
    fn test_try_demangle_rust() {
        let mangled = "_ZN3std2io5stdio6_print17h1234567890abcdefE";
        assert!(try_demangle(mangled).is_some());
    }

    /// A plain C symbol needs no demangling, so the result is `None`.
    #[test]
    fn test_try_demangle_plain() {
        assert!(try_demangle("malloc").is_none());
    }
}
668}