Skip to main content

dbg_cli/session_db/canonicalizer/
cxx.rs

//! C/C++/Rust/Zig/D/Nim symbol canonicalization.
//!
//! Strategy for Phase 1:
//!   * If the raw symbol starts with `_Z`/`_R` (Itanium / Rust v0 mangling),
//!     shell out to the system `c++filt` (no extra flags) when available.
//!     If the tool isn't installed we fall back to the raw string so
//!     callers still get *something* usable.
//!   * For Rust, strip the trailing `::h[0-9a-f]{16}` hash that rustc
//!     appends to prevent accidental collisions — it's noise for
//!     cross-session joins.
//!   * Collapse libc++ / libstdc++ inline-namespace markers
//!     (`std::__1::`, `std::__cxx11::`) so symbols from different stdlibs
//!     line up.
//!   * KEEP template parameters and parenthesized parameter lists —
//!     they disambiguate overloads and template instantiations. Losing
//!     them would merge `sgemm<float>` and `sgemm<half>` into one row.
//!   * Detect Rust closure syntax (`{{closure}}`, `{closure#N}`) and
//!     mark the symbol `is_synthetic = true`.
19
20use std::process::{Command, Stdio};
21use std::sync::OnceLock;
22
23use regex::Regex;
24
25use super::{CanonicalSymbol, Canonicalizer};
26
/// Canonicalizer for natively compiled languages; remembers which language
/// tag it was created for.
pub struct CxxCanonicalizer {
    /// One of the fixed language tags accepted by [`CxxCanonicalizer::new`].
    lang: &'static str,
}

impl CxxCanonicalizer {
    /// Builds a canonicalizer for `lang`.
    ///
    /// Recognised tags are `"c"`, `"cpp"`, `"rust"`, `"zig"`, `"d"` and
    /// `"nim"`; any other value is treated as `"cpp"`.
    pub fn new(lang: &str) -> Self {
        const SUPPORTED: [&str; 6] = ["c", "cpp", "rust", "zig", "d", "nim"];

        // Map the caller's (possibly short-lived) string onto the matching
        // static tag so the struct never has to own or borrow it.
        let lang = SUPPORTED
            .iter()
            .copied()
            .find(|known| *known == lang)
            .unwrap_or("cpp");

        Self { lang }
    }
}
45
46impl Canonicalizer for CxxCanonicalizer {
47    fn lang(&self) -> &'static str {
48        self.lang
49    }
50
51    fn canonicalize(&self, raw: &str) -> CanonicalSymbol {
52        let (demangled_out, used_demangler) = maybe_demangle(raw);
53        let mut fqn = normalize(&demangled_out);
54        let synthetic = looks_synthetic(&fqn);
55
56        // Rust hash suffix: "core::fmt::Write::write_fmt::h1234567890abcdef"
57        //                 →  "core::fmt::Write::write_fmt"
58        if self.lang == "rust" {
59            fqn = strip_rust_hash(&fqn);
60        }
61
62        CanonicalSymbol {
63            lang: self.lang,
64            fqn,
65            file: None,
66            line: None,
67            demangled: if used_demangler { Some(demangled_out) } else { None },
68            raw: raw.to_string(),
69            is_synthetic: synthetic,
70        }
71    }
72}
73
74/// If `raw` looks like a mangled symbol, pipe it through the system
75/// `c++filt`. Best-effort: returns `(raw, false)` on any failure.
76fn maybe_demangle(raw: &str) -> (String, bool) {
77    if !(raw.starts_with("_Z") || raw.starts_with("_R")) {
78        return (raw.to_string(), false);
79    }
80    // Cache the "is c++filt on this system" decision once per process.
81    static AVAILABLE: OnceLock<bool> = OnceLock::new();
82    let available = *AVAILABLE.get_or_init(|| which::which("c++filt").is_ok());
83    if !available {
84        return (raw.to_string(), false);
85    }
86
87    let out = Command::new("c++filt")
88        .arg(raw)
89        .stdout(Stdio::piped())
90        .stderr(Stdio::null())
91        .output();
92    match out {
93        Ok(o) if o.status.success() => {
94            let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
95            if s.is_empty() || s == raw {
96                (raw.to_string(), false)
97            } else {
98                (s, true)
99            }
100        }
101        _ => (raw.to_string(), false),
102    }
103}
104
/// Rewrites stdlib-specific namespace prefixes to plain `std::`.
///
/// The markers `std::__1::`, `std::__cxx11::` and `__gnu_cxx::` are replaced
/// one after another, in that order, everywhere they occur in `s`.
fn normalize(s: &str) -> String {
    const MARKERS: [&str; 3] = ["std::__1::", "std::__cxx11::", "__gnu_cxx::"];

    MARKERS
        .iter()
        .fold(s.to_owned(), |acc, marker| acc.replace(*marker, "std::"))
}
112
113fn strip_rust_hash(s: &str) -> String {
114    static RE: OnceLock<Regex> = OnceLock::new();
115    let re = RE.get_or_init(|| Regex::new(r"::h[0-9a-f]{16}$").unwrap());
116    re.replace(s, "").to_string()
117}
118
/// Reports whether `s` contains a marker for a closure or lambda.
///
/// This is a plain substring test, so a marker anywhere in the symbol
/// (including inside template arguments) counts.
///
/// Besides the Rust closure markers it recognises the lambda spellings
/// printed by the Itanium demanglers: GNU `c++filt` writes
/// `{lambda(int)#1}` and the LLVM demangler writes `'lambda'(int)`.
fn looks_synthetic(s: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "{{closure}}", // Rust closure, legacy spelling
        "{closure#",   // Rust closure, numbered spelling: "{closure#N}"
        "<lambda",     // "<lambda(...)>" spelling
        "{lambda(",    // GNU c++filt: "main::{lambda(int)#1}::operator()(int) const"
        "'lambda",     // LLVM demangler: "main::'lambda'(int)::operator()(int) const"
        "::$_",        // "$_N" unnamed-entity names
    ];

    MARKERS.iter().any(|marker| s.contains(*marker))
}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Canonicalizes `raw` with a C++ canonicalizer.
    fn as_cpp(raw: &str) -> CanonicalSymbol {
        CxxCanonicalizer::new("cpp").canonicalize(raw)
    }

    /// Canonicalizes `raw` with a Rust canonicalizer.
    fn as_rust(raw: &str) -> CanonicalSymbol {
        CxxCanonicalizer::new("rust").canonicalize(raw)
    }

    #[test]
    fn already_demangled_cpp_passes_through() {
        let sym = as_cpp("foo::bar::baz(int, double) const");
        assert_eq!(sym.fqn, "foo::bar::baz(int, double) const");
        assert_eq!(sym.lang, "cpp");
        assert!(!sym.is_synthetic);
    }

    #[test]
    fn rust_hash_suffix_stripped() {
        let sym = as_rust("core::fmt::Write::write_fmt::h0123456789abcdef");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt");
    }

    #[test]
    fn rust_no_hash_left_alone() {
        let sym = as_rust("core::fmt::Write::write_fmt");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt");
    }

    #[test]
    fn rust_partial_hash_not_stripped() {
        // A suffix shorter than 16 hex digits is not a rustc hash.
        let sym = as_rust("core::fmt::Write::write_fmt::habc");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt::habc");
    }

    #[test]
    fn stdlib_inline_namespaces_collapsed() {
        let sym = as_cpp("std::__1::vector<int>::push_back(int&&)");
        assert_eq!(sym.fqn, "std::vector<int>::push_back(int&&)");
    }

    #[test]
    fn cxx11_inline_collapsed() {
        let sym = as_cpp("std::__cxx11::basic_string<char>::size() const");
        assert_eq!(sym.fqn, "std::basic_string<char>::size() const");
    }

    #[test]
    fn template_params_preserved() {
        let float_sym = as_cpp("sgemm<float>(float const*, int)");
        assert_eq!(float_sym.fqn, "sgemm<float>(float const*, int)");

        let half_sym = as_cpp("sgemm<half>(half const*, int)");
        assert_ne!(float_sym.fqn, half_sym.fqn, "template params must distinguish");
    }

    #[test]
    fn rust_closure_marked_synthetic() {
        let sym = as_rust("my_app::run::{{closure}}::h0123456789abcdef");
        assert!(sym.is_synthetic, "{:?}", sym);
        assert_eq!(sym.fqn, "my_app::run::{{closure}}");
    }

    #[test]
    fn rust_numbered_closure_synthetic() {
        let sym = as_rust("my_app::run::{closure#2}::h0123456789abcdef");
        assert!(sym.is_synthetic);
        assert_eq!(sym.fqn, "my_app::run::{closure#2}");
    }

    #[test]
    fn clang_lambda_synthetic() {
        let sym = as_cpp("foo::<lambda(int)>::operator()(int) const");
        assert!(sym.is_synthetic);
    }

    #[test]
    fn raw_field_is_preserved() {
        let input = "std::__1::vector<int>::push_back(int&&)";
        let sym = as_cpp(input);
        assert_eq!(sym.raw, "std::__1::vector<int>::push_back(int&&)");
    }

    #[test]
    fn mangled_symbol_without_cxxfilt_passes_through() {
        // c++filt may or may not exist on the test runner. In both cases the
        // canonicalizer must not panic, must yield a non-empty name, and must
        // keep `raw` intact.
        let sym = as_cpp("_ZN3foo3bar3bazEi");
        assert!(!sym.fqn.is_empty());
        assert_eq!(sym.raw, "_ZN3foo3bar3bazEi");
    }

    #[test]
    fn key_is_lang_plus_fqn() {
        let sym = as_cpp("foo::bar()");
        assert_eq!(sym.key(), ("cpp", "foo::bar()"));
    }
}