dbg_cli/session_db/canonicalizer/
cxx.rs1use std::process::{Command, Stdio};
21use std::sync::OnceLock;
22
23use regex::Regex;
24
25use super::{CanonicalSymbol, Canonicalizer};
26
/// Canonicalizer for symbols produced by C++-style toolchains.
///
/// The only state is the language label the canonicalizer was built for; that
/// label is reported by `lang()` and copied into every symbol it produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CxxCanonicalizer {
    /// Language label, always one of the fixed literals chosen in `new`.
    lang: &'static str,
}
30
31impl CxxCanonicalizer {
32 pub fn new(lang: &str) -> Self {
33 let lang: &'static str = match lang {
34 "c" => "c",
35 "cpp" => "cpp",
36 "rust" => "rust",
37 "zig" => "zig",
38 "d" => "d",
39 "nim" => "nim",
40 _ => "cpp",
41 };
42 Self { lang }
43 }
44}
45
46impl Canonicalizer for CxxCanonicalizer {
47 fn lang(&self) -> &'static str {
48 self.lang
49 }
50
51 fn canonicalize(&self, raw: &str) -> CanonicalSymbol {
52 let (demangled_out, used_demangler) = maybe_demangle(raw);
53 let mut fqn = normalize(&demangled_out);
54 let synthetic = looks_synthetic(&fqn);
55
56 if self.lang == "rust" {
59 fqn = strip_rust_hash(&fqn);
60 }
61
62 CanonicalSymbol {
63 lang: self.lang,
64 fqn,
65 file: None,
66 line: None,
67 demangled: if used_demangler { Some(demangled_out) } else { None },
68 raw: raw.to_string(),
69 is_synthetic: synthetic,
70 }
71 }
72}
73
74fn maybe_demangle(raw: &str) -> (String, bool) {
77 if !(raw.starts_with("_Z") || raw.starts_with("_R")) {
78 return (raw.to_string(), false);
79 }
80 static AVAILABLE: OnceLock<bool> = OnceLock::new();
82 let available = *AVAILABLE.get_or_init(|| which::which("c++filt").is_ok());
83 if !available {
84 return (raw.to_string(), false);
85 }
86
87 let out = Command::new("c++filt")
88 .arg(raw)
89 .stdout(Stdio::piped())
90 .stderr(Stdio::null())
91 .output();
92 match out {
93 Ok(o) if o.status.success() => {
94 let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
95 if s.is_empty() || s == raw {
96 (raw.to_string(), false)
97 } else {
98 (s, true)
99 }
100 }
101 _ => (raw.to_string(), false),
102 }
103}
104
/// Collapses implementation-specific namespace prefixes into plain `std::`.
///
/// Every occurrence of `std::__1::`, `std::__cxx11::` and `__gnu_cxx::` is
/// replaced with `std::`, in that order. Names containing none of these are
/// returned as an unchanged copy.
fn normalize(s: &str) -> String {
    // (needle, replacement) pairs, applied top to bottom.
    const REWRITES: [(&str, &str); 3] = [
        ("std::__1::", "std::"),
        ("std::__cxx11::", "std::"),
        ("__gnu_cxx::", "std::"),
    ];

    REWRITES
        .iter()
        .fold(s.to_string(), |acc, &(from, to)| acc.replace(from, to))
}
112
113fn strip_rust_hash(s: &str) -> String {
114 static RE: OnceLock<Regex> = OnceLock::new();
115 let re = RE.get_or_init(|| Regex::new(r"::h[0-9a-f]{16}$").unwrap());
116 re.replace(s, "").to_string()
117}
118
/// Reports whether `s` names a compiler-generated entity such as a closure
/// or lambda.
///
/// The check is a plain substring search for any of these markers:
///
/// * `{{closure}}` and `{closure#`: Rust closure path segments;
/// * `<lambda` and `::$_`: lambda / unnamed-type spellings seen in C++ symbol
///   names;
/// * `{lambda`: how GNU `c++filt` renders a demangled lambda
///   (`foo()::{lambda(int)#1}::operator()(int) const`);
/// * `'lambda`: how LLVM-based demanglers render the same thing
///   (`foo()::'lambda'(int)::operator()(int) const`).
///
/// The last two cover names that come out of the demangler; without them a
/// mangled lambda would never be flagged once demangled.
fn looks_synthetic(s: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "{{closure}}",
        "{closure#",
        "<lambda",
        "::$_",
        "{lambda",
        "'lambda",
    ];

    MARKERS.iter().any(|&marker| s.contains(marker))
}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Canonicalizes `raw` with a canonicalizer built for the `"cpp"` label.
    fn canon_cpp(raw: &str) -> CanonicalSymbol {
        CxxCanonicalizer::new("cpp").canonicalize(raw)
    }

    /// Canonicalizes `raw` with a canonicalizer built for the `"rust"` label.
    fn canon_rust(raw: &str) -> CanonicalSymbol {
        CxxCanonicalizer::new("rust").canonicalize(raw)
    }

    #[test]
    fn already_demangled_cpp_passes_through() {
        let sym = canon_cpp("foo::bar::baz(int, double) const");
        assert_eq!(sym.fqn, "foo::bar::baz(int, double) const");
        assert_eq!(sym.lang, "cpp");
        assert!(!sym.is_synthetic);
    }

    #[test]
    fn rust_hash_suffix_stripped() {
        let sym = canon_rust("core::fmt::Write::write_fmt::h0123456789abcdef");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt");
    }

    #[test]
    fn rust_no_hash_left_alone() {
        let sym = canon_rust("core::fmt::Write::write_fmt");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt");
    }

    #[test]
    fn rust_partial_hash_not_stripped() {
        // `::habc` is too short to be a 16-digit hash segment, so it stays.
        let sym = canon_rust("core::fmt::Write::write_fmt::habc");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt::habc");
    }

    #[test]
    fn stdlib_inline_namespaces_collapsed() {
        let sym = canon_cpp("std::__1::vector<int>::push_back(int&&)");
        assert_eq!(sym.fqn, "std::vector<int>::push_back(int&&)");
    }

    #[test]
    fn cxx11_inline_collapsed() {
        let sym = canon_cpp("std::__cxx11::basic_string<char>::size() const");
        assert_eq!(sym.fqn, "std::basic_string<char>::size() const");
    }

    #[test]
    fn template_params_preserved() {
        let float_sym = canon_cpp("sgemm<float>(float const*, int)");
        assert_eq!(float_sym.fqn, "sgemm<float>(float const*, int)");

        let half_sym = canon_cpp("sgemm<half>(half const*, int)");
        assert_ne!(float_sym.fqn, half_sym.fqn, "template params must distinguish");
    }

    #[test]
    fn rust_closure_marked_synthetic() {
        let sym = canon_rust("my_app::run::{{closure}}::h0123456789abcdef");
        assert!(sym.is_synthetic, "{:?}", sym);
        assert_eq!(sym.fqn, "my_app::run::{{closure}}");
    }

    #[test]
    fn rust_numbered_closure_synthetic() {
        let sym = canon_rust("my_app::run::{closure#2}::h0123456789abcdef");
        assert!(sym.is_synthetic);
        assert_eq!(sym.fqn, "my_app::run::{closure#2}");
    }

    #[test]
    fn clang_lambda_synthetic() {
        let sym = canon_cpp("foo::<lambda(int)>::operator()(int) const");
        assert!(sym.is_synthetic);
    }

    #[test]
    fn raw_field_is_preserved() {
        let input = "std::__1::vector<int>::push_back(int&&)";
        let sym = canon_cpp(input);
        assert_eq!(sym.raw, "std::__1::vector<int>::push_back(int&&)");
    }

    #[test]
    fn mangled_symbol_without_cxxfilt_passes_through() {
        // The outcome depends on whether `c++filt` is installed, so only the
        // properties that hold either way are asserted.
        let sym = canon_cpp("_ZN3foo3bar3bazEi");
        assert!(!sym.fqn.is_empty());
        assert_eq!(sym.raw, "_ZN3foo3bar3bazEi");
    }

    #[test]
    fn key_is_lang_plus_fqn() {
        let sym = canon_cpp("foo::bar()");
        assert_eq!(sym.key(), ("cpp", "foo::bar()"));
    }
}