use std::process::{Command, Stdio};
use std::sync::OnceLock;
use regex::Regex;
use super::{CanonicalSymbol, Canonicalizer};
/// Symbol canonicalizer shared by the languages this module recognises
/// (`c`, `cpp`, `rust`, `zig`, `d`, `nim`).
pub struct CxxCanonicalizer {
    /// Language tag reported by this canonicalizer; always one of the
    /// recognised `'static` tags.
    lang: &'static str,
}

impl CxxCanonicalizer {
    /// Builds a canonicalizer for `lang`.
    ///
    /// The tag is matched exactly (case-sensitive) against the recognised
    /// tags; anything else falls back to `"cpp"`.
    pub fn new(lang: &str) -> Self {
        const RECOGNISED: [&str; 6] = ["c", "cpp", "rust", "zig", "d", "nim"];

        let lang = RECOGNISED
            .iter()
            .copied()
            .find(|tag| *tag == lang)
            .unwrap_or("cpp");

        Self { lang }
    }
}
impl Canonicalizer for CxxCanonicalizer {
    /// Returns the language tag chosen at construction time.
    fn lang(&self) -> &'static str {
        self.lang
    }

    /// Turns a raw symbol name into a [`CanonicalSymbol`].
    ///
    /// Steps, in order: attempt demangling, normalize the result, decide
    /// whether the name looks compiler-generated, and (for `rust` only)
    /// strip the trailing hash segment. `file` and `line` are left as `None`,
    /// and `demangled` is populated only when the demangler actually produced
    /// a different name.
    fn canonicalize(&self, raw: &str) -> CanonicalSymbol {
        let (demangled_text, was_demangled) = maybe_demangle(raw);
        let normalized = normalize(&demangled_text);
        // Decided on the normalized name, before any hash stripping.
        let is_synthetic = looks_synthetic(&normalized);

        let fqn = match self.lang {
            "rust" => strip_rust_hash(&normalized),
            _ => normalized,
        };

        CanonicalSymbol {
            lang: self.lang,
            fqn,
            file: None,
            line: None,
            demangled: was_demangled.then_some(demangled_text),
            raw: raw.to_owned(),
            is_synthetic,
        }
    }
}
/// Tries to demangle `raw` by invoking the external `c++filt` program.
///
/// Returns `(name, demangled)`. `demangled` is `true` only when `c++filt`
/// ran successfully and printed a non-empty name different from `raw`;
/// in every other case the input is returned unchanged with `false`.
///
/// Only names starting with `_Z` or `_R` are sent to the tool, and the
/// lookup for `c++filt` on the system is performed once and cached.
fn maybe_demangle(raw: &str) -> (String, bool) {
    static AVAILABLE: OnceLock<bool> = OnceLock::new();

    // Shared "give the input back untouched" result.
    let passthrough = || (raw.to_string(), false);

    let has_mangling_prefix = raw.starts_with("_Z") || raw.starts_with("_R");
    if !has_mangling_prefix {
        return passthrough();
    }

    let tool_present = *AVAILABLE.get_or_init(|| which::which("c++filt").is_ok());
    if !tool_present {
        return passthrough();
    }

    let run = Command::new("c++filt")
        .arg(raw)
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .output();

    // Spawn failures and non-zero exits are both treated as "not demangled".
    let finished = match run {
        Ok(finished) if finished.status.success() => finished,
        _ => return passthrough(),
    };

    let demangled = String::from_utf8_lossy(&finished.stdout).trim().to_string();
    if demangled.is_empty() || demangled == raw {
        passthrough()
    } else {
        (demangled, true)
    }
}
/// Rewrites implementation-specific namespace prefixes to plain `std::`.
///
/// The replacements are applied one after another in the listed order, each
/// over the whole string produced by the previous one.
fn normalize(s: &str) -> String {
    const REWRITTEN_PREFIXES: [&str; 3] = ["std::__1::", "std::__cxx11::", "__gnu_cxx::"];

    REWRITTEN_PREFIXES
        .iter()
        .fold(s.to_owned(), |name, prefix| name.replace(prefix, "std::"))
}
/// Removes a trailing `::h` + 16 lowercase hex digits segment from `s`.
///
/// Names without such a suffix at the very end are returned unchanged.
/// The pattern is compiled once and reused across calls.
fn strip_rust_hash(s: &str) -> String {
    static RE: OnceLock<Regex> = OnceLock::new();

    let hash_suffix = RE.get_or_init(|| Regex::new(r"::h[0-9a-f]{16}$").unwrap());
    let stripped = hash_suffix.replace(s, "");
    stripped.into_owned()
}
/// Reports whether `s` looks like a compiler-generated (closure/lambda) name.
///
/// Returns `true` when `s` contains any of the known markers:
///
/// * `{{closure}}` and `{closure#` — Rust closure path segments.
/// * `<lambda` and `::$_` — lambda spellings used by GCC and Clang for
///   unnamed closure types.
/// * `{lambda` — how GNU `c++filt` prints an Itanium-mangled closure type,
///   e.g. `foo()::{lambda(int)#1}::operator()(int) const`.
/// * `'lambda` — how LLVM's demangler prints the same thing, e.g.
///   `foo()::'lambda'(int)::operator()(int) const`.
///
/// The last two matter because names that went through a demangler use
/// those spellings rather than `<lambda`.
fn looks_synthetic(s: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "{{closure}}",
        "{closure#",
        "<lambda",
        "::$_",
        "{lambda",
        "'lambda",
    ];

    MARKERS.iter().any(|marker| s.contains(marker))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Canonicalizer configured for C++ symbols.
    fn cpp() -> CxxCanonicalizer {
        CxxCanonicalizer::new("cpp")
    }

    /// Canonicalizer configured for Rust symbols.
    fn rust() -> CxxCanonicalizer {
        CxxCanonicalizer::new("rust")
    }

    // An input without a mangling prefix is never handed to the demangler.
    #[test]
    fn already_demangled_cpp_passes_through() {
        let sym = cpp().canonicalize("foo::bar::baz(int, double) const");
        assert_eq!(sym.fqn, "foo::bar::baz(int, double) const");
        assert_eq!(sym.lang, "cpp");
        assert!(!sym.is_synthetic);
    }

    #[test]
    fn rust_hash_suffix_stripped() {
        let sym = rust().canonicalize("core::fmt::Write::write_fmt::h0123456789abcdef");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt");
    }

    #[test]
    fn rust_no_hash_left_alone() {
        let sym = rust().canonicalize("core::fmt::Write::write_fmt");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt");
    }

    // Only a full 16-digit hash segment is removed.
    #[test]
    fn rust_partial_hash_not_stripped() {
        let sym = rust().canonicalize("core::fmt::Write::write_fmt::habc");
        assert_eq!(sym.fqn, "core::fmt::Write::write_fmt::habc");
    }

    #[test]
    fn stdlib_inline_namespaces_collapsed() {
        let sym = cpp().canonicalize("std::__1::vector<int>::push_back(int&&)");
        assert_eq!(sym.fqn, "std::vector<int>::push_back(int&&)");
    }

    #[test]
    fn cxx11_inline_collapsed() {
        let sym = cpp().canonicalize("std::__cxx11::basic_string<char>::size() const");
        assert_eq!(sym.fqn, "std::basic_string<char>::size() const");
    }

    #[test]
    fn template_params_preserved() {
        let canon = cpp();
        let float_sym = canon.canonicalize("sgemm<float>(float const*, int)");
        assert_eq!(float_sym.fqn, "sgemm<float>(float const*, int)");
        let half_sym = canon.canonicalize("sgemm<half>(half const*, int)");
        assert_ne!(float_sym.fqn, half_sym.fqn, "template params must distinguish");
    }

    #[test]
    fn rust_closure_marked_synthetic() {
        let sym = rust().canonicalize("my_app::run::{{closure}}::h0123456789abcdef");
        assert!(sym.is_synthetic, "{:?}", sym);
        assert_eq!(sym.fqn, "my_app::run::{{closure}}");
    }

    #[test]
    fn rust_numbered_closure_synthetic() {
        let sym = rust().canonicalize("my_app::run::{closure#2}::h0123456789abcdef");
        assert!(sym.is_synthetic);
        assert_eq!(sym.fqn, "my_app::run::{closure#2}");
    }

    #[test]
    fn clang_lambda_synthetic() {
        let sym = cpp().canonicalize("foo::<lambda(int)>::operator()(int) const");
        assert!(sym.is_synthetic);
    }

    // `raw` keeps the exact input even when `fqn` has been normalized.
    #[test]
    fn raw_field_is_preserved() {
        let sym = cpp().canonicalize("std::__1::vector<int>::push_back(int&&)");
        assert_eq!(sym.raw, "std::__1::vector<int>::push_back(int&&)");
    }

    // Holds whether or not `c++filt` is present: only checks that some name
    // comes back and that the raw input is kept.
    #[test]
    fn mangled_symbol_without_cxxfilt_passes_through() {
        let sym = cpp().canonicalize("_ZN3foo3bar3bazEi");
        assert!(!sym.fqn.is_empty());
        assert_eq!(sym.raw, "_ZN3foo3bar3bazEi");
    }

    #[test]
    fn key_is_lang_plus_fqn() {
        let sym = cpp().canonicalize("foo::bar()");
        assert_eq!(sym.key(), ("cpp", "foo::bar()"));
    }
}