Skip to main content

dbg_cli/session_db/canonicalizer/
dotnet.rs

1//! .NET (CoreCLR / Mono) symbol canonicalization.
2//!
3//! Canonical form: `Namespace.Class.Method` with:
4//!   * Parameter lists dropped (CLR metadata tokens disambiguate overloads;
5//!     cross-session joins don't need the signature).
6//!   * Nested-class `+` separators normalized to `.`
7//!     (`Outer+Inner.Method` → `Outer.Inner.Method`).
8//!   * Module prefix `asm!Class.Method` stripped when present.
9//!   * Async state machines `<Method>d__N.MoveNext` unwrapped to `Method`.
10//!     The `MoveNext` entry from a state machine is what samplers see; the
11//!     user wrote `async Method()`, so that's what the agent should see.
12//!   * Generic backtick notation (`List`1`) preserved — it's part of the
13//!     CLR symbol and cross-session stable.
14//!   * Compiler-generated display classes (`<>c`, `<>c__DisplayClass`)
15//!     marked `is_synthetic = true`.
16
17use std::sync::OnceLock;
18
19use regex::Regex;
20
21use super::{CanonicalSymbol, Canonicalizer};
22
/// Stateless canonicalizer for .NET (CoreCLR / Mono) symbols.
///
/// The type carries no data; all behavior lives in its `Canonicalizer`
/// implementation. The common traits are derived so the value can be
/// logged, copied, and default-constructed freely.
#[derive(Debug, Clone, Copy, Default)]
pub struct DotnetCanonicalizer;
24
25impl Canonicalizer for DotnetCanonicalizer {
26    fn lang(&self) -> &'static str {
27        "dotnet"
28    }
29
30    fn canonicalize(&self, raw: &str) -> CanonicalSymbol {
31        let stripped = strip_module_prefix(raw);
32        let stripped = strip_param_list(stripped);
33        let stripped = stripped.replace('+', ".");
34
35        let (fqn, synthetic) = if let Some(unwrapped) = unwrap_async_state_machine(&stripped) {
36            (unwrapped, false)
37        } else if looks_synthetic(&stripped) {
38            (stripped.clone(), true)
39        } else {
40            (stripped.clone(), false)
41        };
42
43        CanonicalSymbol {
44            lang: "dotnet",
45            fqn,
46            file: None,
47            line: None,
48            demangled: None,
49            raw: raw.to_string(),
50            is_synthetic: synthetic,
51        }
52    }
53
54    fn canonicalize_structured(
55        &self,
56        module: &str,
57        class: &str,
58        method: &str,
59        _sig: &str,
60    ) -> CanonicalSymbol {
61        let _ = module; // CLR module is noise for canonical identity
62        let joined = if class.is_empty() {
63            method.to_string()
64        } else {
65            format!("{class}.{method}")
66        };
67        self.canonicalize(&joined)
68    }
69
70    fn resolve_async_frame(&self, raw: &str) -> Option<String> {
71        unwrap_async_state_machine(raw)
72    }
73}
74
/// Removes a leading `module!` qualifier from a symbol.
///
/// Everything up to and including the first `!` is discarded
/// (`System.Private.CoreLib!System.String.Concat` → `System.String.Concat`).
/// Input without a `!` is returned unchanged.
fn strip_module_prefix(s: &str) -> &str {
    s.split_once('!').map_or(s, |(_module, symbol)| symbol)
}
83
/// Drops a trailing parameter list from a symbol.
///
/// The text is cut at the first `(` that is not nested inside angle
/// brackets, so parentheses appearing within generic arguments `<...>` are
/// left alone. When no such `(` exists the whole input is returned.
fn strip_param_list(s: &str) -> String {
    // Running `<` / `>` balance. It may go negative on stray `>`; that still
    // counts as "outside generics" for the purpose of the cut.
    let mut generic_depth: i32 = 0;
    // All delimiters are ASCII, so scanning bytes yields the same offsets as
    // scanning chars and the cut always lands on a char boundary.
    let cut = s.bytes().position(|b| {
        match b {
            b'<' => generic_depth += 1,
            b'>' => generic_depth -= 1,
            b'(' => return generic_depth <= 0,
            _ => {}
        }
        false
    });
    s[..cut.unwrap_or(s.len())].to_owned()
}
98
99/// `<MethodAsync>d__7.MoveNext` → `MethodAsync`.
100/// Also handles the variant `<>c__DisplayClass0_0.<Method>b__0` (anonymous
101/// local inside async method) — those we treat as synthetic rather than
102/// unwrapping.
103fn unwrap_async_state_machine(s: &str) -> Option<String> {
104    static RE: OnceLock<Regex> = OnceLock::new();
105    let re = RE.get_or_init(|| {
106        Regex::new(r"(?x)
107            ^(?P<prefix>.*?)               # namespace / class prefix (non-greedy)
108            <(?P<method>[A-Za-z_][A-Za-z0-9_]*)>   # <Method>
109            d__\d+                         # d__N  (state machine discriminator)
110            \.MoveNext$").unwrap()
111    });
112    re.captures(s).map(|c| {
113        let prefix = c.name("prefix").unwrap().as_str();
114        let method = c.name("method").unwrap().as_str();
115        if prefix.is_empty() {
116            method.to_string()
117        } else {
118            // prefix already ends with '.' if there was a namespace
119            format!("{prefix}{method}")
120        }
121    })
122}
123
/// Heuristically detects compiler-generated symbols.
///
/// A symbol is considered synthetic when it contains one of the closure /
/// anonymous-type markers, or when a `.<`-introduced member also carries a
/// `>b__` (lambda) or `>g__` (local function) tag.
fn looks_synthetic(s: &str) -> bool {
    // Substrings that on their own identify a generated type.
    const TYPE_MARKERS: [&str; 4] = ["<>c__DisplayClass", "<>c.", "<>c<>", "__AnonymousType"];

    if TYPE_MARKERS.iter().any(|marker| s.contains(marker)) {
        return true;
    }

    // Generated members: `.<Owner>b__N` / `.<Owner>g__Name|N`.
    s.contains(".<") && (s.contains(">b__") || s.contains(">g__"))
}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `raw` through a `DotnetCanonicalizer`.
    fn canon(raw: &str) -> CanonicalSymbol {
        DotnetCanonicalizer.canonicalize(raw)
    }

    /// A plain fully-qualified name passes through untouched.
    #[test]
    fn simple_fqn_preserved() {
        let sym = canon("MyApp.Services.OrderService.ProcessOrder");
        assert_eq!(sym.fqn, "MyApp.Services.OrderService.ProcessOrder");
        assert_eq!(sym.lang, "dotnet");
        assert!(!sym.is_synthetic);
    }

    /// The parenthesized signature is removed.
    #[test]
    fn param_list_dropped() {
        let sym = canon("MyApp.Foo.Bar(Int32, String)");
        assert_eq!(sym.fqn, "MyApp.Foo.Bar");
    }

    /// Generic arguments on the method survive; only the signature goes.
    #[test]
    fn param_list_with_generic_method_still_dropped() {
        let sym = canon("MyApp.Foo.Bar<T>(T, Int32)");
        assert_eq!(sym.fqn, "MyApp.Foo.Bar<T>");
    }

    /// `+` between nested types becomes `.`.
    #[test]
    fn nested_plus_normalized_to_dot() {
        let sym = canon("MyApp.Outer+Inner.Method");
        assert_eq!(sym.fqn, "MyApp.Outer.Inner.Method");
    }

    /// `module!` is stripped from the front.
    #[test]
    fn module_prefix_stripped() {
        let sym = canon("System.Private.CoreLib!System.String.Concat");
        assert_eq!(sym.fqn, "System.String.Concat");
    }

    /// A state machine `MoveNext` maps back to the async method.
    #[test]
    fn async_state_machine_unwrapped() {
        let sym = canon("MyApp.Foo.<ProcessOrderAsync>d__7.MoveNext");
        assert_eq!(sym.fqn, "MyApp.Foo.ProcessOrderAsync");
    }

    /// Unwrapping also works without any namespace / class prefix.
    #[test]
    fn async_at_module_root_unwrapped() {
        let sym = canon("<MainAsync>d__0.MoveNext");
        assert_eq!(sym.fqn, "MainAsync");
    }

    /// Display classes are flagged as synthetic.
    #[test]
    fn display_class_marked_synthetic() {
        let sym = canon("MyApp.Foo.<>c__DisplayClass5_0.<Bar>b__0");
        assert!(sym.is_synthetic, "{sym:?}");
    }

    /// The `<>c` closure holder is flagged as synthetic.
    #[test]
    fn closure_sentinel_marked_synthetic() {
        let sym = canon("MyApp.Foo.<>c.<<Bar>b__0_0>");
        assert!(sym.is_synthetic);
    }

    /// `resolve_async_frame` yields the qualified method name.
    #[test]
    fn resolve_async_frame_returns_method() {
        let resolved = DotnetCanonicalizer.resolve_async_frame("A.B.<DoWorkAsync>d__3.MoveNext");
        assert_eq!(resolved, Some("A.B.DoWorkAsync".into()));
    }

    /// Ordinary methods are not async frames.
    #[test]
    fn resolve_async_frame_none_for_plain_method() {
        let resolved = DotnetCanonicalizer.resolve_async_frame("A.B.C");
        assert!(resolved.is_none());
    }

    /// Backtick arity on generic types is kept verbatim.
    #[test]
    fn generic_backtick_notation_preserved() {
        let sym = canon("System.Collections.Generic.List`1.Add");
        assert_eq!(sym.fqn, "System.Collections.Generic.List`1.Add");
    }

    /// The structured entry point ignores module and signature.
    #[test]
    fn structured_ignores_module_and_sig() {
        let sym =
            DotnetCanonicalizer.canonicalize_structured("MyAsm.dll", "MyNs.MyClass", "Foo", "(I)V");
        assert_eq!(sym.fqn, "MyNs.MyClass.Foo");
    }

    /// The join key is the `(lang, fqn)` pair.
    #[test]
    fn key_is_lang_plus_fqn() {
        let sym = canon("A.B.C");
        assert_eq!(sym.key(), ("dotnet", "A.B.C"));
    }
}