dbg_cli/session_db/canonicalizer/
python.rs1use std::path::Path;
15use std::sync::OnceLock;
16
17use regex::Regex;
18
19use super::{CanonicalSymbol, Canonicalizer};
20
/// Canonicalizer for raw Python symbol strings.
///
/// The type carries no state; all work happens in its `Canonicalizer`
/// implementation, which reports the language tag `"python"`.
#[derive(Debug, Clone, Copy, Default)]
pub struct PythonCanonicalizer;
22
23impl Canonicalizer for PythonCanonicalizer {
24 fn lang(&self) -> &'static str {
25 "python"
26 }
27
28 fn canonicalize(&self, raw: &str) -> CanonicalSymbol {
29 let parsed = parse(raw);
30 CanonicalSymbol {
31 lang: "python",
32 fqn: parsed.fqn,
33 file: parsed.file,
34 line: parsed.line,
35 demangled: None,
36 raw: raw.to_string(),
37 is_synthetic: parsed.synthetic,
38 }
39 }
40}
41
/// Intermediate result of `parse`, before it is copied into a
/// `CanonicalSymbol`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Parsed {
    /// Name used as the symbol's fully-qualified name.
    fqn: String,
    /// Source file taken from the raw symbol, when it carried one.
    file: Option<String>,
    /// Line number taken from the raw symbol, when it carried one.
    line: Option<u32>,
    /// Whether the function name is one `is_synthetic_func` recognises.
    synthetic: bool,
}
48
49fn parse(raw: &str) -> Parsed {
50 if let Some(inner) = raw
52 .strip_prefix("<built-in method ")
53 .and_then(|s| s.strip_suffix('>'))
54 {
55 return Parsed {
56 fqn: inner.to_string(),
57 file: None,
58 line: None,
59 synthetic: false,
60 };
61 }
62 static METHOD_OF: OnceLock<Regex> = OnceLock::new();
64 let re_method_of = METHOD_OF
65 .get_or_init(|| Regex::new(r"^<method '(?P<m>[^']+)' of '(?P<t>[^']+)' objects>$").unwrap());
66 if let Some(c) = re_method_of.captures(raw) {
67 return Parsed {
68 fqn: format!("{}.{}", &c["t"], &c["m"]),
69 file: None,
70 line: None,
71 synthetic: false,
72 };
73 }
74
75 static PSTATS: OnceLock<Regex> = OnceLock::new();
77 let re_pstats = PSTATS.get_or_init(|| {
78 Regex::new(r"^(?P<file>[^\s\(]+):(?P<line>\d+)\((?P<func>[^)]+)\)$").unwrap()
79 });
80 if let Some(c) = re_pstats.captures(raw) {
81 let file = c["file"].to_string();
82 let line: u32 = c["line"].parse().ok().unwrap_or(0);
83 let func = c["func"].to_string();
84 let module = module_from_file(&file);
85 let fqn = if module.is_empty() {
86 func.clone()
87 } else {
88 format!("{module}.{func}")
89 };
90 let synthetic = is_synthetic_func(&func);
91 return Parsed { fqn, file: Some(file), line: Some(line), synthetic };
92 }
93
94 static PYSPY: OnceLock<Regex> = OnceLock::new();
96 let re_pyspy = PYSPY.get_or_init(|| {
97 Regex::new(r"^(?P<func>[A-Za-z_<][\w<>]*)\s+\((?P<file>[^:)]+)(?::(?P<line>\d+))?\)$").unwrap()
98 });
99 if let Some(c) = re_pyspy.captures(raw) {
100 let func = c["func"].to_string();
101 let file = c["file"].to_string();
102 let line: Option<u32> = c.name("line").and_then(|m| m.as_str().parse().ok());
103 let module = module_from_file(&file);
104 let fqn = if module.is_empty() {
105 func.clone()
106 } else {
107 format!("{module}.{func}")
108 };
109 let synthetic = is_synthetic_func(&func);
110 return Parsed { fqn, file: Some(file), line, synthetic };
111 }
112
113 let synthetic = is_synthetic_func(raw);
115 Parsed {
116 fqn: raw.to_string(),
117 file: None,
118 line: None,
119 synthetic,
120 }
121}
122
/// Derives a module name from a file path: the final path component with
/// its last extension removed.
///
/// Returns an empty string when the path has no file stem (for example an
/// empty path).
fn module_from_file(file: &str) -> String {
    match Path::new(file).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::new(),
    }
}
132
/// Reports whether `f` is exactly one of the angle-bracketed function
/// names this module treats as synthetic (lambdas, comprehensions,
/// generator expressions and module-level code).
fn is_synthetic_func(f: &str) -> bool {
    const SYNTHETIC_NAMES: [&str; 6] = [
        "<lambda>",
        "<listcomp>",
        "<dictcomp>",
        "<setcomp>",
        "<genexpr>",
        "<module>",
    ];
    SYNTHETIC_NAMES.contains(&f)
}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Canonicalizes `raw` with a `PythonCanonicalizer`.
    fn canon(raw: &str) -> CanonicalSymbol {
        PythonCanonicalizer.canonicalize(raw)
    }

    // --- FILE:LINE(FUNC) form -------------------------------------------

    #[test]
    fn pstats_form_parsed() {
        let sym = canon("/opt/myapp/api.py:42(handle_request)");
        assert_eq!(sym.fqn, "api.handle_request");
        assert_eq!(sym.file.as_deref(), Some("/opt/myapp/api.py"));
        assert_eq!(sym.line, Some(42));
        assert!(!sym.is_synthetic);
    }

    #[test]
    fn pstats_lambda_is_synthetic() {
        let sym = canon("/opt/myapp/api.py:42(<lambda>)");
        assert!(sym.is_synthetic);
        assert_eq!(sym.fqn, "api.<lambda>");
    }

    // --- FUNC (FILE[:LINE]) form ----------------------------------------

    #[test]
    fn pyspy_form_with_line_parsed() {
        let sym = canon("handle_request (api.py:42)");
        assert_eq!(sym.fqn, "api.handle_request");
        assert_eq!(sym.file.as_deref(), Some("api.py"));
        assert_eq!(sym.line, Some(42));
    }

    #[test]
    fn pyspy_form_without_line_parsed() {
        let sym = canon("handle_request (api.py)");
        assert_eq!(sym.fqn, "api.handle_request");
        assert_eq!(sym.line, None);
    }

    // --- Other recognised shapes ----------------------------------------

    #[test]
    fn bare_dotted_is_passed_through() {
        let sym = canon("myapp.services.users.login");
        assert_eq!(sym.fqn, "myapp.services.users.login");
        assert_eq!(sym.file, None);
    }

    #[test]
    fn builtin_method_form() {
        let sym = canon("<built-in method builtins.print>");
        assert_eq!(sym.fqn, "builtins.print");
    }

    #[test]
    fn method_of_form() {
        let sym = canon("<method 'write' of 'BufferedWriter' objects>");
        assert_eq!(sym.fqn, "BufferedWriter.write");
    }

    // --- Synthetic function names ---------------------------------------

    #[test]
    fn listcomp_synthetic() {
        assert!(canon("/app/main.py:10(<listcomp>)").is_synthetic);
    }

    #[test]
    fn module_level_synthetic() {
        assert!(canon("/app/main.py:1(<module>)").is_synthetic);
    }

    // --- Behaviour provided by the surrounding trait and symbol type ----

    #[test]
    fn structured_default_joins_with_dot() {
        let sym = PythonCanonicalizer.canonicalize_structured("", "UserService", "login", "");
        assert_eq!(sym.fqn, "UserService.login");
    }

    #[test]
    fn key_is_lang_plus_fqn() {
        let sym = canon("app.main");
        assert_eq!(sym.key(), ("python", "app.main"));
    }
}