testlint_sdk/profiler/
python.rs

1#![allow(dead_code)]
2
3use super::{
4    CommonProfileData, FunctionStats, HotFunction, ProfileResult, RuntimeMetrics, StaticMetrics,
5};
6use chrono::Utc;
7use py_spy::{Config, PythonSpy};
8use std::collections::HashMap;
9use std::path::Path;
10use std::process::{Command, Stdio};
11use std::thread;
12use std::time::Duration;
13
/// Runtime profiler for Python programs.
///
/// The type carries no fields (no static-analysis state is kept), so it is
/// zero-sized; all work happens in its methods.
#[derive(Debug, Clone)]
pub struct PythonProfiler {
    // No static analysis fields needed - only runtime profiling
}
17
/// Identity of a sampled function together with how often it was observed.
///
/// `PartialEq`/`Eq` are derived so values can be compared directly, e.g. in
/// assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FunctionLocation {
    /// Name of the function.
    pub function_name: String,
    /// File the function was attributed to.
    pub filename: String,
    /// Line number recorded for the function.
    pub line_number: u32,
    /// Number of times this location was recorded.
    pub execution_count: u64,
}
25
26#[derive(Debug)]
27pub struct PyCoverageData {
28    pub execution_count: HashMap<(String, String, u32), u64>,
29    pub hot_functions: Vec<((String, String, u32), u64)>,
30    pub total_samples: u64,
31    pub function_locations: HashMap<(String, String, u32), FunctionLocation>, // Maps function_key -> location details
32    pub lines_executed: HashMap<String, HashMap<u32, u64>>, // filename -> line_number -> count
33}
34
35impl Default for PythonProfiler {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl PythonProfiler {
42    pub fn new() -> Self {
43        PythonProfiler {}
44    }
45
46    fn run_pyspy_continuous(&self, python_script: &str) -> Result<PyCoverageData, String> {
47        // Convert to absolute path
48        let script_path = Path::new(python_script)
49            .canonicalize()
50            .map_err(|e| format!("Failed to resolve script path: {}", e))?;
51
52        // Start the Python process
53        let mut child = Command::new("python3")
54            .arg(&script_path)
55            .stdout(Stdio::inherit())
56            .stderr(Stdio::inherit())
57            .spawn()
58            .map_err(|e| format!("Failed to start Python process: {}", e))?;
59
60        let pid = child.id() as py_spy::Pid;
61
62        // Give the process a moment to start and initialize Python runtime
63        thread::sleep(Duration::from_millis(500));
64
65        // Create py-spy config
66        let config = Config::default();
67
68        // Attach to the running Python process
69        let mut spy = PythonSpy::new(pid, &config)
70            .map_err(|e| {
71                let _ = child.kill(); // Clean up the process
72                format!(
73                    "Failed to attach py-spy to process {}: {}\n\
74                    Note: On macOS/Linux, you may need to run with sudo or adjust security settings.\n\
75                    Try: sudo cargo run -- python {}",
76                    pid, e, python_script
77                )
78            })?;
79
80        // Collect samples continuously until process ends or Ctrl+C
81        let sample_interval = Duration::from_millis(10); // Sample every 10ms
82        let mut execution_count: HashMap<(String, String, u32), u64> = HashMap::new();
83        let mut total_samples = 0u64;
84        let mut function_locations: HashMap<(String, String, u32), FunctionLocation> =
85            HashMap::new();
86        let mut lines_executed: HashMap<String, HashMap<u32, u64>> = HashMap::new();
87
88        let mut consecutive_errors = 0;
89        let max_consecutive_errors = 10;
90
91        loop {
92            // Check if process is still alive
93            match child.try_wait() {
94                Ok(Some(_status)) => {
95                    println!("\nProcess exited. Generating profile...");
96                    break;
97                }
98                Ok(None) => {
99                    // Process still running, continue sampling
100                }
101                Err(_) => {
102                    break;
103                }
104            }
105
106            match spy.get_stack_traces() {
107                Ok(traces) => {
108                    consecutive_errors = 0; // Reset error counter on success
109                    for trace in traces {
110                        for frame in &trace.frames {
111                            let filename = frame
112                                .short_filename
113                                .as_ref()
114                                .unwrap_or(&frame.filename)
115                                .clone();
116                            let line_number = frame.line as u32;
117                            let func_name = frame.name.clone();
118                            let func_key = (func_name.clone(), filename.clone(), line_number);
119
120                            // Track function execution count
121                            *execution_count.entry(func_key.clone()).or_insert(0) += 1;
122                            total_samples += 1;
123
124                            // Store function location details
125                            function_locations
126                                .entry(func_key)
127                                .or_insert_with(|| FunctionLocation {
128                                    function_name: func_name,
129                                    filename: filename.clone(),
130                                    line_number,
131                                    execution_count: 0,
132                                })
133                                .execution_count += 1;
134
135                            // Track line-level execution
136                            lines_executed
137                                .entry(filename)
138                                .or_default()
139                                .entry(line_number)
140                                .and_modify(|count| *count += 1)
141                                .or_insert(1);
142                        }
143                    }
144                }
145                Err(e) => {
146                    consecutive_errors += 1;
147                    // Only warn on critical errors or after many consecutive failures
148                    let error_msg = e.to_string();
149                    if !error_msg.contains("timed out")
150                        && !error_msg.contains("Operation timed out")
151                    {
152                        eprintln!("Warning: Failed to get stack trace: {}", e);
153                    } else if consecutive_errors >= max_consecutive_errors {
154                        eprintln!(
155                            "Warning: Multiple consecutive timeouts - process may have exited"
156                        );
157                        break;
158                    }
159                    // Timeouts are normal when the process is busy, don't spam warnings
160                }
161            }
162
163            thread::sleep(sample_interval);
164        }
165
166        // Wait for the Python process to finish
167        let _ = child.wait();
168
169        // Sort and get hot functions
170        let mut hot_functions: Vec<((String, String, u32), u64)> = execution_count
171            .iter()
172            .map(|(k, v)| (k.clone(), *v))
173            .collect();
174        hot_functions.sort_by(|a, b| b.1.cmp(&a.1));
175        hot_functions.truncate(10);
176
177        Ok(PyCoverageData {
178            execution_count,
179            hot_functions,
180            total_samples,
181            function_locations,
182            lines_executed,
183        })
184    }
185}
186
187// Removed LanguageProfiler trait - only runtime profiling methods below
188
189impl PythonProfiler {
190    // Continuous profiling - runs until interrupted with Ctrl+C
191    pub fn profile_continuous(&self, python_script: &str) -> Result<ProfileResult, String> {
192        println!("Starting Python continuous runtime profiling with py-spy...");
193        println!("Script: {}", python_script);
194        println!("Press Ctrl+C to stop and see results...\n");
195
196        // Run py-spy runtime profiling continuously (no duration limit)
197        let coverage_data = self.run_pyspy_continuous(python_script)?;
198
199        let mut details = Vec::new();
200        details.push("=== Runtime Profile (py-spy) ===".to_string());
201        details.push(format!(
202            "Total samples collected: {}",
203            coverage_data.total_samples
204        ));
205        details.push(format!(
206            "Unique functions executed: {}",
207            coverage_data.execution_count.len()
208        ));
209        details.push(format!(
210            "Files covered: {}",
211            coverage_data.lines_executed.len()
212        ));
213        details.push("\nTop 10 Hot Functions:".to_string());
214
215        for (idx, (func_key, count)) in coverage_data.hot_functions.iter().enumerate() {
216            let percentage = if coverage_data.total_samples > 0 {
217                (*count as f64 / coverage_data.total_samples as f64) * 100.0
218            } else {
219                0.0
220            };
221
222            // Get location details
223            if let Some(loc) = coverage_data.function_locations.get(func_key) {
224                details.push(format!(
225                    "  {}. {}() at {}:{} - {} samples ({:.2}%)",
226                    idx + 1,
227                    loc.function_name,
228                    loc.filename,
229                    loc.line_number,
230                    count,
231                    percentage
232                ));
233            } else {
234                let (name, file, line) = func_key;
235                details.push(format!(
236                    "  {}. {}:{}:{} - {} samples ({:.2}%)",
237                    idx + 1,
238                    name,
239                    file,
240                    line,
241                    count,
242                    percentage
243                ));
244            }
245        }
246
247        // Add file-level coverage summary
248        details.push("\n=== File Coverage Summary ===".to_string());
249        let mut file_stats: Vec<(String, usize)> = coverage_data
250            .lines_executed
251            .iter()
252            .map(|(filename, lines)| (filename.clone(), lines.len()))
253            .collect();
254        file_stats.sort_by(|a, b| b.1.cmp(&a.1));
255
256        for (filename, line_count) in file_stats.iter().take(10) {
257            details.push(format!("  {} - {} lines executed", filename, line_count));
258        }
259
260        Ok(ProfileResult {
261            language: "Python".to_string(),
262            details,
263        })
264    }
265
266    // Attach to an existing Python process by PID
267    pub fn profile_pid(&self, pid: u32) -> Result<ProfileResult, String> {
268        println!("Attaching to Python process PID: {}", pid);
269        println!("Profiling until interrupted (Ctrl+C)...\n");
270
271        // Convert to py_spy::Pid type
272        let spy_pid = pid as py_spy::Pid;
273
274        // Create py-spy config
275        let config = Config::default();
276
277        // Attach to the running Python process
278        let mut spy = PythonSpy::new(spy_pid, &config).map_err(|e| {
279            format!(
280                "Failed to attach py-spy to process {}: {}\n\
281                    Note: On macOS/Linux, you may need to run with sudo.\n\
282                    Make sure the process ID is valid and the process is running Python.\n\
283                    Try: sudo cargo run -- python --pid {}",
284                pid, e, pid
285            )
286        })?;
287
288        println!("✅ Successfully attached to process {}", pid);
289        println!("Collecting samples... Press Ctrl+C to stop and see results.\n");
290
291        // Collect samples continuously until Ctrl+C
292        let sample_interval = Duration::from_millis(10); // Sample every 10ms
293        let mut execution_count: HashMap<(String, String, u32), u64> = HashMap::new();
294        let mut total_samples = 0u64;
295        let mut function_locations: HashMap<(String, String, u32), FunctionLocation> =
296            HashMap::new();
297        let mut lines_executed: HashMap<String, HashMap<u32, u64>> = HashMap::new();
298
299        let mut consecutive_errors = 0;
300        let max_consecutive_errors = 10;
301        let mut samples_without_line_info = 0u64;
302        let line_info_check_threshold = 100; // Check after 100 samples
303
304        loop {
305            match spy.get_stack_traces() {
306                Ok(traces) => {
307                    consecutive_errors = 0; // Reset error counter on success
308                    for trace in traces {
309                        for frame in &trace.frames {
310                            let filename = frame
311                                .short_filename
312                                .as_ref()
313                                .unwrap_or(&frame.filename)
314                                .clone();
315                            let line_number = frame.line as u32;
316                            let func_name = frame.name.clone();
317
318                            // Detect missing line number information
319                            if line_number == 0 {
320                                samples_without_line_info += 1;
321
322                                // After threshold, check if most samples lack line info
323                                if total_samples >= line_info_check_threshold {
324                                    let missing_percentage = (samples_without_line_info as f64
325                                        / total_samples as f64)
326                                        * 100.0;
327                                    if missing_percentage > 50.0 {
328                                        return Err(format!(
329                                            "Unable to collect line number information from process {}.\n\
330                                            {:.1}% of samples have missing line numbers (line 0).\n\n\
331                                            Possible causes:\n\
332                                            - Code obfuscation or minification\n\
333                                            - Stripped or optimized bytecode (.pyc without line info)\n\
334                                            - Non-standard Python runtime (PyPy, Jython, etc.)\n\
335                                            - C extensions or compiled modules\n\
336                                            - Python built with -O or -OO flags (removes debug info)\n\
337                                            - Frozen executables (PyInstaller, cx_Freeze, etc.)\n\n\
338                                            Recommendation: Run with standard Python interpreter and unoptimized code.",
339                                            pid, missing_percentage
340                                        ));
341                                    }
342                                }
343                            }
344
345                            let func_key = (func_name.clone(), filename.clone(), line_number);
346
347                            // Track function execution count
348                            *execution_count.entry(func_key.clone()).or_insert(0) += 1;
349                            total_samples += 1;
350
351                            // Store function location details
352                            function_locations
353                                .entry(func_key)
354                                .or_insert_with(|| FunctionLocation {
355                                    function_name: func_name,
356                                    filename: filename.clone(),
357                                    line_number,
358                                    execution_count: 0,
359                                })
360                                .execution_count += 1;
361
362                            // Track line-level execution
363                            lines_executed
364                                .entry(filename)
365                                .or_default()
366                                .entry(line_number)
367                                .and_modify(|count| *count += 1)
368                                .or_insert(1);
369                        }
370                    }
371                }
372                Err(e) => {
373                    consecutive_errors += 1;
374                    let error_msg = e.to_string();
375
376                    // Check if process has exited
377                    if error_msg.contains("No such process") || error_msg.contains("process") {
378                        println!("\n⚠️  Target process has exited. Generating profile...");
379                        break;
380                    }
381
382                    // Only warn on critical errors or after many consecutive failures
383                    if !error_msg.contains("timed out")
384                        && !error_msg.contains("Operation timed out")
385                    {
386                        eprintln!("Warning: Failed to get stack trace: {}", e);
387                    } else if consecutive_errors >= max_consecutive_errors {
388                        eprintln!(
389                            "Warning: Multiple consecutive timeouts - process may have exited"
390                        );
391                        break;
392                    }
393                    // Timeouts are normal when the process is busy, don't spam warnings
394                }
395            }
396
397            thread::sleep(sample_interval);
398        }
399
400        // Sort and get hot functions
401        let mut hot_functions: Vec<((String, String, u32), u64)> = execution_count
402            .iter()
403            .map(|(k, v)| (k.clone(), *v))
404            .collect();
405        hot_functions.sort_by(|a, b| b.1.cmp(&a.1));
406        hot_functions.truncate(10);
407
408        // Build result with detailed location information
409        let mut details = Vec::new();
410        details.push("=== Runtime Profile (py-spy) ===".to_string());
411        details.push(format!("Attached to PID: {}", pid));
412        details.push(format!("Total samples collected: {}", total_samples));
413        details.push(format!(
414            "Unique functions executed: {}",
415            execution_count.len()
416        ));
417        details.push(format!("Files covered: {}", lines_executed.len()));
418        details.push("\nTop 10 Hot Functions:".to_string());
419
420        for (idx, (func_key, count)) in hot_functions.iter().enumerate() {
421            let percentage = if total_samples > 0 {
422                (*count as f64 / total_samples as f64) * 100.0
423            } else {
424                0.0
425            };
426
427            // Get location details
428            if let Some(loc) = function_locations.get(func_key) {
429                details.push(format!(
430                    "  {}. {}() at {}:{} - {} samples ({:.2}%)",
431                    idx + 1,
432                    loc.function_name,
433                    loc.filename,
434                    loc.line_number,
435                    count,
436                    percentage
437                ));
438            } else {
439                let (name, file, line) = func_key;
440                details.push(format!(
441                    "  {}. {}:{}:{} - {} samples ({:.2}%)",
442                    idx + 1,
443                    name,
444                    file,
445                    line,
446                    count,
447                    percentage
448                ));
449            }
450        }
451
452        // Add file-level coverage summary
453        details.push("\n=== File Coverage Summary ===".to_string());
454        let mut file_stats: Vec<(String, usize)> = lines_executed
455            .iter()
456            .map(|(filename, lines)| (filename.clone(), lines.len()))
457            .collect();
458        file_stats.sort_by(|a, b| b.1.cmp(&a.1));
459
460        for (filename, line_count) in file_stats.iter().take(10) {
461            details.push(format!("  {} - {} lines executed", filename, line_count));
462        }
463
464        Ok(ProfileResult {
465            language: "Python".to_string(),
466            details,
467        })
468    }
469
470    pub fn profile_to_common_format(
471        &self,
472        python_script: &str,
473    ) -> Result<CommonProfileData, String> {
474        println!("Starting Python runtime profiling for JSON export...");
475
476        // Run runtime profiling continuously
477        let coverage_data = self.run_pyspy_continuous(python_script)?;
478
479        // Build function stats from runtime data
480        let mut function_stats = HashMap::new();
481
482        for ((func_name, filename, line), count) in &coverage_data.execution_count {
483            let percentage = if coverage_data.total_samples > 0 {
484                (*count as f64 / coverage_data.total_samples as f64) * 100.0
485            } else {
486                0.0
487            };
488
489            let display_name = format!("{}:{}:{}", func_name, filename, line);
490
491            function_stats.insert(
492                display_name.clone(),
493                FunctionStats {
494                    name: display_name,
495                    execution_count: *count,
496                    percentage,
497                    line_number: Some(*line as usize),
498                    file_path: Some(filename.clone()),
499                },
500            );
501        }
502
503        let hot_functions: Vec<HotFunction> = coverage_data
504            .hot_functions
505            .iter()
506            .enumerate()
507            .map(|(idx, ((name, file, line), samples))| {
508                let percentage = if coverage_data.total_samples > 0 {
509                    (*samples as f64 / coverage_data.total_samples as f64) * 100.0
510                } else {
511                    0.0
512                };
513                HotFunction {
514                    rank: idx + 1,
515                    name: format!("{}:{}:{}", name, file, line),
516                    samples: *samples,
517                    percentage,
518                }
519            })
520            .collect();
521
522        let runtime_metrics = RuntimeMetrics {
523            total_samples: coverage_data.total_samples,
524            execution_duration_secs: 0, // Continuous profiling - duration not limited
525            functions_executed: coverage_data.execution_count.len(),
526            function_stats,
527            hot_functions,
528        };
529
530        // Minimal static metrics (not the focus)
531        let static_metrics = StaticMetrics {
532            file_size_bytes: 0,
533            line_count: 0,
534            function_count: 0,
535            class_count: 0,
536            import_count: 0,
537            complexity_score: 0,
538        };
539
540        Ok(CommonProfileData {
541            language: "Python".to_string(),
542            source_file: python_script.to_string(),
543            timestamp: Utc::now().to_rfc3339(),
544            static_analysis: static_metrics,
545            runtime_analysis: Some(runtime_metrics),
546        })
547    }
548}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// A profiler built through `new` occupies no memory.
    #[test]
    fn test_python_profiler_new() {
        let via_new = PythonProfiler::new();
        assert_eq!(std::mem::size_of_val(&via_new), 0); // Zero-sized struct
    }

    /// A profiler built through `Default` occupies no memory either.
    #[test]
    fn test_python_profiler_default() {
        let via_default = PythonProfiler::default();
        assert_eq!(std::mem::size_of_val(&via_default), 0);
    }

    /// `PyCoverageData` can be assembled by hand and read back.
    #[test]
    fn test_py_coverage_data_structure() {
        let key = ("test_function".to_string(), "test.py".to_string(), 10);

        let data = PyCoverageData {
            execution_count: std::iter::once((key.clone(), 10)).collect(),
            hot_functions: vec![(key, 10)],
            total_samples: 10,
            function_locations: HashMap::new(),
            lines_executed: HashMap::new(),
        };

        assert_eq!(data.total_samples, 10);
        assert_eq!(data.hot_functions.len(), 1);
    }

    /// Every field of `FunctionLocation` keeps the value it was given.
    #[test]
    fn test_function_location_creation() {
        let loc = FunctionLocation {
            function_name: String::from("test_func"),
            filename: String::from("test.py"),
            line_number: 10,
            execution_count: 5,
        };

        assert_eq!(loc.function_name, "test_func");
        assert_eq!(loc.filename, "test.py");
        assert_eq!(loc.line_number, 10);
        assert_eq!(loc.execution_count, 5);
    }
}