ferric_ai/parser/
nodes.rs

1// Parser module for flamegraph SVG parsing
2
3use quick_xml::{events::Event, Reader};
4use regex::Regex;
5use serde::{Deserialize, Serialize};
6
7/// A node in the function call tree representing a single function
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct ParserNode {
10    /// Function name (cleaned and demangled)
11    pub function_name: String,
12    /// Raw function signature from flamegraph title
13    pub raw_signature: String,
14    /// Unique identifier for this node instance
15    pub unique_key: String,
16
17    // === Performance Metrics ===
18    /// CPU percentage this function consumes (including children)
19    pub cpu_percent: f64,
20    /// Total sample count for this function (including children)
21    pub sample_count: u64,
22
23    // === Visual/Color Information ===
24    /// RGB color from flamegraph (indicates heat level)
25    pub color: Option<RgbColor>,
26
27    // === Positioning Data ===
28    /// Horizontal position in flamegraph (percentage)
29    pub x_position: f64,
30    /// Vertical position in flamegraph (pixels)
31    pub y_position: f64,
32    /// Width in flamegraph (percentage) - indicates relative time
33    pub width: f64,
34    /// Height in flamegraph (pixels) - usually constant per level
35    pub height: f64,
36
37    // === Raw Sample Coordinates ===
38    /// Raw sample offset from flamegraph
39    pub sample_offset: u64,
40    /// Raw sample width from flamegraph
41    pub sample_width: u64,
42}
43
44/// RGB color extracted from SVG
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct RgbColor {
47    pub r: u8,
48    pub g: u8,
49    pub b: u8,
50}
51
52/// Source code location when attribution is available
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct SourceLocation {
55    pub file_path: String,
56    pub line_number: u32,
57    pub confidence: f64, // 0.0 to 1.0
58}
59
60use crate::error::{FerricError, Result};
61
62/// Custom deserializer for flamegraph SVG nodes
63pub struct FlamegraphParser {
64    /// Regex for parsing title format: "function_name (samples, percentage)"
65    title_regex: Regex,
66    /// Regex for parsing RGB colors: "rgb(r,g,b)"
67    color_regex: Regex,
68    /// Total samples from SVG root for percentage calculations
69    total_samples: u64,
70}
71
72impl FlamegraphParser {
73    pub fn new(title_expression: &str, color_expression: &str) -> Result<Self> {
74        Ok(Self {
75            title_regex: Regex::new(title_expression)
76                .map_err(|e| FerricError::ParserError(format!("Invalid title regex pattern '{}': {}", title_expression, e)))?,
77            color_regex: Regex::new(color_expression)
78                .map_err(|e| FerricError::ParserError(format!("Invalid color regex pattern '{}': {}", color_expression, e)))?,
79            total_samples: 0,
80        })
81    }
82
83    /// Parse entire SVG file into ParserNode hierarchy
84    pub fn parse_svg(&mut self, svg_content: &str) -> Result<Vec<ParserNode>> {
85        // First pass: extract total_samples from SVG root
86        self.extract_total_samples(svg_content)?;
87
88        // Second pass: parse all <g> elements into nodes
89        let mut reader = Reader::from_str(svg_content);
90        self.parse_nodes(&mut reader)
91    }
92
93    /// Extract total samples from SVG metadata
94    fn extract_total_samples(&mut self, svg_content: &str) -> Result<()> {
95        // Look for: <svg id="frames" ... total_samples="887077243">
96        let total_regex = Regex::new(r#"total_samples="([^"]+)""#)
97            .map_err(|e| FerricError::ParserError(format!("Invalid regex pattern for total_samples: {}", e)))?;
98        if let Some(caps) = total_regex.captures(svg_content) {
99            self.total_samples = caps[1].parse()
100                .map_err(|e| FerricError::ParserError(format!("Failed to parse total_samples value '{}': {}", &caps[1], e)))?;
101        } else {
102            // If we don't find total_samples in what looks like an SVG, that could be an issue
103            if svg_content.contains("<svg") {
104                return Err(FerricError::ParserError("SVG file appears to be missing total_samples attribute required for flamegraph analysis".to_string()));
105            }
106        }
107        Ok(())
108    }
109
110    /// Parse <g> elements into ParserNode structures
111    fn parse_nodes(&mut self, reader: &mut Reader<&[u8]>) -> Result<Vec<ParserNode>> {
112        let mut nodes = Vec::new();
113        let mut buf = Vec::new();
114
115        loop {
116            match reader.read_event_into(&mut buf)
117                .map_err(|e| FerricError::ParserError(format!("XML parsing error: {}", e)))? {
118                Event::Start(ref e) if e.name().as_ref() == b"g" => {
119                    // Parse individual <g> element
120                    if let Some(node) = self.parse_single_node(reader, &mut buf)? {
121                        nodes.push(node);
122                    }
123                }
124                Event::Eof => break,
125                _ => {}
126            }
127            buf.clear();
128        }
129
130        Ok(nodes)
131    }
132
133    /// Get the total samples from the parsed SVG
134    pub fn get_total_samples(&self) -> u64 {
135        self.total_samples
136    }
137
138    /// Parse a single <g> element into ParserNode
139    fn parse_single_node(
140        &mut self,
141        reader: &mut Reader<&[u8]>,
142        buf: &mut Vec<u8>,
143    ) -> Result<Option<ParserNode>> {
144        let mut function_name = String::new();
145        let mut raw_signature = String::new();
146        let mut cpu_percent = 0.0;
147        let mut sample_count = 0;
148        let mut color: Option<RgbColor> = None;
149        let mut x_position = 0.0;
150        let mut y_position = 0.0;
151        let mut width = 0.0;
152        let mut height = 0.0;
153        let mut sample_offset = 0;
154        let mut sample_width = 0;
155
156        loop {
157            match reader.read_event_into(buf)
158                .map_err(|e| FerricError::ParserError(format!("XML parsing error in node: {}", e)))? {
159                Event::Start(ref e) => {
160                    if e.name().as_ref() == b"title" {
161                        let title_text = self.read_text_content(reader, buf)?;
162                        // Only skip if title is empty, otherwise propagate parsing errors
163                        if !title_text.is_empty() {
164                            match self.parse_title(&title_text) {
165                                Ok((name, samples, percent)) => {
166                                    function_name = name;
167                                    raw_signature = title_text;
168                                    sample_count = samples;
169                                    cpu_percent = percent;
170                                }
171                                Err(e) => {
172                                    // If it looks like a function title but can't be parsed, that's an error
173                                    if title_text.contains("samples") || title_text.contains("%") {
174                                        return Err(e);
175                                    }
176                                    // Otherwise skip (could be SVG metadata)
177                                    log::warn!("Skipping unparseable title that doesn't look like function data: '{}'", title_text);
178                                }
179                            }
180                        }
181                    }
182                }
183                Event::Empty(ref e) => {
184                    // Handle self-closing elements like <rect ... />
185                    if e.name().as_ref() == b"rect" {
186                        // Parse all rect attributes
187                        for attr in e.attributes() {
188                            let attr = attr
189                                .map_err(|e| FerricError::ParserError(format!("Failed to parse XML attribute: {}", e)))?;
190                            match attr.key.as_ref() {
191                                b"x" => x_position = self.parse_percentage(&attr.value)?,
192                                b"y" => y_position = self.parse_pixels(&attr.value)?,
193                                b"width" => width = self.parse_percentage(&attr.value)?,
194                                b"height" => height = self.parse_pixels(&attr.value)?,
195                                b"fill" => color = self.parse_color(&attr.value)?,
196                                b"fg:x" => sample_offset = self.parse_number(&attr.value)?,
197                                b"fg:w" => sample_width = self.parse_number(&attr.value)?,
198                                _ => {}
199                            }
200                        }
201                    }
202                }
203                Event::End(ref e) if e.name().as_ref() == b"g" => break,
204                Event::Eof => break,
205                _ => {}
206            }
207            buf.clear();
208        }
209
210        // Skip empty nodes (background elements, etc.)
211        if function_name.is_empty() {
212            return Ok(None);
213        }
214
215        Ok(Some(ParserNode {
216            function_name: function_name.clone(),
217            raw_signature,
218            unique_key: format!("{}_{}_{}", function_name, x_position, y_position),
219            cpu_percent,
220            sample_count,
221            color,
222            x_position,
223            y_position,
224            width,
225            height,
226            sample_offset,
227            sample_width,
228        }))
229    }
230
231    /// Read text content from XML element
232    fn read_text_content(
233        &self,
234        reader: &mut Reader<&[u8]>,
235        buf: &mut Vec<u8>,
236    ) -> Result<String> {
237        let mut text = String::new();
238        loop {
239            match reader.read_event_into(buf)
240                .map_err(|e| FerricError::ParserError(format!("XML parsing error while reading text: {}", e)))? {
241                Event::Text(e) => {
242                    text.push_str(&e.unescape()
243                        .map_err(|e| FerricError::ParserError(format!("Failed to unescape XML text: {}", e)))?)
244                }
245                Event::End(_) => break,
246                Event::Eof => break,
247                _ => {}
248            }
249            buf.clear();
250        }
251        Ok(text)
252    }
253
254    /// Parse title: "malloc (541,510 samples, 0.06%)" -> ("malloc", 541510, 0.06)
255    fn parse_title(&self, title: &str) -> Result<(String, u64, f64)> {
256        if let Some(caps) = self.title_regex.captures(title) {
257            if caps.len() == 4 {
258                let name = caps[1].to_string();
259                let samples = caps[2].replace(",", "").parse::<u64>()
260                    .map_err(|e| FerricError::ParserError(format!("Failed to parse sample count '{}' in title '{}': {}", &caps[2], title, e)))?;
261                let percent = caps[3].parse::<f64>()
262                    .map_err(|e| FerricError::ParserError(format!("Failed to parse percentage '{}' in title '{}': {}", &caps[3], title, e)))?;
263                return Ok((name, samples, percent));
264            }
265        }
266        Err(FerricError::ParserError(format!("Invalid title format - expected 'function_name (samples, percentage%)' but got: '{}'", title)))
267    }
268
269    /// Parse RGB color: "rgb(227,0,7)" -> RgbColor{r:227, g:0, b:7}
270    fn parse_color(&self, color_bytes: &[u8]) -> Result<Option<RgbColor>> {
271        let color_str = std::str::from_utf8(color_bytes)
272            .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in color value: {}", e)))?;
273        if let Some(caps) = self.color_regex.captures(color_str) {
274            Ok(Some(RgbColor {
275                r: caps[1].parse()
276                    .map_err(|e| FerricError::ParserError(format!("Invalid red value '{}' in color '{}': {}", &caps[1], color_str, e)))?,
277                g: caps[2].parse()
278                    .map_err(|e| FerricError::ParserError(format!("Invalid green value '{}' in color '{}': {}", &caps[2], color_str, e)))?,
279                b: caps[3].parse()
280                    .map_err(|e| FerricError::ParserError(format!("Invalid blue value '{}' in color '{}': {}", &caps[3], color_str, e)))?,
281            }))
282        } else {
283            Ok(None)
284        }
285    }
286
287    /// Parse percentage: "0.0769%" -> 0.0769
288    fn parse_percentage(&self, value: &[u8]) -> Result<f64> {
289        let s = std::str::from_utf8(value)
290            .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in percentage value: {}", e)))?;
291        let s = s.trim_end_matches('%');
292        s.parse()
293            .map_err(|e| FerricError::ParserError(format!("Invalid percentage value '{}': {}", s, e)))
294    }
295
296    /// Parse pixels: "549" -> 549.0
297    fn parse_pixels(&self, value: &[u8]) -> Result<f64> {
298        let s = std::str::from_utf8(value)
299            .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in pixel value: {}", e)))?;
300        s.parse()
301            .map_err(|e| FerricError::ParserError(format!("Invalid pixel value '{}': {}", s, e)))
302    }
303
304    /// Parse number: "681795" -> 681795
305    fn parse_number(&self, value: &[u8]) -> Result<u64> {
306        let s = std::str::from_utf8(value)
307            .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in number value: {}", e)))?;
308        s.parse()
309            .map_err(|e| FerricError::ParserError(format!("Invalid number value '{}': {}", s, e)))
310    }
311
312}
313
314impl Default for FlamegraphParser {
315    fn default() -> Self {
316        // These regex patterns are hardcoded and known to be valid
317        Self::new(
318            r"^(.+?)\s+\(([0-9,]+)\s+samples?,\s+([0-9.]+)%\)$",
319            r"rgb\((\d+),(\d+),(\d+)\)",
320        ).expect("Default regex patterns should always be valid")
321    }
322}
323
324#[cfg(test)]
325mod tests {
326    use super::*;
327
328    #[test]
329    fn test_parse_real_flamegraph() {
330        crate::init_test_logging();
331        let svg_content = include_str!("test_flamegraph.svg");
332        let mut parser = FlamegraphParser::default();
333        let result = parser.parse_svg(svg_content);
334
335        assert!(result.is_ok(), "Parser should successfully parse real flamegraph SVG");
336
337        let nodes = result.unwrap();
338        log::debug!("Parsed {} nodes from test_flamegraph.svg", nodes.len());
339
340        // The test flamegraph has nodes (actual count may vary by test file)
341        assert!(!nodes.is_empty(), "Should parse nodes from test_flamegraph.svg");
342
343        // Verify we parsed some key functions
344        let function_names: Vec<&String> = nodes.iter().map(|n| &n.function_name).collect();
345        log::debug!("Function names found: {:?}", function_names);
346
347        assert!(function_names.iter().any(|name| name.contains("main")), "Should find main function");
348        // Note: Function names depend on the test file used
349        log::debug!("Test completed with {} function names", function_names.len());
350    }
351
352    #[test]
353    fn test_error_handling_invalid_svg() {
354        crate::init_test_logging();
355        let mut parser = FlamegraphParser::default();
356
357        // Test completely invalid XML - quick-xml is lenient and returns empty results
358        let invalid_xml = "this is not XML at all";
359        let result = parser.parse_svg(invalid_xml);
360        match result {
361            Ok(nodes) => {
362                log::debug!("Invalid XML handled gracefully with {} nodes", nodes.len());
363                assert_eq!(nodes.len(), 0, "Should return no nodes for invalid XML");
364            }
365            Err(e) => {
366                // This is also acceptable - XML parsing could fail
367                log::debug!("XML parsing failed as expected: {}", e);
368            }
369        }
370
371        // Test malformed SVG with invalid number - this should definitely fail
372        let malformed_svg = r#"
373            <svg total_samples="not_a_number">
374                <g><title>main (abc samples, 50%)</title></g>
375            </svg>
376        "#;
377        let result = parser.parse_svg(malformed_svg);
378        log::debug!("Invalid number result: {:?}", result);
379        assert!(result.is_err(), "Should fail on invalid number format");
380        if let Err(e) = result {
381            log::debug!("Invalid number error: {}", e);
382            assert!(e.to_string().contains("Failed to parse") || e.to_string().contains("total_samples"));
383        }
384
385        // Test SVG with invalid title format in a <g> element with samples/percentage parsing
386        let invalid_title_svg = r#"
387            <svg total_samples="1000">
388                <g>
389                    <title>main (abc samples, 50%)</title>
390                    <rect x="0" y="0" width="100%" height="15" fill="rgb(227,0,7)"/>
391                </g>
392            </svg>
393        "#;
394        let result = parser.parse_svg(invalid_title_svg);
395        log::debug!("Invalid title format result: {:?}", result);
396        // This should either fail or succeed with filtered nodes
397        match result {
398            Ok(nodes) => {
399                log::debug!("Invalid title handled gracefully with {} nodes", nodes.len());
400                // Should have 0 nodes because the title couldn't be parsed (samples = "abc")
401            }
402            Err(e) => {
403                log::debug!("Title parsing error: {}", e);
404                assert!(e.to_string().contains("Failed to parse sample count") || e.to_string().contains("Invalid title format"));
405            }
406        }
407    }
408
409    #[test]
410    fn test_debug_positioning_data() {
411        crate::init_test_logging();
412        let svg_content = include_str!("test_flamegraph.svg");
413        let mut parser = FlamegraphParser::default();
414        let result = parser.parse_svg(svg_content);
415
416        assert!(result.is_ok(), "Parser should successfully parse real flamegraph SVG");
417
418        let nodes = result.unwrap();
419
420        log::debug!("=== DEBUGGING POSITION DATA ===");
421        log::debug!("Total nodes parsed: {}", nodes.len());
422
423        // Check first 5 nodes in detail
424        for (i, node) in nodes.iter().take(5).enumerate() {
425            log::debug!("\nNode {}: {}", i, node.function_name);
426            log::debug!("  Raw signature: {}", node.raw_signature);
427            log::debug!("  CPU percent: {}", node.cpu_percent);
428            log::debug!("  Sample count: {}", node.sample_count);
429            log::debug!("  Color: {:?}", node.color);
430            log::debug!("  Position: x={}, y={}, w={}, h={}", node.x_position, node.y_position, node.width, node.height);
431            log::debug!("  Sample coords: offset={}, width={}", node.sample_offset, node.sample_width);
432            log::debug!("  Unique key: {}", node.unique_key);
433        }
434
435        // Count nodes with non-zero positioning
436        let nodes_with_x = nodes.iter().filter(|n| n.x_position > 0.0).count();
437        let nodes_with_y = nodes.iter().filter(|n| n.y_position > 0.0).count();
438        let nodes_with_width = nodes.iter().filter(|n| n.width > 0.0).count();
439        let nodes_with_color = nodes.iter().filter(|n| n.color.is_some()).count();
440        let nodes_with_samples = nodes.iter().filter(|n| n.sample_width > 0).count();
441
442        log::debug!("\n=== POSITION DATA SUMMARY ===");
443        log::debug!("Nodes with x > 0: {}", nodes_with_x);
444        log::debug!("Nodes with y > 0: {}", nodes_with_y);
445        log::debug!("Nodes with width > 0: {}", nodes_with_width);
446        log::debug!("Nodes with color data: {}", nodes_with_color);
447        log::debug!("Nodes with sample width > 0: {}", nodes_with_samples);
448
449        // Look at the raw SVG to see what we should expect
450        log::debug!("\n=== RAW SVG SAMPLE ===");
451        let first_g_start = svg_content.find("<g>").unwrap_or(0);
452        let _sample_text = &svg_content[first_g_start..first_g_start.min(svg_content.len()).min(first_g_start + 500)];
453
454        // Let's look at a few specific <g> blocks
455        log::debug!("\n=== SPECIFIC G ELEMENTS ===");
456        let g_elements: Vec<&str> = svg_content.split("<g>").skip(1).take(3).collect();
457        for (i, g_element) in g_elements.iter().enumerate() {
458            let end = g_element.find("</g>").unwrap_or(g_element.len().min(300));
459            log::debug!("G element {}:\n<g>{}", i, &g_element[..end]);
460            log::debug!("---");
461        }
462    }
463}