1use quick_xml::{events::Event, Reader};
4use regex::Regex;
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct ParserNode {
10 pub function_name: String,
12 pub raw_signature: String,
14 pub unique_key: String,
16
17 pub cpu_percent: f64,
20 pub sample_count: u64,
22
23 pub color: Option<RgbColor>,
26
27 pub x_position: f64,
30 pub y_position: f64,
32 pub width: f64,
34 pub height: f64,
36
37 pub sample_offset: u64,
40 pub sample_width: u64,
42}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct RgbColor {
47 pub r: u8,
48 pub g: u8,
49 pub b: u8,
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct SourceLocation {
55 pub file_path: String,
56 pub line_number: u32,
57 pub confidence: f64, }
59
60use crate::error::{FerricError, Result};
61
62pub struct FlamegraphParser {
64 title_regex: Regex,
66 color_regex: Regex,
68 total_samples: u64,
70}
71
72impl FlamegraphParser {
73 pub fn new(title_expression: &str, color_expression: &str) -> Result<Self> {
74 Ok(Self {
75 title_regex: Regex::new(title_expression)
76 .map_err(|e| FerricError::ParserError(format!("Invalid title regex pattern '{}': {}", title_expression, e)))?,
77 color_regex: Regex::new(color_expression)
78 .map_err(|e| FerricError::ParserError(format!("Invalid color regex pattern '{}': {}", color_expression, e)))?,
79 total_samples: 0,
80 })
81 }
82
83 pub fn parse_svg(&mut self, svg_content: &str) -> Result<Vec<ParserNode>> {
85 self.extract_total_samples(svg_content)?;
87
88 let mut reader = Reader::from_str(svg_content);
90 self.parse_nodes(&mut reader)
91 }
92
93 fn extract_total_samples(&mut self, svg_content: &str) -> Result<()> {
95 let total_regex = Regex::new(r#"total_samples="([^"]+)""#)
97 .map_err(|e| FerricError::ParserError(format!("Invalid regex pattern for total_samples: {}", e)))?;
98 if let Some(caps) = total_regex.captures(svg_content) {
99 self.total_samples = caps[1].parse()
100 .map_err(|e| FerricError::ParserError(format!("Failed to parse total_samples value '{}': {}", &caps[1], e)))?;
101 } else {
102 if svg_content.contains("<svg") {
104 return Err(FerricError::ParserError("SVG file appears to be missing total_samples attribute required for flamegraph analysis".to_string()));
105 }
106 }
107 Ok(())
108 }
109
110 fn parse_nodes(&mut self, reader: &mut Reader<&[u8]>) -> Result<Vec<ParserNode>> {
112 let mut nodes = Vec::new();
113 let mut buf = Vec::new();
114
115 loop {
116 match reader.read_event_into(&mut buf)
117 .map_err(|e| FerricError::ParserError(format!("XML parsing error: {}", e)))? {
118 Event::Start(ref e) if e.name().as_ref() == b"g" => {
119 if let Some(node) = self.parse_single_node(reader, &mut buf)? {
121 nodes.push(node);
122 }
123 }
124 Event::Eof => break,
125 _ => {}
126 }
127 buf.clear();
128 }
129
130 Ok(nodes)
131 }
132
133 pub fn get_total_samples(&self) -> u64 {
135 self.total_samples
136 }
137
138 fn parse_single_node(
140 &mut self,
141 reader: &mut Reader<&[u8]>,
142 buf: &mut Vec<u8>,
143 ) -> Result<Option<ParserNode>> {
144 let mut function_name = String::new();
145 let mut raw_signature = String::new();
146 let mut cpu_percent = 0.0;
147 let mut sample_count = 0;
148 let mut color: Option<RgbColor> = None;
149 let mut x_position = 0.0;
150 let mut y_position = 0.0;
151 let mut width = 0.0;
152 let mut height = 0.0;
153 let mut sample_offset = 0;
154 let mut sample_width = 0;
155
156 loop {
157 match reader.read_event_into(buf)
158 .map_err(|e| FerricError::ParserError(format!("XML parsing error in node: {}", e)))? {
159 Event::Start(ref e) => {
160 if e.name().as_ref() == b"title" {
161 let title_text = self.read_text_content(reader, buf)?;
162 if !title_text.is_empty() {
164 match self.parse_title(&title_text) {
165 Ok((name, samples, percent)) => {
166 function_name = name;
167 raw_signature = title_text;
168 sample_count = samples;
169 cpu_percent = percent;
170 }
171 Err(e) => {
172 if title_text.contains("samples") || title_text.contains("%") {
174 return Err(e);
175 }
176 log::warn!("Skipping unparseable title that doesn't look like function data: '{}'", title_text);
178 }
179 }
180 }
181 }
182 }
183 Event::Empty(ref e) => {
184 if e.name().as_ref() == b"rect" {
186 for attr in e.attributes() {
188 let attr = attr
189 .map_err(|e| FerricError::ParserError(format!("Failed to parse XML attribute: {}", e)))?;
190 match attr.key.as_ref() {
191 b"x" => x_position = self.parse_percentage(&attr.value)?,
192 b"y" => y_position = self.parse_pixels(&attr.value)?,
193 b"width" => width = self.parse_percentage(&attr.value)?,
194 b"height" => height = self.parse_pixels(&attr.value)?,
195 b"fill" => color = self.parse_color(&attr.value)?,
196 b"fg:x" => sample_offset = self.parse_number(&attr.value)?,
197 b"fg:w" => sample_width = self.parse_number(&attr.value)?,
198 _ => {}
199 }
200 }
201 }
202 }
203 Event::End(ref e) if e.name().as_ref() == b"g" => break,
204 Event::Eof => break,
205 _ => {}
206 }
207 buf.clear();
208 }
209
210 if function_name.is_empty() {
212 return Ok(None);
213 }
214
215 Ok(Some(ParserNode {
216 function_name: function_name.clone(),
217 raw_signature,
218 unique_key: format!("{}_{}_{}", function_name, x_position, y_position),
219 cpu_percent,
220 sample_count,
221 color,
222 x_position,
223 y_position,
224 width,
225 height,
226 sample_offset,
227 sample_width,
228 }))
229 }
230
231 fn read_text_content(
233 &self,
234 reader: &mut Reader<&[u8]>,
235 buf: &mut Vec<u8>,
236 ) -> Result<String> {
237 let mut text = String::new();
238 loop {
239 match reader.read_event_into(buf)
240 .map_err(|e| FerricError::ParserError(format!("XML parsing error while reading text: {}", e)))? {
241 Event::Text(e) => {
242 text.push_str(&e.unescape()
243 .map_err(|e| FerricError::ParserError(format!("Failed to unescape XML text: {}", e)))?)
244 }
245 Event::End(_) => break,
246 Event::Eof => break,
247 _ => {}
248 }
249 buf.clear();
250 }
251 Ok(text)
252 }
253
254 fn parse_title(&self, title: &str) -> Result<(String, u64, f64)> {
256 if let Some(caps) = self.title_regex.captures(title) {
257 if caps.len() == 4 {
258 let name = caps[1].to_string();
259 let samples = caps[2].replace(",", "").parse::<u64>()
260 .map_err(|e| FerricError::ParserError(format!("Failed to parse sample count '{}' in title '{}': {}", &caps[2], title, e)))?;
261 let percent = caps[3].parse::<f64>()
262 .map_err(|e| FerricError::ParserError(format!("Failed to parse percentage '{}' in title '{}': {}", &caps[3], title, e)))?;
263 return Ok((name, samples, percent));
264 }
265 }
266 Err(FerricError::ParserError(format!("Invalid title format - expected 'function_name (samples, percentage%)' but got: '{}'", title)))
267 }
268
269 fn parse_color(&self, color_bytes: &[u8]) -> Result<Option<RgbColor>> {
271 let color_str = std::str::from_utf8(color_bytes)
272 .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in color value: {}", e)))?;
273 if let Some(caps) = self.color_regex.captures(color_str) {
274 Ok(Some(RgbColor {
275 r: caps[1].parse()
276 .map_err(|e| FerricError::ParserError(format!("Invalid red value '{}' in color '{}': {}", &caps[1], color_str, e)))?,
277 g: caps[2].parse()
278 .map_err(|e| FerricError::ParserError(format!("Invalid green value '{}' in color '{}': {}", &caps[2], color_str, e)))?,
279 b: caps[3].parse()
280 .map_err(|e| FerricError::ParserError(format!("Invalid blue value '{}' in color '{}': {}", &caps[3], color_str, e)))?,
281 }))
282 } else {
283 Ok(None)
284 }
285 }
286
287 fn parse_percentage(&self, value: &[u8]) -> Result<f64> {
289 let s = std::str::from_utf8(value)
290 .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in percentage value: {}", e)))?;
291 let s = s.trim_end_matches('%');
292 s.parse()
293 .map_err(|e| FerricError::ParserError(format!("Invalid percentage value '{}': {}", s, e)))
294 }
295
296 fn parse_pixels(&self, value: &[u8]) -> Result<f64> {
298 let s = std::str::from_utf8(value)
299 .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in pixel value: {}", e)))?;
300 s.parse()
301 .map_err(|e| FerricError::ParserError(format!("Invalid pixel value '{}': {}", s, e)))
302 }
303
304 fn parse_number(&self, value: &[u8]) -> Result<u64> {
306 let s = std::str::from_utf8(value)
307 .map_err(|e| FerricError::ParserError(format!("Invalid UTF-8 in number value: {}", e)))?;
308 s.parse()
309 .map_err(|e| FerricError::ParserError(format!("Invalid number value '{}': {}", s, e)))
310 }
311
312}
313
314impl Default for FlamegraphParser {
315 fn default() -> Self {
316 Self::new(
318 r"^(.+?)\s+\(([0-9,]+)\s+samples?,\s+([0-9.]+)%\)$",
319 r"rgb\((\d+),(\d+),(\d+)\)",
320 ).expect("Default regex patterns should always be valid")
321 }
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the largest char boundary of `text` that is `<= index` (clamped to the length),
    /// so that slicing at a fixed byte budget can never split a UTF-8 character.
    fn floor_to_char_boundary(text: &str, index: usize) -> usize {
        let mut boundary = index.min(text.len());
        // Index 0 is always a boundary, so this terminates.
        while !text.is_char_boundary(boundary) {
            boundary -= 1;
        }
        boundary
    }

    /// The bundled fixture parses and contains a frame whose name mentions `main`.
    #[test]
    fn test_parse_real_flamegraph() {
        crate::init_test_logging();
        let svg_content = include_str!("test_flamegraph.svg");
        let mut parser = FlamegraphParser::default();
        let result = parser.parse_svg(svg_content);

        assert!(result.is_ok(), "Parser should successfully parse real flamegraph SVG");

        let nodes = result.unwrap();
        log::debug!("Parsed {} nodes from test_flamegraph.svg", nodes.len());

        assert!(!nodes.is_empty(), "Should parse nodes from test_flamegraph.svg");

        let function_names: Vec<&String> = nodes.iter().map(|n| &n.function_name).collect();
        log::debug!("Function names found: {:?}", function_names);

        assert!(
            function_names.iter().any(|name| name.contains("main")),
            "Should find main function"
        );
        log::debug!("Test completed with {} function names", function_names.len());
    }

    /// Malformed inputs are reported as errors (or, for non-XML text, yield no nodes).
    #[test]
    fn test_error_handling_invalid_svg() {
        crate::init_test_logging();
        let mut parser = FlamegraphParser::default();

        // Plain text: whether the XML reader errors or just finds nothing is up to the
        // reader, so both outcomes are accepted — but it must never produce nodes.
        let invalid_xml = "this is not XML at all";
        match parser.parse_svg(invalid_xml) {
            Ok(nodes) => {
                log::debug!("Invalid XML handled gracefully with {} nodes", nodes.len());
                assert_eq!(nodes.len(), 0, "Should return no nodes for invalid XML");
            }
            Err(e) => {
                log::debug!("XML parsing failed as expected: {}", e);
            }
        }

        let malformed_svg = r#"
            <svg total_samples="not_a_number">
                <g><title>main (abc samples, 50%)</title></g>
            </svg>
        "#;
        let result = parser.parse_svg(malformed_svg);
        log::debug!("Invalid number result: {:?}", result);
        let err = result.expect_err("Should fail on invalid number format");
        log::debug!("Invalid number error: {}", err);
        let message = err.to_string();
        assert!(message.contains("Failed to parse") || message.contains("total_samples"));

        let invalid_title_svg = r#"
            <svg total_samples="1000">
                <g>
                    <title>main (abc samples, 50%)</title>
                    <rect x="0" y="0" width="100%" height="15" fill="rgb(227,0,7)"/>
                </g>
            </svg>
        "#;
        let result = parser.parse_svg(invalid_title_svg);
        log::debug!("Invalid title format result: {:?}", result);
        // The title mentions "samples" but does not match the default title regex, which the
        // parser treats as a hard error — so `Ok` is not an acceptable outcome here.
        let err = result.expect_err("Should fail on a title with a non-numeric sample count");
        log::debug!("Title parsing error: {}", err);
        let message = err.to_string();
        assert!(
            message.contains("Failed to parse sample count")
                || message.contains("Invalid title format")
        );
    }

    /// Dumps positioning data of the bundled fixture to the debug log for manual inspection.
    #[test]
    fn test_debug_positioning_data() {
        crate::init_test_logging();
        let svg_content = include_str!("test_flamegraph.svg");
        let mut parser = FlamegraphParser::default();
        let result = parser.parse_svg(svg_content);

        assert!(result.is_ok(), "Parser should successfully parse real flamegraph SVG");

        let nodes = result.unwrap();

        log::debug!("=== DEBUGGING POSITION DATA ===");
        log::debug!("Total nodes parsed: {}", nodes.len());

        for (i, node) in nodes.iter().take(5).enumerate() {
            log::debug!("\nNode {}: {}", i, node.function_name);
            log::debug!("  Raw signature: {}", node.raw_signature);
            log::debug!("  CPU percent: {}", node.cpu_percent);
            log::debug!("  Sample count: {}", node.sample_count);
            log::debug!("  Color: {:?}", node.color);
            log::debug!(
                "  Position: x={}, y={}, w={}, h={}",
                node.x_position,
                node.y_position,
                node.width,
                node.height
            );
            log::debug!(
                "  Sample coords: offset={}, width={}",
                node.sample_offset,
                node.sample_width
            );
            log::debug!("  Unique key: {}", node.unique_key);
        }

        let nodes_with_x = nodes.iter().filter(|n| n.x_position > 0.0).count();
        let nodes_with_y = nodes.iter().filter(|n| n.y_position > 0.0).count();
        let nodes_with_width = nodes.iter().filter(|n| n.width > 0.0).count();
        let nodes_with_color = nodes.iter().filter(|n| n.color.is_some()).count();
        let nodes_with_samples = nodes.iter().filter(|n| n.sample_width > 0).count();

        log::debug!("\n=== POSITION DATA SUMMARY ===");
        log::debug!("Nodes with x > 0: {}", nodes_with_x);
        log::debug!("Nodes with y > 0: {}", nodes_with_y);
        log::debug!("Nodes with width > 0: {}", nodes_with_width);
        log::debug!("Nodes with color data: {}", nodes_with_color);
        log::debug!("Nodes with sample width > 0: {}", nodes_with_samples);

        log::debug!("\n=== RAW SVG SAMPLE ===");
        // Up to 500 bytes starting at the first `<g>`, cut on a char boundary.
        let first_g_start = svg_content.find("<g>").unwrap_or(0);
        let sample_end = floor_to_char_boundary(svg_content, first_g_start.saturating_add(500));
        log::debug!("{}", &svg_content[first_g_start..sample_end]);

        log::debug!("\n=== SPECIFIC G ELEMENTS ===");
        for (i, g_element) in svg_content.split("<g>").skip(1).take(3).enumerate() {
            // Without a closing tag, fall back to at most 300 bytes, cut on a char boundary.
            let end = g_element
                .find("</g>")
                .unwrap_or_else(|| floor_to_char_boundary(g_element, 300));
            log::debug!("G element {}:\n<g>{}", i, &g_element[..end]);
            log::debug!("---");
        }
    }
}