import re
import sys
import argparse
from collections import defaultdict
from html import unescape
def parse_title(title_content: str) -> tuple[str, int, float] | None:
match = re.match(
r'^(.+?)\s+\(([0-9,]+)\s+samples?,\s+([0-9.]+)%\)$',
title_content.strip()
)
if not match:
return None
func_name = match.group(1)
samples_str = match.group(2).replace(',', '')
percentage_str = match.group(3)
try:
samples = int(samples_str)
percentage = float(percentage_str)
except ValueError:
return None
return (func_name, samples, percentage)
def extract_titles_from_svg(svg_path: str) -> list[tuple[str, int, float]]:
    """Collect every parseable ``<title>`` entry from an SVG file.

    Args:
        svg_path: Path of the SVG file, read as UTF-8.

    Returns:
        One ``(name, samples, percentage)`` tuple per ``<title>`` element
        whose HTML-unescaped text is accepted by ``parse_title``, in document
        order. Titles that do not parse are skipped.
    """
    with open(svg_path, 'r', encoding='utf-8') as svg_file:
        svg_text = svg_file.read()

    raw_titles = re.findall(r'<title>([^<]+)</title>', svg_text)
    # Entities such as "&lt;" are decoded before the title is parsed.
    candidates = (parse_title(unescape(raw)) for raw in raw_titles)
    return [entry for entry in candidates if entry]
def demangle_name(name: str) -> str:
    """Simplify a symbol name by deleting every ``<...>`` group.

    Innermost bracket groups are removed repeatedly until none remain, which
    also clears nested groups. Runs of whitespace are then collapsed to a
    single space and the ends are trimmed.

    Args:
        name: Symbol name to simplify.

    Returns:
        The simplified name.
    """
    innermost_group = re.compile(r'<[^<>]*>')
    simplified = name
    removed = 1
    while removed:
        # Each pass deletes groups with no nested brackets, exposing the next
        # level; stop once a pass deletes nothing.
        simplified, removed = innermost_group.subn('', simplified)
    return re.sub(r'\s+', ' ', simplified).strip()
def extract_module(name: str) -> str:
    """Return the path that encloses the final ``::`` segment of *name*.

    Only separators outside ``<...>`` are considered, so a ``::`` inside
    generic arguments does not cut the name in the middle of a bracket group
    (``a::f<b::C>`` yields ``a``, not ``a::f<b``).

    Args:
        name: A ``::``-separated symbol path.

    Returns:
        Everything before the last top-level ``::``, or *name* unchanged when
        it has no top-level separator. When the angle brackets are unbalanced
        (for example a trailing ``operator<``), the result falls back to
        splitting on every ``::``.
    """
    depth = 0
    cut = -1
    pos = 0
    length = len(name)
    while pos < length:
        char = name[pos]
        if char == '<':
            depth += 1
        elif char == '>':
            # The '>' of a '->' arrow does not close a bracket group.
            if pos == 0 or name[pos - 1] != '-':
                depth -= 1
                if depth < 0:
                    break
        elif depth == 0 and name.startswith('::', pos):
            cut = pos
            # Skip the whole separator so ':::' is scanned left to right
            # without overlap, as str.split does.
            pos += 2
            continue
        pos += 1

    if depth != 0:
        # Bracket tracking is unreliable here; split on every separator.
        parts = name.split('::')
        return '::'.join(parts[:-1]) if len(parts) > 1 else name

    return name[:cut] if cut >= 0 else name
def extract_crate(name: str) -> str:
    """Return the text before the first ``::`` in *name*.

    Args:
        name: A ``::``-separated symbol path.

    Returns:
        The leading segment, or the whole of *name* when it contains no
        ``::``.
    """
    leading_segment, _separator, _remainder = name.partition('::')
    return leading_segment
def group_entries(
    entries: list[tuple[str, int, float]],
    group_by: str,
    demangle: bool
) -> dict[str, tuple[int, float]]:
    """Merge entries that share a grouping key.

    Args:
        entries: ``(name, samples, percentage)`` tuples.
        group_by: ``'module'`` keys on ``extract_module``, ``'crate'`` keys on
            ``extract_crate``; any other value keys on the name itself.
        demangle: When true, names go through ``demangle_name`` before the
            key is derived.

    Returns:
        A plain dict mapping each key to ``(summed samples, largest
        percentage)``, in order of first appearance.
    """
    merged: dict[str, tuple[int, float]] = {}
    for raw_name, count, pct in entries:
        label = demangle_name(raw_name) if demangle else raw_name
        if group_by == 'module':
            label = extract_module(label)
        elif group_by == 'crate':
            label = extract_crate(label)

        # A key seen for the first time starts from zero samples and 0.0 %.
        count_so_far, pct_so_far = merged.get(label, (0, 0.0))
        merged[label] = (count_so_far + count, max(pct_so_far, pct))
    return merged
def extract_stacks_from_svg(svg_path: str) -> list[tuple[list[str], int, float]]:
    """Rebuild a root-to-frame call stack for every frame drawn in an SVG.

    A frame is a ``<g>`` element whose ``<title>`` reads
    ``"<name> (<count> sample[s], <pct>%)"`` and whose ``<rect>`` carries
    ``y``, ``fg:x`` and ``fg:w`` attributes. For each frame, every row with a
    larger ``y`` is searched for a rectangle whose ``[x, x + w]`` span
    contains the frame's span; the names found become the frame's ancestors.

    Args:
        svg_path: Path of the SVG file, read as UTF-8.

    Returns:
        One ``(stack, samples, percentage)`` tuple per frame, ordered by
        ascending ``fg:x`` and then descending ``y``. ``stack`` lists names
        from the largest-``y`` ancestor down to the frame itself; ``samples``
        and ``percentage`` are the frame's own title values. Frames whose
        title or coordinates cannot be parsed are skipped.
    """
    with open(svg_path, 'r', encoding='utf-8') as f:
        content = f.read()

    group_pattern = re.compile(
        r'<g[^>]*>.*?<title>([^<]+)</title>.*?<rect[^>]*\sy="([^"]+)"[^>]*fg:x="([^"]+)"[^>]*fg:w="([^"]+)"[^>]*/?>',
        re.DOTALL
    )
    # Compiled once instead of being handed to re.match for every frame.
    title_pattern = re.compile(
        r'^(.+?)\s+\(([0-9,]+)\s+samples?,\s+([0-9.]+)%\)$'
    )

    # Each record is (name, samples, percentage, x, w, y).
    frames_data: list[tuple[str, int, float, int, int, int]] = []
    for match in group_pattern.finditer(content):
        raw_title, y_str, x_str, w_str = match.groups()
        title_match = title_pattern.match(unescape(raw_title).strip())
        if not title_match:
            continue
        try:
            samples = int(title_match.group(2).replace(',', ''))
            percentage = float(title_match.group(3))
            x = int(x_str)
            w = int(w_str)
            # y may be written with a fractional part; it is truncated.
            y = int(float(y_str))
        except ValueError:
            continue
        frames_data.append((title_match.group(1), samples, percentage, x, w, y))

    if not frames_data:
        return []

    frames_data.sort(key=lambda f: (f[3], -f[5]))

    # Rows keyed by y; each row inherits the ascending-x order of the sort.
    by_y: dict[int, list[tuple[str, int, float, int, int, int]]] = defaultdict(list)
    for frame in frames_data:
        by_y[frame[5]].append(frame)
    y_levels = sorted(by_y)

    results: list[tuple[list[str], int, float]] = []
    for func_name, samples, percentage, x, w, y in frames_data:
        right_edge = x + w
        stack = [func_name]
        for level_y in y_levels:
            if level_y <= y:
                continue
            for cand_name, _, _, cand_x, cand_w, _ in by_y[level_y]:
                if cand_x > x:
                    # Rows are x-sorted, so no later rectangle can start at
                    # or before this frame.
                    break
                if cand_x + cand_w >= right_edge:
                    stack.append(cand_name)
                    break
        # Ancestors were collected nearest-first; flip to outermost-first.
        stack.reverse()
        results.append((stack, samples, percentage))
    return results
def build_stack_tree(
    stacks: list[tuple[list[str], int, float]],
    demangle: bool
) -> dict:
    """Fold a list of stacks into a nested call tree.

    Every node is a dict with keys ``'name'``, ``'self_samples'``,
    ``'total_samples'`` and ``'children'`` (child name -> node). A stack's
    sample count is added to ``'total_samples'`` of the root and of every
    node along its path, and to ``'self_samples'`` of the path's final node.

    NOTE(review): because counts are summed along the whole path, the totals
    are only meaningful when each stack carries samples not already included
    in another stack sharing its prefix - confirm against the caller.

    Args:
        stacks: ``(frames, samples, percentage)`` tuples, frames ordered
            outermost first. The percentage is ignored.
        demangle: When true, each frame name goes through ``demangle_name``
            before it is used as a key.

    Returns:
        The root node, named ``'root'``.
    """
    def new_node(label: str) -> dict:
        return {
            'name': label,
            'self_samples': 0,
            'total_samples': 0,
            'children': {}
        }

    root = new_node('root')
    for frames, samples, _pct in stacks:
        path = [demangle_name(f) for f in frames] if demangle else frames
        root['total_samples'] += samples

        cursor = root
        for label in path:
            siblings = cursor['children']
            if label not in siblings:
                siblings[label] = new_node(label)
            cursor = siblings[label]
            cursor['total_samples'] += samples

        # After the walk the cursor rests on the stack's final frame; an
        # empty stack never leaves the root and records no self samples.
        if path:
            cursor['self_samples'] += samples
    return root
def find_leaf_stacks(
    stacks: list[tuple[list[str], int, float]]
) -> list[tuple[list[str], int, float]]:
    """Keep only the stacks that no longer stack extends.

    A stack is dropped when its frame list is a proper prefix of another
    stack's frame list.

    Args:
        stacks: ``(frames, samples, percentage)`` tuples.

    Returns:
        The surviving stacks, longest frame list first; stacks of equal
        length keep their input order.
    """
    # Every proper prefix of every stack. A stack is a leaf exactly when its
    # own frame tuple is not in this set, which replaces a pairwise scan.
    extended: set[tuple[str, ...]] = set()
    for frames, _samples, _pct in stacks:
        path = tuple(frames)
        for cut in range(len(path) - 1, -1, -1):
            prefix = path[:cut]
            if prefix in extended:
                # Prefixes are always added longest-first down to the empty
                # one, so every shorter prefix is already present.
                break
            extended.add(prefix)

    longest_first = sorted(stacks, key=lambda s: len(s[0]), reverse=True)
    return [
        (frames, samples, pct)
        for frames, samples, pct in longest_first
        if tuple(frames) not in extended
    ]
def format_hottest_stacks(
    stacks: list[tuple[list[str], int, float]],
    top_n: int,
    min_percent: float,
    demangle: bool,
    max_frames: int = 0
) -> str:
    """Render the leaf stacks with the most samples as a text report.

    Leaf stacks (per ``find_leaf_stacks``) are ordered by sample count,
    filtered to those at or above ``min_percent`` and cut to the first
    ``top_n``. Each frame is indented by its depth, capped at four levels,
    and the final frame of a stack is marked with an arrow.

    Args:
        stacks: ``(frames, samples, percentage)`` tuples.
        top_n: Maximum number of stacks to print.
        min_percent: Stacks with a smaller percentage are left out.
        demangle: When true, frame names go through ``demangle_name``.
        max_frames: Longest stack printed in full. Longer stacks show the
            first ``max_frames // 3`` frames, an omission marker, and the
            remaining budget from the end. Zero or a negative value disables
            abbreviation.

    Returns:
        The report, or ``"No stacks found matching criteria."`` when nothing
        passes the filter.
    """
    leaf_stacks = find_leaf_stacks(stacks)
    ranked = sorted(leaf_stacks, key=lambda s: s[1], reverse=True)
    selected = [s for s in ranked if s[2] >= min_percent][:top_n]
    if not selected:
        return "No stacks found matching criteria."

    def render(depth: int, frame: str, last_depth: int) -> str:
        """Format one frame line; only the stack's final frame gets an arrow."""
        indent = " " * min(depth, 4)
        marker = "→ " if depth == last_depth else ""
        return f"{indent}{marker}{frame}"

    lines = [
        "=" * 80,
        "HOTTEST STACK TRACES (leaf frames only)",
        "=" * 80,
        "",
    ]
    for rank, (frames, samples, pct) in enumerate(selected, 1):
        if demangle:
            frames = [demangle_name(f) for f in frames]
        lines.append(f"#{rank}: {samples:,} samples ({pct:.2f}%)")
        lines.append("-" * 40)

        last_depth = len(frames) - 1
        # A negative limit used to reach the abbreviation branch, where a
        # zero tail count made frames[-0:] print the whole stack after the
        # marker. Non-positive now uniformly means "no limit".
        if max_frames <= 0 or len(frames) <= max_frames:
            lines.extend(
                render(depth, frame, last_depth)
                for depth, frame in enumerate(frames)
            )
        else:
            head_count = max_frames // 3
            tail_start = len(frames) - (max_frames - head_count)
            omitted = len(frames) - max_frames
            lines.extend(
                render(depth, frame, last_depth)
                for depth, frame in enumerate(frames[:head_count])
            )
            lines.append(" ... ({} frames omitted) ...".format(omitted))
            lines.extend(
                render(depth, frame, last_depth)
                for depth, frame in enumerate(frames[tail_start:], tail_start)
            )
        lines.append("")
    return '\n'.join(lines)
def format_stack_tree(
    tree: dict,
    top_n: int,
    min_percent: float,
    total_samples: int
) -> str:
    """Render a call tree as an indented text report, hottest child first.

    Nodes are visited depth-first, children ordered by descending
    ``'total_samples'``. Each printed node takes two lines: its name (cut to
    60 characters) and its sample count with percentage of ``total_samples``.
    Self samples are appended when they are non-zero and at least 0.1 %.

    Args:
        tree: Root node with keys ``'name'``, ``'self_samples'``,
            ``'total_samples'`` and ``'children'``. The root itself is not
            printed.
        top_n: At most ``top_n * 3`` nodes are printed in total.
        min_percent: Children below this percentage are skipped together
            with their subtrees.
        total_samples: Denominator for percentages; when it is not positive
            every percentage is 0.

    Returns:
        The report text.
    """
    lines = [
        "=" * 80,
        "STACK TREE (hottest paths)",
        "=" * 80,
        "",
        f"Total samples: {total_samples:,}",
        "",
    ]
    node_budget = top_n * 3
    max_name_len = 60
    shown = 0

    def share(count: int) -> float:
        return (count / total_samples * 100) if total_samples > 0 else 0

    def emit_children(node: dict, depth: int) -> None:
        nonlocal shown
        children = sorted(
            node['children'].values(),
            key=lambda c: c['total_samples'],
            reverse=True
        )
        for child in children:
            # Checked before every node: a test only on entry would let the
            # sibling loops already in progress print past the cap.
            if shown >= node_budget:
                return
            pct = share(child['total_samples'])
            if pct < min_percent:
                continue
            self_pct = share(child['self_samples'])

            indent = " " * depth
            if child['self_samples'] > 0 and self_pct >= 0.1:
                self_info = f" [self: {child['self_samples']:,} ({self_pct:.1f}%)]"
            else:
                self_info = ""

            name = child['name']
            if len(name) > max_name_len:
                name = name[:max_name_len-3] + "..."

            lines.append(f"{indent}{name}")
            lines.append(f"{indent} └─ {child['total_samples']:,} samples ({pct:.1f}%){self_info}")
            shown += 1
            emit_children(child, depth + 1)

    emit_children(tree, 0)

    # With no nodes printed the header's trailing blank line is removed.
    if not lines[-1]:
        lines = lines[:-1]
    return '\n'.join(lines)
def format_output(
    grouped: dict[str, tuple[int, float]],
    top_n: int,
    min_percent: float,
    sort_by: str = 'samples'
) -> str:
    """Render grouped entries as a table of samples, percentage and name.

    Args:
        grouped: Mapping of name to ``(samples, percentage)``.
        top_n: Maximum number of rows.
        min_percent: Entries with a smaller percentage are left out.
        sort_by: ``'samples'`` orders rows by sample count; any other value
            orders them by percentage. Both orders are descending.

    Returns:
        The table text, or ``"No entries found matching criteria."`` when no
        entry passes the filter.
    """
    # Position of the sort key inside a (name, samples, pct) row.
    sort_column = 1 if sort_by == 'samples' else 2
    kept = [
        (name, samples, pct)
        for name, (samples, pct) in grouped.items()
        if pct >= min_percent
    ]
    rows = sorted(kept, key=lambda row: row[sort_column], reverse=True)[:top_n]
    if not rows:
        return "No entries found matching criteria."

    # The samples column is as wide as the largest comma-grouped count.
    samples_width = len(f"{max(row[1] for row in rows):,}")
    header = f"{'Samples':>{samples_width}} {'%':>6} Function/Path"
    rule = '-' * (samples_width + 2 + 6 + 2 + 50)
    body = [
        f"{samples:>{samples_width},} {pct:>5.2f}% {name}"
        for name, samples, pct in rows
    ]
    return '\n'.join([header, rule, *body])
def _estimate_total_samples(entries: list[tuple[str, int, float]]) -> int:
    """Estimate the profile's sample total from its widest frame."""
    _name, samples, pct = max(entries, key=lambda entry: entry[1])
    if pct > 0:
        # A frame drawn at 100 % gives the exact total; otherwise scale up.
        return round(samples * 100 / pct)
    return samples


def _to_exclusive_stacks(
    stacks: list[tuple[list[str], int, float]]
) -> list[tuple[list[str], int, float]]:
    """Subtract each stack's direct children so counts no longer overlap."""
    covered_by_children: dict[tuple[str, ...], int] = defaultdict(int)
    for frames, samples, _pct in stacks:
        if len(frames) > 1:
            covered_by_children[tuple(frames[:-1])] += samples
    return [
        (frames, max(0, samples - covered_by_children.get(tuple(frames), 0)), pct)
        for frames, samples, pct in stacks
    ]


def main():
    """Command-line entry point: parse a flamegraph SVG and print a report.

    Prints the number of parsed frames and the profile's total sample count,
    then, depending on the flags, the hottest leaf stacks (``--stacks``), the
    stack tree (``--tree``) and the flat table. The flat table is printed
    unless ``--stacks`` was given.

    Exits with status 1 when the file cannot be read or holds no parseable
    frame titles.
    """
    parser = argparse.ArgumentParser(
        description='Parse cargo flamegraph SVG and output sorted hottest codepaths.'
    )
    parser.add_argument(
        'svg_file',
        help='Path to the flamegraph SVG file'
    )
    parser.add_argument(
        '--top', '-n',
        type=int,
        default=50,
        help='Show top N entries (default: 50)'
    )
    parser.add_argument(
        '--min-percent', '-m',
        type=float,
        default=0.0,
        help='Minimum percentage threshold (default: 0.0)'
    )
    parser.add_argument(
        '--group-by', '-g',
        choices=['function', 'module', 'crate'],
        default='function',
        help='Group results by function, module, or crate (default: function)'
    )
    parser.add_argument(
        '--demangle', '-d',
        action='store_true',
        help='Simplify Rust/C++ symbol names by removing template parameters'
    )
    parser.add_argument(
        '--sort-by', '-s',
        choices=['samples', 'percent'],
        default='samples',
        help='Sort by samples or percentage (default: samples)'
    )
    parser.add_argument(
        '--stacks',
        action='store_true',
        help='Show hottest complete stack traces'
    )
    parser.add_argument(
        '--tree',
        action='store_true',
        help='Show stack tree with hottest paths highlighted'
    )
    parser.add_argument(
        '--max-frames',
        type=int,
        default=0,
        help='Max frames to show per stack before abbreviating (0=no limit, default: 0)'
    )
    args = parser.parse_args()

    try:
        entries = extract_titles_from_svg(args.svg_file)
    except FileNotFoundError:
        print(f"Error: File not found: {args.svg_file}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file: {e}", file=sys.stderr)
        sys.exit(1)

    if not entries:
        print("No flamegraph data found in the SVG file.", file=sys.stderr)
        sys.exit(1)

    # Every frame's count already includes its callees, so summing all frames
    # would grow with stack depth; derive the total from the widest frame.
    total_samples = _estimate_total_samples(entries)
    print(f"Parsed {len(entries)} stack frames")
    print(f"Total samples: {total_samples:,}")
    print()

    if args.stacks or args.tree:
        stacks = extract_stacks_from_svg(args.svg_file)
        if args.stacks:
            output = format_hottest_stacks(stacks, args.top, args.min_percent, args.demangle, args.max_frames)
            print(output)
            print()
        if args.tree:
            # build_stack_tree adds a stack's count to every node on its
            # path, so it must be given counts that do not overlap.
            tree = build_stack_tree(_to_exclusive_stacks(stacks), args.demangle)
            output = format_stack_tree(tree, args.top, args.min_percent, total_samples)
            print(output)
            print()

    if not args.stacks:
        grouped = group_entries(entries, args.group_by, args.demangle)
        output = format_output(grouped, args.top, args.min_percent, args.sort_by)
        print(output)
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()