calcit 0.12.30

Interpreter and js codegen for Calcit
Documentation
#!/usr/bin/env python3

import argparse
import os
import re
import subprocess
import sys
import tempfile
import xml.etree.ElementTree as ET
from collections import Counter
from pathlib import Path


def export_time_profile_xml(trace_path: Path, xml_path: Path) -> None:
  """Export the time-profile table of a trace to an XML file via `xctrace export`.

  The XPath passed to xctrace selects the table whose schema is
  "time-profile" inside run number 1 of the trace.

  Args:
    trace_path: Trace passed to xctrace as `--input`.
    xml_path: Destination passed to xctrace as `--output`.

  Raises:
    RuntimeError: If xctrace cannot be launched (for example it is not
      installed or not on PATH) or if it exits with a non-zero status.
  """
  cmd = [
    "xctrace",
    "export",
    "--input",
    str(trace_path),
    "--xpath",
    '/trace-toc/run[@number="1"]/data/table[@schema="time-profile"]',
    "--output",
    str(xml_path),
  ]
  try:
    result = subprocess.run(cmd, capture_output=True, text=True)
  except OSError as error:
    # A missing or non-executable xctrace surfaces as OSError; report it
    # through the same exception type as a failed export so callers need
    # only one handler.
    raise RuntimeError(f"xctrace export failed: {error}") from error
  if result.returncode != 0:
    stderr = result.stderr.strip() or "(no stderr)"
    raise RuntimeError(f"xctrace export failed: {stderr}")


def parse_frame_names(xml_path: Path) -> Counter[str]:
  """Tally `frame` elements in an XML file by their `name` attribute.

  The document is streamed with `iterparse`, and every element is cleared
  once its end tag has been processed so parsed content is not retained.

  Args:
    xml_path: XML file to scan.

  Returns:
    A Counter mapping each non-empty frame name to the number of `frame`
    elements carrying that name. Frames with a missing or empty `name`
    are ignored.
  """
  # NOTE(review): frames that carry no `name` attribute (e.g. elements that
  # only reference an earlier frame) are not counted here - confirm that
  # this matches the intended weighting of the export format.
  tally: Counter[str] = Counter()
  for _event, node in ET.iterparse(xml_path, events=("end",)):
    label = node.attrib.get("name") if node.tag == "frame" else None
    if label:
      tally[label] += 1
    # Attributes were read above, so the element can be emptied now.
    node.clear()
  return tally


def compile_patterns(patterns: list[str]) -> list[re.Pattern[str]]:
  """Compile each regex source string, preserving the input order.

  Args:
    patterns: Regular-expression source strings.

  Returns:
    A new list holding one compiled pattern per input string.
  """
  return [re.compile(source) for source in patterns]


def filter_counts(
  counts: Counter[str],
  include_patterns: list[re.Pattern[str]],
  exclude_patterns: list[re.Pattern[str]],
) -> Counter[str]:
  """Keep only the symbols that pass the include/exclude regex filters.

  A symbol is kept when it matches at least one include pattern (or no
  include patterns were given) and matches none of the exclude patterns.
  Matching uses `search`, so patterns may hit anywhere in the name.

  Args:
    counts: Symbol name to count mapping.
    include_patterns: Compiled patterns; an empty list disables the check.
    exclude_patterns: Compiled patterns; an empty list disables the check.

  Returns:
    `counts` itself when both pattern lists are empty, otherwise a new
    Counter with the surviving entries in their original order.
  """
  if not (include_patterns or exclude_patterns):
    # No filters at all: hand back the very same Counter object.
    return counts

  def is_kept(symbol: str) -> bool:
    if include_patterns and not any(rx.search(symbol) for rx in include_patterns):
      return False
    return not any(rx.search(symbol) for rx in exclude_patterns)

  return Counter({symbol: hits for symbol, hits in counts.items() if is_kept(symbol)})


def summarize_by_prefix(
  counts: Counter[str],
  limit: int = 10,
  depth: int = 2,
) -> list[tuple[str, int]]:
  """Aggregate symbol counts by their leading `::`-separated path segments.

  Each symbol name is reduced to its first `depth` segments (names with
  fewer segments are used whole) and the counts of all symbols sharing
  that prefix are summed.

  Args:
    counts: Symbol name to count mapping.
    limit: Maximum number of groups to return; defaults to 10.
    depth: Number of leading `::` segments that form a group key;
      defaults to 2.

  Returns:
    Up to `limit` `(prefix, total)` pairs, largest total first.

  Raises:
    ValueError: If `depth` is smaller than 1.
  """
  if depth < 1:
    raise ValueError("depth must be >= 1")

  groups: Counter[str] = Counter()
  for name, count in counts.items():
    # Slicing handles short names too: a name with fewer than `depth`
    # segments is simply re-joined into itself.
    prefix = "::".join(name.split("::")[:depth])
    groups[prefix] += count
  return groups.most_common(limit)


def derive_hints(counts: Counter[str]) -> list[str]:
  """Produce optimization hints based on which symbol names are present.

  All symbol names are joined with newlines into one text, and each rule's
  regex is searched in that text. Only the presence of a name matters; the
  counts themselves are not consulted.

  Args:
    counts: Symbol name to count mapping; only the keys are inspected.

  Returns:
    The hint text of every rule whose pattern matched, in rule order.
  """
  # (pattern, hint) pairs, evaluated in this order.
  rules = (
    (
      r"calcit::runner::(call_expr|evaluate_expr|run_fn_owned)",
      "解释器调度路径是主要热点,可优先检查 `call_expr`/`evaluate_expr` 的分支和数据转换。",
    ),
    (
      r"CalcitProc::get_type_signature|check_proc_arity|type_annotation",
      "运行时类型签名/arity 检查占比不低,可考虑缓存签名结果或减少重复检查路径。",
    ),
    (
      r"im_ternary_tree::.*to_vec|CalcitList::to_vec|drop_left",
      "持久结构与 Vec 转换频繁,建议减少 `to_vec` 往返或批量化访问。",
    ),
    (
      r"alloc::|RawVec|drop::|triomphe::|rpds::",
      "分配与释放开销明显,建议优先减少短生命周期对象和临时容器创建。",
    ),
  )
  # One name per line: `.` does not cross newlines, so a pattern such as
  # `im_ternary_tree::.*to_vec` can only match within a single symbol name.
  haystack = "\n".join(counts)
  return [advice for signature, advice in rules if re.search(signature, haystack)]


def parse_args() -> argparse.Namespace:
  """Parse and validate the command-line options of this script.

  Exactly one of `--trace` / `--xml` must be supplied and `--top` must be
  positive; violations are reported through `ArgumentParser.error`.

  Returns:
    The parsed namespace with attributes `trace`, `xml`, `top`,
    `include`, `exclude` and `keep_xml`.
  """
  cli = argparse.ArgumentParser(
    description="Summarize xctrace Time Profiler output into LLM-friendly hotspot text."
  )
  cli.add_argument("--trace", type=Path, help="Path to .trace bundle generated by xctrace")
  cli.add_argument("--xml", type=Path, help="Path to exported time-profile XML")
  cli.add_argument("--top", type=int, default=30, help="Number of top hotspot functions")

  # The two regex filters share the same shape: repeatable, defaulting to
  # an empty list.
  repeatable_filters = (
    ("--include", "Regex include filter for symbol names (repeatable)"),
    ("--exclude", "Regex exclude filter for symbol names (repeatable)"),
  )
  for flag, description in repeatable_filters:
    cli.add_argument(flag, action="append", default=[], help=description)

  cli.add_argument(
    "--keep-xml",
    action="store_true",
    help="Keep temporary XML when using --trace",
  )

  options = cli.parse_args()

  has_trace = bool(options.trace)
  has_xml = bool(options.xml)
  if not (has_trace or has_xml):
    cli.error("One of --trace or --xml is required")
  if has_trace and has_xml:
    cli.error("Use only one of --trace or --xml")
  if options.top <= 0:
    cli.error("--top must be > 0")
  return options


def _print_report(
  source: Path,
  counts: Counter[str],
  filtered: Counter[str],
  top: int,
) -> None:
  """Print the hotspot, prefix-group and hint sections to stdout."""
  total_frames = sum(counts.values())
  filtered_frames = sum(filtered.values())

  def percent(count: int) -> float:
    # Guard against division by zero when the filter matched nothing.
    return (count / filtered_frames * 100.0) if filtered_frames else 0.0

  print(f"Source XML: {source}")
  print(f"Total named frames: {total_frames}")
  print(f"Frames after filter: {filtered_frames}")
  print()

  print(f"Top {top} Hotspots")
  print("-" * 72)
  if not filtered:
    print("(no matched symbols)")
  else:
    for name, count in filtered.most_common(top):
      print(f"{count:6d}  {percent(count):6.2f}%  {name}")

  print()
  print("Top Prefix Groups")
  print("-" * 72)
  for group, count in summarize_by_prefix(filtered):
    print(f"{count:6d}  {percent(count):6.2f}%  {group}")

  print()
  print("Optimization Hints")
  print("-" * 72)
  hints = derive_hints(filtered)
  if not hints:
    print("- 无明显通用模式,请结合 Top Hotspots 逐个函数分析。")
  else:
    for hint in hints:
      print(f"- {hint}")


def main() -> int:
  """Run the tool: obtain the time-profile XML, count frames, print a report.

  With `--trace` the trace is exported into a temporary directory that is
  always removed before returning; with `--keep-xml` the exported XML is
  first copied next to the trace. With `--xml` the given file is read
  directly.

  Returns:
    0 on success, 2 when the input path does not exist or a filter regex
    is invalid, 3 when the xctrace export fails, 4 when the XML cannot be
    parsed.
  """
  args = parse_args()

  if args.trace:
    if not args.trace.exists():
      print(f"Trace file not found: {args.trace}", file=sys.stderr)
      return 2
  elif not args.xml.exists():
    print(f"XML file not found: {args.xml}", file=sys.stderr)
    return 2

  # Compile the filters before the export so a bad regex fails fast with a
  # readable message instead of a traceback after the work is done.
  try:
    include_patterns = compile_patterns(args.include)
    exclude_patterns = compile_patterns(args.exclude)
  except re.error as error:
    print(f"Invalid filter regex: {error}", file=sys.stderr)
    return 2

  xml_path: Path
  source: Path
  temp_dir = None
  try:
    if args.trace:
      temp_dir = tempfile.TemporaryDirectory(prefix="xctrace-export-")
      xml_path = Path(temp_dir.name) / "time-profile.xml"
      try:
        export_time_profile_xml(args.trace, xml_path)
      except RuntimeError as error:
        print(str(error), file=sys.stderr)
        return 3
      except OSError as error:
        # Raised when the xctrace executable itself cannot be started.
        print(f"xctrace export failed: {error}", file=sys.stderr)
        return 3
      source = xml_path
      if args.keep_xml:
        kept = args.trace.parent / f"{args.trace.name}.time-profile.xml"
        kept.write_bytes(xml_path.read_bytes())
        print(f"Saved exported XML to: {kept}")
        # Report the durable copy; the temporary file is removed below.
        source = kept
    else:
      xml_path = source = args.xml

    try:
      counts = parse_frame_names(xml_path)
    except ET.ParseError as error:
      print(f"Failed to parse XML {source}: {error}", file=sys.stderr)
      return 4

    filtered = filter_counts(counts, include_patterns, exclude_patterns)
    _print_report(source, counts, filtered, args.top)
  finally:
    # The kept XML (if any) is a separate copy, so the temporary directory
    # can be removed on every exit path, including early returns and errors.
    if temp_dir is not None:
      temp_dir.cleanup()
  return 0


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
  sys.exit(main())