#!/usr/bin/env python3
"""
Test: compaction summary via KV cache reuse.
Simulates appending a summary instruction to an existing conversation
(with tools defined) and verifies each model:
1. Generates a structured summary (not a tool call)
2. Follows the 8-section format
3. Does NOT call any tools despite tools being present
Usage:
python tests/test_compaction_kvcache.py
"""
import json, time, os, sys
# --- Config ---
OLLAMA_URL = "http://macstudio.local"
GATE_ROUTER_URL = "https://api.gaterouter.ai/openai/v1"
MODELS = [
# Direct cloud models (need individual API keys)
{"name": "deepseek/deepseek-chat", "base": "https://api.deepseek.com/v1", "key_env": "DEEPSEEK_API_KEY"},
{"name": "doubao/doubao-seed-2-0-pro-260215", "base": "https://ark.cn-beijing.volces.com/api/v3", "key_env": "ARK_API_KEY"},
# GateRouter models (single key, multiple providers)
{"name": "google/gemini-2.5-flash", "base": GATE_ROUTER_URL, "key_env": "GATE_ROUTER_KEY"},
{"name": "google/gemini-3-flash-preview", "base": GATE_ROUTER_URL, "key_env": "GATE_ROUTER_KEY"},
{"name": "minimax/minimax-m2.5", "base": GATE_ROUTER_URL, "key_env": "GATE_ROUTER_KEY"},
{"name": "google/gemini-3.1-pro-preview", "base": GATE_ROUTER_URL, "key_env": "GATE_ROUTER_KEY"},
{"name": "z-ai/glm-5-turbo", "base": GATE_ROUTER_URL, "key_env": "GATE_ROUTER_KEY"},
{"name": "moonshotai/kimi-k2.5", "base": GATE_ROUTER_URL, "key_env": "GATE_ROUTER_KEY"},
{"name": "anthropic/claude-sonnet-4.6", "base": GATE_ROUTER_URL, "key_env": "GATE_ROUTER_KEY"},
# Remote llama.cpp
{"name": "llama/llama", "base": "http://218.22.75.183:8000/v1", "key_env": "LLAMA_REMOTE_KEY"},
# Local ollama models
{"name": "ollama/qwen3.5:9b", "base": OLLAMA_URL, "key_env": None},
{"name": "ollama/qwen3.5:27b", "base": OLLAMA_URL, "key_env": None},
{"name": "ollama/gemma4:26b", "base": OLLAMA_URL, "key_env": None},
]
# Simulated multi-turn conversation
CONVERSATION = [
{"role": "user", "content": "帮我写一个Python爬虫,抓取豆瓣电影Top250的数据"},
{"role": "assistant", "content": "好的,我来帮你写一个豆瓣电影Top250的爬虫。让我先创建项目结构。"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "execute_command", "arguments": "{\"command\":\"mkdir -p douban_spider && cd douban_spider\"}"}}]},
{"role": "tool", "tool_call_id": "call_1", "content": "目录已创建"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_2", "type": "function", "function": {"name": "write_file", "arguments": "{\"path\":\"douban_spider/spider.py\",\"content\":\"import requests\\nfrom bs4 import BeautifulSoup\\n\\ndef crawl_top250():\\n results = []\\n for page in range(0, 250, 25):\\n url = f'https://movie.douban.com/top250?start={page}'\\n resp = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})\\n soup = BeautifulSoup(resp.text, 'html.parser')\\n for item in soup.select('.item'):\\n title = item.select_one('.title').text\\n rating = item.select_one('.rating_num').text\\n results.append({'title': title, 'rating': rating})\\n return results\\n\"}"}}]},
{"role": "tool", "tool_call_id": "call_2", "content": "文件已写入: douban_spider/spider.py"},
{"role": "assistant", "content": "爬虫脚本已创建。现在让我运行测试一下。"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_3", "type": "function", "function": {"name": "execute_command", "arguments": "{\"command\":\"cd douban_spider && python spider.py\"}"}}]},
{"role": "tool", "tool_call_id": "call_3", "content": "抓取完成,共250部电影。前3部:\n1. 肖申克的救赎 9.7\n2. 霸王别姬 9.6\n3. 阿甘正传 9.5"},
{"role": "assistant", "content": "爬虫运行成功!已抓取250部电影数据。前三名是肖申克的救赎(9.7)、霸王别姬(9.6)、阿甘正传(9.5)。需要我把数据导出为CSV吗?"},
{"role": "user", "content": "好的,导出CSV,然后帮我分析一下评分分布"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_4", "type": "function", "function": {"name": "write_file", "arguments": "{\"path\":\"douban_spider/export.py\",\"content\":\"import csv\\nfrom spider import crawl_top250\\n\\ndata = crawl_top250()\\nwith open('top250.csv', 'w', newline='') as f:\\n writer = csv.DictWriter(f, fieldnames=['title', 'rating'])\\n writer.writeheader()\\n writer.writerows(data)\\nprint(f'导出完成: {len(data)}条')\"}"}}]},
{"role": "tool", "tool_call_id": "call_4", "content": "文件已写入: douban_spider/export.py"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_5", "type": "function", "function": {"name": "execute_command", "arguments": "{\"command\":\"cd douban_spider && python export.py\"}"}}]},
{"role": "tool", "tool_call_id": "call_5", "content": "导出完成: 250条"},
{"role": "assistant", "content": "CSV已导出。让我分析评分分布。"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_6", "type": "function", "function": {"name": "execute_command", "arguments": "{\"command\":\"cd douban_spider && python -c \\\"import csv; data=[float(r['rating']) for r in csv.DictReader(open('top250.csv'))]; print(f'平均:{sum(data)/len(data):.1f}'); print(f'最高:{max(data)}'); print(f'最低:{min(data)}'); bins={f'{i/10:.0f}分段':sum(1 for d in data if i/10<=d<(i+1)/10) for i in range(80,100)}; print(bins)\\\"\"}"}}]},
{"role": "tool", "tool_call_id": "call_6", "content": "平均:8.9\n最高:9.7\n最低:8.2\n{'8分段': 45, '9分段': 205}"},
{"role": "assistant", "content": "分析完成!豆瓣Top250评分分布:\n- 平均分: 8.9\n- 最高: 9.7 (肖申克的救赎)\n- 最低: 8.2\n- 9分以上: 205部 (82%)\n- 8分段: 45部 (18%)\n\n大部分影片都在9分以上,说明Top250的门槛很高。"},
{"role": "user", "content": "不错!再帮我加一个功能:抓取每部电影的导演和年份信息"},
]
# Tools definition (same as what RsClaw sends)
TOOLS = [
{
"type": "function",
"function": {
"name": "execute_command",
"description": "Run a shell command",
"parameters": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}
}
},
{
"type": "function",
"function": {
"name": "write_file",
"description": "Write content to a file",
"parameters": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}
}
},
{
"type": "function",
"function": {
"name": "read_file",
"description": "Read a file",
"parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}
}
},
{
"type": "function",
"function": {
"name": "memory",
"description": "Search or store long-term memory",
"parameters": {"type": "object", "properties": {"action": {"type": "string"}, "query": {"type": "string"}}, "required": ["action"]}
}
},
{
"type": "function",
"function": {
"name": "web_search",
"description": "Search the web",
"parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}
}
},
]
SUMMARY_INSTRUCTION = """请将以上对话压缩为结构化摘要。不要调用任何工具,只输出文本摘要。
按以下格式输出:
## Active Task
[当前正在做什么]
## Goal
[最终目标]
## Completed
[已完成的操作和结果]
## In Progress
[正在进行、未完成的]
## Key Data
[必须原样保留的值: 路径/文件名/数字/关键输出]
## Decisions
[已做的技术决策]
## Pending
[等用户确认的、被阻塞的]
## Files
[涉及的文件和修改]
注意: 只输出摘要文本,不要调用任何工具。"""
def call_openai_compat(name: str, base: str, key: str, model_id: str, messages: list) -> dict:
"""Call OpenAI-compatible API."""
import urllib.request
payload = {
"model": model_id,
"messages": messages,
"tools": TOOLS,
"stream": False,
"max_tokens": 2000,
"temperature": 0,
}
# Disable thinking for providers that support it
if "doubao" in name or "ark" in base:
payload["thinking"] = {"type": "disabled"}
body = json.dumps(payload).encode()
req = urllib.request.Request(
f"{base}/chat/completions",
data=body,
headers={
"Authorization": f"Bearer {key}",
"Content-Type": "application/json",
},
)
t0 = time.perf_counter()
try:
with urllib.request.urlopen(req, timeout=120) as resp:
data = json.loads(resp.read())
except urllib.request.HTTPError as e:
err_body = e.read().decode(errors="replace")[:500]
raise RuntimeError(f"HTTP {e.code}: {err_body}") from e
elapsed = time.perf_counter() - t0
choice = data.get("choices", [{}])[0]
msg = choice.get("message", {})
return {
"has_tool_call": bool(msg.get("tool_calls")),
"content": msg.get("content") or "",
"usage": data.get("usage", {}),
"time": elapsed,
}
def call_ollama_native(name: str, base: str, model_id: str, messages: list) -> dict:
"""Call Ollama native /api/chat (supports think=false)."""
import urllib.request
# Convert OpenAI tool format to Ollama format
ollama_tools = []
for t in TOOLS:
ollama_tools.append({
"type": "function",
"function": t["function"],
})
# Ollama requires tool_call arguments as objects, not strings.
# Also remove tool_call_id from tool messages (use content only).
ollama_msgs = []
for m in messages:
msg = dict(m)
if "tool_calls" in msg and msg["tool_calls"]:
new_calls = []
for tc in msg["tool_calls"]:
tc = dict(tc)
func = dict(tc.get("function", {}))
args = func.get("arguments", "{}")
if isinstance(args, str):
try:
func["arguments"] = json.loads(args)
except json.JSONDecodeError:
func["arguments"] = {}
tc["function"] = func
new_calls.append(tc)
msg["tool_calls"] = new_calls
if msg.get("content") is None:
msg["content"] = ""
ollama_msgs.append(msg)
body = json.dumps({
"model": model_id,
"messages": ollama_msgs,
"tools": ollama_tools,
"stream": False,
"think": False,
"options": {"temperature": 0, "num_predict": 2000},
}).encode()
req = urllib.request.Request(
f"{base}/api/chat",
data=body,
headers={"Content-Type": "application/json"},
)
t0 = time.perf_counter()
with urllib.request.urlopen(req, timeout=300) as resp:
data = json.loads(resp.read())
elapsed = time.perf_counter() - t0
msg = data.get("message", {})
return {
"has_tool_call": bool(msg.get("tool_calls")),
"content": msg.get("content") or "",
"usage": {
"prompt_tokens": data.get("prompt_eval_count", 0),
"completion_tokens": data.get("eval_count", 0),
},
"time": elapsed,
}
def call_model(model_cfg: dict) -> dict:
"""Call a model and return result dict."""
name = model_cfg["name"]
base = model_cfg["base"]
is_ollama = name.startswith("ollama/")
model_id = name.split("/", 1)[1] if "/" in name else name
messages = list(CONVERSATION)
messages.append({"role": "user", "content": SUMMARY_INSTRUCTION})
try:
if is_ollama:
r = call_ollama_native(name, base, model_id, messages)
else:
key = os.environ.get(model_cfg["key_env"] or "", "") or ""
r = call_openai_compat(name, base, key, model_id, messages)
except Exception as e:
return {"name": name, "error": str(e), "time": 0}
# Strip any residual <think> tags from content
import re
content = re.sub(
r"<(?:think|thinking|reasoning)>[\s\S]*?</(?:think|thinking|reasoning)>",
"", r["content"], flags=re.IGNORECASE,
).strip()
# Check summary quality
sections_found = []
for section in ["Active Task", "Goal", "Completed", "In Progress", "Key Data", "Decisions", "Pending", "Files"]:
if section.lower() in content.lower():
sections_found.append(section)
return {
"name": name,
"time": r["time"],
"has_tool_call": r["has_tool_call"],
"content_len": len(content),
"sections": len(sections_found),
"sections_found": sections_found,
"usage": r["usage"],
"content_preview": content[:500] if content else "(empty)",
"error": None,
}
def main():
print("=" * 70)
print(" Compaction Summary via KV Cache Reuse - Model Test")
print("=" * 70)
print(f" Conversation: {len(CONVERSATION)} messages")
print(f" Tools: {len(TOOLS)} defined")
print(f" Test: append summary instruction, expect text (not tool call)")
print()
results = []
for cfg in MODELS:
name = cfg["name"]
key_env = cfg["key_env"]
if key_env and not os.environ.get(key_env):
print(f" SKIP {name} ({key_env} not set)")
continue
if "ollama" in name:
# Quick check if ollama is reachable
try:
import urllib.request
urllib.request.urlopen(f"{OLLAMA_URL}/api/tags", timeout=3)
except Exception:
print(f" SKIP {name} (ollama not reachable)")
continue
print(f" Testing {name}...", end=" ", flush=True)
r = call_model(cfg)
results.append(r)
if r["error"]:
print(f"ERROR: {r['error'][:80]}")
else:
status = "TOOL_CALL!" if r["has_tool_call"] else "OK"
print(f"{status} {r['time']:.1f}s sections={r['sections']}/8 "
f"len={r['content_len']} "
f"tokens={r['usage'].get('prompt_tokens', '?')}/{r['usage'].get('completion_tokens', '?')}")
print()
print("=" * 70)
print(" Results Summary")
print("=" * 70)
print(f" {'Model':<40} {'Time':>6} {'Tool?':>6} {'Sect':>5} {'Len':>6}")
print(f" {'-'*40} {'-'*6} {'-'*6} {'-'*5} {'-'*6}")
for r in results:
if r["error"]:
print(f" {r['name']:<40} {'ERROR':>6}")
else:
tc = "YES!" if r["has_tool_call"] else "no"
print(f" {r['name']:<40} {r['time']:>5.1f}s {tc:>6} {r['sections']:>4}/8 {r['content_len']:>6}")
print()
# Print detailed output for each model
for r in results:
if r["error"] or not r.get("content_preview"):
continue
print(f"--- {r['name']} ---")
print(r["content_preview"])
if r["content_len"] > 500:
print(f" ... ({r['content_len']} chars total)")
print()
# Verdict
print("=" * 70)
print(" Verdict")
print("=" * 70)
for r in results:
if r["error"]:
verdict = "SKIP (error)"
elif r["has_tool_call"]:
verdict = "FAIL - called tools instead of generating summary"
elif r["sections"] >= 6:
verdict = "PASS - good structured summary"
elif r["sections"] >= 4:
verdict = "PARTIAL - some sections missing"
elif r["content_len"] > 100:
verdict = "WEAK - generated text but poor structure"
else:
verdict = "FAIL - empty or useless response"
print(f" {r['name']:<40} {verdict}")
# --- Iterative compaction test ---
# New conversation turns that happen AFTER the first compaction
POST_COMPACT_TURNS = [
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_7", "type": "function", "function": {"name": "write_file", "arguments": "{\"path\":\"douban_spider/spider.py\",\"content\":\"import requests\\nfrom bs4 import BeautifulSoup\\n\\ndef crawl_top250():\\n results = []\\n for page in range(0, 250, 25):\\n url = f'https://movie.douban.com/top250?start={page}'\\n resp = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})\\n soup = BeautifulSoup(resp.text, 'html.parser')\\n for item in soup.select('.item'):\\n title = item.select_one('.title').text\\n rating = item.select_one('.rating_num').text\\n info = item.select_one('.bd p').text.strip()\\n director = info.split('\\\\n')[0].replace('导演:', '').strip()\\n year = info.split('\\\\n')[1].strip()[:4]\\n results.append({'title': title, 'rating': rating, 'director': director, 'year': year})\\n return results\"}"}}]},
{"role": "tool", "tool_call_id": "call_7", "content": "文件已更新: douban_spider/spider.py"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_8", "type": "function", "function": {"name": "execute_command", "arguments": "{\"command\":\"cd douban_spider && python spider.py\"}"}}]},
{"role": "tool", "tool_call_id": "call_8", "content": "抓取完成,共250部电影。前3部:\n1. 肖申克的救赎 9.7 导演:弗兰克·德拉邦特 年份:1994\n2. 霸王别姬 9.6 导演:陈凯歌 年份:1993\n3. 阿甘正传 9.5 导演:罗伯特·泽米吉斯 年份:1994"},
{"role": "assistant", "content": "导演和年份抓取成功!已更新spider.py。现在每部电影都有标题、评分、导演和年份信息了。"},
{"role": "user", "content": "很好!现在把导出脚本也更新一下,加上导演和年份列"},
{"role": "assistant", "content": None, "tool_calls": [{"id": "call_9", "type": "function", "function": {"name": "write_file", "arguments": "{\"path\":\"douban_spider/export.py\",\"content\":\"import csv\\nfrom spider import crawl_top250\\n\\ndata = crawl_top250()\\nwith open('top250.csv', 'w', newline='') as f:\\n writer = csv.DictWriter(f, fieldnames=['title', 'rating', 'director', 'year'])\\n writer.writeheader()\\n writer.writerows(data)\\nprint(f'导出完成: {len(data)}条')\"}"}}]},
{"role": "tool", "tool_call_id": "call_9", "content": "文件已更新: douban_spider/export.py"},
{"role": "assistant", "content": "导出脚本已更新,CSV现在包含title, rating, director, year四列。需要运行测试吗?"},
{"role": "user", "content": "运行一下,然后统计一下导演出现次数最多的前5名"},
]
ITERATIVE_SUMMARY_INSTRUCTION = """请更新以下摘要,整合新增的对话内容。不要调用任何工具,只输出文本摘要。
规则:
- 保留旧摘要中仍然有效的信息
- 将已完成的工作从"In Progress"移到"Completed"
- 新增已完成的操作到 Completed 列表(继续编号)
- 更新 Active Task 为最新的未完成请求
- 更新 Key Data 中变化的数据
- 更新 Files 中新增或修改的文件
按以下格式输出:
## Active Task
[当前正在做什么]
## Goal
[最终目标]
## Completed
[已完成的操作和结果]
## In Progress
[正在进行、未完成的]
## Key Data
[必须原样保留的值]
## Decisions
[已做的技术决策]
## Pending
[等用户确认的、被阻塞的]
## Files
[涉及的文件和修改]
注意: 只输出摘要文本,不要调用任何工具。"""
def call_model_iterative(model_cfg: dict, first_summary: str) -> dict:
"""Test iterative compaction: [head 3] + [first summary] + [new turns] + [update instruction]."""
name = model_cfg["name"]
base = model_cfg["base"]
is_ollama = name.startswith("ollama/")
model_id = name.split("/", 1)[1] if "/" in name else name
# Build: head(2) + summary + new turns (text only) + update instruction
# Only take first 2 messages (user + assistant text) to avoid
# orphaned tool_calls that some providers reject.
# Convert tool_call turns to plain text descriptions so all providers
# can process them without strict tool_call pairing requirements.
head = CONVERSATION[:2] # user + assistant text reply
summary_msg = {"role": "user", "content": f"[Conversation history compacted]\n{first_summary}"}
new_turns = []
for m in POST_COMPACT_TURNS:
if m.get("tool_calls"):
# Convert tool_call to text description
descs = []
for tc in m["tool_calls"]:
fn = tc.get("function", {})
descs.append(f"[Called {fn.get('name','?')}({fn.get('arguments','')[:80]})]")
new_turns.append({"role": "assistant", "content": " ".join(descs)})
elif m.get("role") == "tool":
# Convert tool result to assistant text
new_turns.append({"role": "assistant", "content": f"[Tool result] {m.get('content','')}"})
else:
new_turns.append(m)
messages = head + [summary_msg] + new_turns
messages.append({"role": "user", "content": ITERATIVE_SUMMARY_INSTRUCTION})
try:
if is_ollama:
r = call_ollama_native(name, base, model_id, messages)
else:
key = os.environ.get(model_cfg["key_env"] or "", "") or ""
r = call_openai_compat(name, base, key, model_id, messages)
except Exception as e:
return {"name": name, "error": str(e), "time": 0}
import re
content = re.sub(
r"<(?:think|thinking|reasoning)>[\s\S]*?</(?:think|thinking|reasoning)>",
"", r["content"], flags=re.IGNORECASE,
).strip()
# Check: did it preserve old info AND add new info?
has_old_data = "8.9" in content or "250" in content # old stats preserved
has_new_data = "导演" in content or "director" in content.lower() or "年份" in content # new feature
has_new_completed = "spider.py" in content and ("更新" in content or "修改" in content or "导演" in content)
sections_found = []
for section in ["Active Task", "Goal", "Completed", "In Progress", "Key Data", "Decisions", "Pending", "Files"]:
if section.lower() in content.lower():
sections_found.append(section)
return {
"name": name,
"time": r["time"],
"has_tool_call": r["has_tool_call"],
"content_len": len(content),
"sections": len(sections_found),
"has_old_data": has_old_data,
"has_new_data": has_new_data,
"has_new_completed": has_new_completed,
"usage": r["usage"],
"content_preview": content[:600] if content else "(empty)",
"error": None,
}
def test_iterative(first_results: list):
"""Run iterative compaction test using first summary results."""
print()
print("=" * 70)
print(" Iterative Compaction Test (2nd summary updates 1st)")
print("=" * 70)
print(f" Head: 3 messages | New turns: {len(POST_COMPACT_TURNS)} messages")
print(f" Test: update existing summary with new progress")
print()
for r in first_results:
if r["error"] or not r.get("content_preview"):
continue
name = r["name"]
cfg = next((m for m in MODELS if m["name"] == name), None)
if not cfg:
continue
first_summary = r["content_preview"]
if r["content_len"] > 500:
# Need full content - reuse preview (truncated but good enough for test)
first_summary = r["content_preview"]
print(f" Testing {name}...", end=" ", flush=True)
ir = call_model_iterative(cfg, first_summary)
if ir["error"]:
print(f"ERROR: {ir['error'][:80]}")
else:
old = "yes" if ir["has_old_data"] else "NO!"
new = "yes" if ir["has_new_data"] else "NO!"
tc = "TOOL!" if ir["has_tool_call"] else "ok"
print(f"{tc} {ir['time']:.1f}s sect={ir['sections']}/8 "
f"old_data={old} new_data={new} len={ir['content_len']}")
print()
print(" Legend: old_data=preserved stats from 1st summary, new_data=incorporated new features")
def main():
print("=" * 70)
print(" Compaction Summary via KV Cache Reuse - Model Test")
print("=" * 70)
print(f" Conversation: {len(CONVERSATION)} messages")
print(f" Tools: {len(TOOLS)} defined")
print(f" Test: append summary instruction, expect text (not tool call)")
print()
results = []
for cfg in MODELS:
name = cfg["name"]
key_env = cfg["key_env"]
if key_env and not os.environ.get(key_env):
print(f" SKIP {name} ({key_env} not set)")
continue
if "ollama" in name:
try:
import urllib.request
urllib.request.urlopen(f"{OLLAMA_URL}/api/tags", timeout=3)
except Exception:
print(f" SKIP {name} (ollama not reachable)")
continue
print(f" Testing {name}...", end=" ", flush=True)
r = call_model(cfg)
results.append(r)
if r["error"]:
print(f"ERROR: {r['error'][:80]}")
else:
status = "TOOL_CALL!" if r["has_tool_call"] else "OK"
print(f"{status} {r['time']:.1f}s sections={r['sections']}/8 "
f"len={r['content_len']} "
f"tokens={r['usage'].get('prompt_tokens', '?')}/{r['usage'].get('completion_tokens', '?')}")
# Print summary table
print()
print("=" * 70)
print(" Round 1 Results")
print("=" * 70)
print(f" {'Model':<40} {'Time':>6} {'Tool?':>6} {'Sect':>5} {'Len':>6}")
print(f" {'-'*40} {'-'*6} {'-'*6} {'-'*5} {'-'*6}")
for r in results:
if r["error"]:
print(f" {r['name']:<40} {'ERROR':>6}")
else:
tc = "YES!" if r["has_tool_call"] else "no"
print(f" {r['name']:<40} {r['time']:>5.1f}s {tc:>6} {r['sections']:>4}/8 {r['content_len']:>6}")
# Round 2: iterative compaction
test_iterative(results)
print()
print(f"{'='*70}\nDone {time.strftime('%H:%M:%S')}")
if __name__ == "__main__":
main()