import argparse
import json
import subprocess
import sys
from pathlib import Path
from dataclasses import dataclass, field, asdict
from typing import Optional
import re
@dataclass
class ParameterPoint:
pagerank_alpha: float = 0.85
pagerank_chat_multiplier: float = 100.0
depth_weight_root: float = 1.0
depth_weight_moderate: float = 0.5
depth_weight_deep: float = 0.1
depth_weight_vendor: float = 0.01
boost_mentioned_ident: float = 10.0
boost_mentioned_file: float = 5.0
boost_chat_file: float = 20.0
boost_temporal_coupling: float = 3.0
boost_focus_expansion: float = 5.0
git_recency_decay_days: float = 30.0
git_recency_max_boost: float = 10.0
git_churn_threshold: float = 10.0
git_churn_max_boost: float = 5.0
focus_decay: float = 0.5
focus_max_hops: float = 2.0
@dataclass
class RankingFailure:
query: str
seed_file: str
expected_top: list[str] actual_top: list[str] ndcg: float commit_message: str repo_context: dict
@dataclass
class ReasoningEpisode:
failures: list[RankingFailure]
current_params: ParameterPoint
reasoning: str proposed_changes: dict structural_insights: list[str] confidence: float
@dataclass
class Scratchpad:
episodes: list[ReasoningEpisode] = field(default_factory=list)
param_interactions: list[str] = field(default_factory=list)
failure_patterns: list[str] = field(default_factory=list)
success_patterns: list[str] = field(default_factory=list)
structural_proposals: list[str] = field(default_factory=list)
presets: dict = field(default_factory=dict)
heuristics: list[str] = field(default_factory=list)
warnings: list[str] = field(default_factory=list)
def call_claude(prompt: str, timeout: int = 60) -> str:
try:
result = subprocess.run(
["claude", "--print", "-p", prompt],
capture_output=True,
text=True,
timeout=timeout
)
if result.returncode != 0:
print(f"Warning: claude error: {result.stderr[:200]}")
return ""
return result.stdout.strip()
except subprocess.TimeoutExpired:
print("Warning: claude timed out")
return ""
except Exception as e:
print(f"Warning: Error calling claude: {e}")
return ""
def reason_about_failures(
failures: list[RankingFailure],
current_params: ParameterPoint,
scratchpad: Scratchpad
) -> ReasoningEpisode:
failure_desc = "\n\n".join([
f"""FAILURE {i+1}:
Query: "{f.query}"
Seed file: {f.seed_file}
Expected top files: {', '.join(f.expected_top[:5])}
Actual top files: {', '.join(f.actual_top[:5])}
NDCG score: {f.ndcg:.3f}
Commit context: "{f.commit_message}"
Repo: {f.repo_context.get('name', 'unknown')} ({f.repo_context.get('file_count', '?')} files)"""
for i, f in enumerate(failures[:5]) ])
params_desc = "\n".join([
f" {k}: {v}" for k, v in asdict(current_params).items()
])
prior_insights = ""
if scratchpad.param_interactions:
prior_insights += "\nKNOWN PARAMETER INTERACTIONS:\n" + "\n".join(
f" • {i}" for i in scratchpad.param_interactions[-5:]
)
if scratchpad.failure_patterns:
prior_insights += "\nKNOWN FAILURE PATTERNS:\n" + "\n".join(
f" • {p}" for p in scratchpad.failure_patterns[-5:]
)
prompt = f"""You are a REASONING-BASED OPTIMIZER for a code ranking system.
Your task is to analyze ranking failures and propose hyperparameter adjustments.
You are approximating the gradient in CONCEPT SPACE, not parameter space.
The question is not just "what values minimize loss" but "what structure would
make this failure class impossible?"
=== CURRENT PARAMETERS ===
{params_desc}
=== RANKING FAILURES TO ANALYZE ===
{failure_desc}
=== PRIOR KNOWLEDGE (from previous episodes) ===
{prior_insights if prior_insights else "No prior insights yet."}
=== YOUR TASK ===
1. DIAGNOSE: Why did each failure occur? What signal was missing or overwhelming?
2. REASON ABOUT INTERACTIONS: Do any parameter pairs interact to cause this?
(e.g., "low α + high boost_chat = tunnel vision because...")
3. PROPOSE CHANGES: For each parameter that should change, specify:
- Direction: increase/decrease
- Magnitude: small (10%), medium (30%), large (2x)
- Rationale: why this change addresses the failure
4. STRUCTURAL INSIGHTS: Are there patterns that can't be fixed by tuning?
(e.g., "multiplicative combination can't express OR logic")
5. CONFIDENCE: How confident are you in these proposals? (0-1)
=== OUTPUT FORMAT ===
Respond with a structured analysis. Use these exact headers:
## Diagnosis
[Your analysis of why each failure occurred]
## Parameter Interactions
[Any discovered interactions between parameters]
## Proposed Changes
[Format: PARAM_NAME: DIRECTION MAGNITUDE "rationale"]
## Structural Insights
[Patterns that suggest architectural changes, not just tuning]
## Confidence
[A number 0-1 and brief justification]
"""
response = call_claude(prompt, timeout=90)
episode = ReasoningEpisode(
failures=failures,
current_params=current_params,
reasoning=response,
proposed_changes={},
structural_insights=[],
confidence=0.5
)
changes_match = re.search(r'## Proposed Changes\n(.*?)(?=##|\Z)', response, re.DOTALL)
if changes_match:
changes_text = changes_match.group(1)
for line in changes_text.strip().split('\n'):
match = re.match(r'(\w+):\s*(increase|decrease)\s+(\w+)\s+"([^"]+)"', line)
if match:
param, direction, magnitude, rationale = match.groups()
episode.proposed_changes[param] = (direction, magnitude, rationale)
insights_match = re.search(r'## Structural Insights\n(.*?)(?=##|\Z)', response, re.DOTALL)
if insights_match:
for line in insights_match.group(1).strip().split('\n'):
if line.strip() and not line.startswith('#'):
episode.structural_insights.append(line.strip())
conf_match = re.search(r'## Confidence\n([\d.]+)', response)
if conf_match:
try:
episode.confidence = float(conf_match.group(1))
except ValueError:
pass
return episode
def update_scratchpad(scratchpad: Scratchpad, episode: ReasoningEpisode):
scratchpad.episodes.append(episode)
interactions_match = re.search(
r'## Parameter Interactions\n(.*?)(?=##|\Z)',
episode.reasoning,
re.DOTALL
)
if interactions_match:
for line in interactions_match.group(1).strip().split('\n'):
if line.strip() and not line.startswith('#'):
scratchpad.param_interactions.append(line.strip())
scratchpad.structural_proposals.extend(episode.structural_insights)
def apply_proposed_changes(
params: ParameterPoint,
changes: dict
) -> ParameterPoint:
new_params = ParameterPoint(**asdict(params))
magnitude_map = {
'small': 0.1,
'medium': 0.3,
'large': 1.0, }
for param_name, (direction, magnitude, _rationale) in changes.items():
if hasattr(new_params, param_name):
current = getattr(new_params, param_name)
mult = magnitude_map.get(magnitude, 0.3)
if direction == 'increase':
new_val = current * (1 + mult)
else:
new_val = current * (1 - mult)
setattr(new_params, param_name, new_val)
return new_params
def distill_scratchpad(scratchpad: Scratchpad) -> str:
all_interactions = list(set(scratchpad.param_interactions))
all_structural = list(set(scratchpad.structural_proposals))
change_counts = {}
for ep in scratchpad.episodes:
for param, (direction, mag, _) in ep.proposed_changes.items():
key = f"{param}:{direction}"
change_counts[key] = change_counts.get(key, 0) + 1
prompt = f"""You are distilling optimization insights into operator wisdom.
You have accumulated {len(scratchpad.episodes)} reasoning episodes about a code ranking system.
=== DISCOVERED PARAMETER INTERACTIONS ===
{chr(10).join(f"• {i}" for i in all_interactions[:20]) or "None yet"}
=== STRUCTURAL PROPOSALS (beyond tuning) ===
{chr(10).join(f"• {s}" for s in all_structural[:10]) or "None yet"}
=== MOST COMMON PARAMETER CHANGES ===
{chr(10).join(f"• {k}: {v} episodes" for k, v in sorted(change_counts.items(), key=lambda x: -x[1])[:10]) or "None yet"}
=== YOUR TASK ===
Distill these insights into operator wisdom:
1. PRESETS: Name 3-5 configurations for common use cases
Format: preset_name: {{param: value, ...}} "when to use"
2. ADAPTIVE HEURISTICS: Conditionals that adjust params based on context
Format: if CONDITION: ADJUSTMENT "rationale"
3. WARNINGS: Failure modes operators should avoid
Format: ⚠ WARNING: what happens and why
4. INTUITIONS: Prose wisdom that teaches the tool's use
(Write as if explaining to a thoughtful user)
Be specific. Reference actual parameter names and values.
"""
return call_claude(prompt, timeout=120)
def save_scratchpad(scratchpad: Scratchpad, path: Path):
with open(path, 'w') as f:
f.write("# Reasoning Optimization Scratchpad\n\n")
f.write(f"## Summary\n")
f.write(f"- Episodes: {len(scratchpad.episodes)}\n")
f.write(f"- Parameter interactions discovered: {len(scratchpad.param_interactions)}\n")
f.write(f"- Structural proposals: {len(scratchpad.structural_proposals)}\n\n")
f.write("## Parameter Interactions\n")
for interaction in scratchpad.param_interactions:
f.write(f"- {interaction}\n")
f.write("\n")
f.write("## Structural Proposals\n")
for proposal in scratchpad.structural_proposals:
f.write(f"- {proposal}\n")
f.write("\n")
f.write("## Episode History\n")
for i, ep in enumerate(scratchpad.episodes):
f.write(f"\n### Episode {i+1}\n")
f.write(f"Confidence: {ep.confidence}\n")
f.write(f"Proposed changes: {len(ep.proposed_changes)}\n")
if ep.proposed_changes:
for param, (dir, mag, rat) in ep.proposed_changes.items():
f.write(f" - {param}: {dir} {mag} \"{rat}\"\n")
def main():
parser = argparse.ArgumentParser(
description="Reasoning-based hyperparameter optimization"
)
subparsers = parser.add_subparsers(dest='command')
opt_parser = subparsers.add_parser('optimize', help='Run optimization episode')
opt_parser.add_argument('--failures', type=Path, required=True,
help='JSON file with ranking failures')
opt_parser.add_argument('--params', type=Path,
help='Current parameter JSON')
opt_parser.add_argument('--scratchpad', type=Path, default=Path('tmp/scratchpad.md'),
help='Scratchpad file to accumulate insights')
opt_parser.add_argument('--output', type=Path,
help='Output new params JSON')
dist_parser = subparsers.add_parser('distill', help='Distill scratchpad into wisdom')
dist_parser.add_argument('--scratchpad', type=Path, required=True,
help='Scratchpad to distill')
dist_parser.add_argument('--output', type=Path, default=Path('tmp/intuitions.md'),
help='Output wisdom file')
demo_parser = subparsers.add_parser('demo', help='Run demo with synthetic failures')
args = parser.parse_args()
if args.command == 'demo':
failures = [
RankingFailure(
query="auth",
seed_file="src/auth/login.rs",
expected_top=["src/auth/session.rs", "src/auth/token.rs"],
actual_top=["src/main.rs", "README.md", "Cargo.toml"],
ndcg=0.23,
commit_message="fix authentication token refresh",
repo_context={"name": "example-app", "file_count": 150}
),
RankingFailure(
query="database",
seed_file="src/db/connection.rs",
expected_top=["src/db/pool.rs", "src/db/query.rs"],
actual_top=["src/db/connection.rs", "src/config.rs"],
ndcg=0.45,
commit_message="optimize connection pooling",
repo_context={"name": "example-app", "file_count": 150}
),
]
scratchpad = Scratchpad()
params = ParameterPoint()
print("=== REASONING EPISODE ===\n")
episode = reason_about_failures(failures, params, scratchpad)
print("Reasoning:")
print(episode.reasoning[:2000] + "..." if len(episode.reasoning) > 2000 else episode.reasoning)
print(f"\nConfidence: {episode.confidence}")
print(f"Proposed changes: {len(episode.proposed_changes)}")
for param, (d, m, r) in episode.proposed_changes.items():
print(f" {param}: {d} {m} - \"{r}\"")
update_scratchpad(scratchpad, episode)
save_scratchpad(scratchpad, Path('tmp/scratchpad_demo.md'))
print(f"\nScratchpad saved to tmp/scratchpad_demo.md")
elif args.command == 'optimize':
with open(args.failures) as f:
failures_data = json.load(f)
failures = [RankingFailure(**f) for f in failures_data]
if args.params and args.params.exists():
with open(args.params) as f:
params = ParameterPoint(**json.load(f))
else:
params = ParameterPoint()
scratchpad = Scratchpad()
episode = reason_about_failures(failures, params, scratchpad)
update_scratchpad(scratchpad, episode)
new_params = apply_proposed_changes(params, episode.proposed_changes)
if args.output:
with open(args.output, 'w') as f:
json.dump(asdict(new_params), f, indent=2)
save_scratchpad(scratchpad, args.scratchpad)
print(f"Episode complete. Confidence: {episode.confidence}")
print(f"Changes: {len(episode.proposed_changes)}")
elif args.command == 'distill':
scratchpad = Scratchpad()
wisdom = distill_scratchpad(scratchpad)
with open(args.output, 'w') as f:
f.write("# Distilled Operator Wisdom\n\n")
f.write(wisdom)
print(f"Wisdom distilled to {args.output}")
else:
parser.print_help()
if __name__ == "__main__":
main()