import shutil
from pathlib import Path
from .ab import (
ABConfig,
build_claude_cmd,
build_codeix_cmd,
build_mcp_config,
build_prompt,
run as run_ab,
)
from .common import (
RunContext,
clone_repo_to,
build_index,
get_binary_version,
get_codeix_bin,
get_claude_version,
get_repo_by_name,
)
def run(question_id: str | None = None) -> list[dict]:
    """Run the "search-value benchmark" A/B comparison and return its results.

    Side A ("codeix-dev") is driven by a command from ``build_codeix_cmd``
    using an MCP config built for a staged copy of the local codeix build;
    side B ("claude") is driven by a command from ``build_claude_cmd``.

    Args:
        question_id: Forwarded unchanged to ``run_ab``.
            NOTE(review): presumably restricts the run to a single question
            when given -- confirm against ``.ab.run``.

    Returns:
        Whatever ``run_ab`` returns for the assembled ``ABConfig``.

    Raises:
        RuntimeError: From the ``setup_run`` callback when ``get_codeix_bin``
            reports no local build.
    """

    def _staged_binary_name() -> str:
        """Return ``codeix-<version>`` for the current local build.

        The version falls back to ``"unknown"`` when ``get_codeix_bin``
        returns a falsy value.
        """
        source_bin = get_codeix_bin()
        version = get_binary_version(Path(source_bin)) if source_bin else "unknown"
        return f"codeix-{version}"

    def setup_run(ctx: RunContext) -> tuple[str, str]:
        """Copy the local codeix build into ``ctx.bin_dir``.

        Returns the staged binary's path (as ``str``) and the literal
        ``"claude"`` for side B.
        """
        source_bin = get_codeix_bin()
        if not source_bin:
            raise RuntimeError("No local codeix build found. Run 'cargo build --release' first.")
        version = get_binary_version(Path(source_bin))
        staged = ctx.bin_dir / f"codeix-{version}"
        shutil.copy2(source_bin, staged)
        # Ensure the staged copy is executable regardless of the source mode.
        staged.chmod(0o755)
        return str(staged), "claude"

    def setup_a(q: dict, ctx: RunContext) -> bool:
        """Clone the question's project under ``ctx.repos_a`` and index it.

        Returns ``False`` when the repo is not found or the clone fails;
        otherwise returns the result of ``build_index``.
        """
        repo = get_repo_by_name(q["project"])
        if not repo:
            return False
        checkout = ctx.repos_a / q["project"]
        if not clone_repo_to(repo, checkout):
            return False
        # The binary path is recomputed here rather than taken from setup_run.
        indexer = ctx.bin_dir / _staged_binary_name()
        return build_index(str(indexer), checkout)

    def setup_b(q: dict, ctx: RunContext) -> bool:
        """Clone the question's project under ``ctx.repos_b`` (no indexing).

        Returns ``False`` when the repo is not found; otherwise returns the
        result of ``clone_repo_to``.
        """
        repo = get_repo_by_name(q["project"])
        return clone_repo_to(repo, ctx.repos_b / q["project"]) if repo else False

    def get_commands(q: dict, ctx: RunContext) -> tuple[list[str], Path, list[str], Path]:
        """Build the command and working directory for each side.

        Returns ``(cmd_a, cwd_a, cmd_b, cwd_b)``.
        """
        binary_name = _staged_binary_name()
        claude_version = get_claude_version()
        project = q["project"]
        workdir_a = ctx.repos_a / project
        workdir_b = ctx.repos_b / project
        # Side A gets the prompt from build_prompt; side B gets the raw
        # question prefixed with the claude version tag.
        prompt_a = build_prompt(project, q["question"])
        mcp_config = build_mcp_config(binary_name)
        codeix_cmd = build_codeix_cmd(mcp_config, prompt_a)
        claude_cmd = build_claude_cmd(f"[claude:{claude_version}] {q['question']}")
        return codeix_cmd, workdir_a, claude_cmd, workdir_b

    benchmark = ABConfig(
        name="search-value benchmark",
        label_a="codeix-dev",
        label_b="claude",
        title="CODEIX VALUE BENCHMARK",
        setup_run=setup_run,
        get_commands=get_commands,
        setup_a=setup_a,
        setup_b=setup_b,
        # Appended verbatim; asks the judge for an extra "codeix_value" rating.
        extra_judge_fields=', "codeix_value": "high|medium|low|none"',
    )
    return run_ab(benchmark, question_id)