batuta/agent/
code.rs

1//! Public entry point for `apr code` / `batuta code`.
2//!
3//! This module provides the library-level API that both the `batuta` binary
4//! and `apr-cli` use to launch the coding assistant. All logic lives here;
5//! CLI wrappers are thin dispatchers.
6//!
7//! PMAT-162: Phase 6 — makes `cmd_code` accessible from the library crate
8//! so `apr-cli` can call `batuta::agent::code::cmd_code()` directly.
9
10use std::path::PathBuf;
11use std::sync::Arc;
12
13use crate::agent::capability::Capability;
14use crate::agent::driver::LlmDriver;
15use crate::agent::manifest::{AgentManifest, ModelConfig, ResourceQuota};
16use crate::agent::tool::file::{FileEditTool, FileReadTool, FileWriteTool};
17use crate::agent::tool::search::{GlobTool, GrepTool};
18use crate::agent::tool::shell::ShellTool;
19use crate::agent::tool::ToolRegistry;
20use crate::serve::backends::PrivacyTier;
21
22/// Entry point for `batuta code` / `apr code`.
23///
24/// This is the public library API — callable from both the batuta binary
25/// and apr-cli (PMAT-162). Handles model discovery, driver selection,
26/// tool registration, and REPL launch.
27#[allow(clippy::too_many_arguments)]
28pub fn cmd_code(
29    model: Option<PathBuf>,
30    project: PathBuf,
31    resume: Option<Option<String>>,
32    prompt: Vec<String>,
33    print: bool,
34    max_turns: u32,
35    manifest_path: Option<PathBuf>,
36    emit_trace: Option<PathBuf>,
37    // PMAT-CODE-OUTPUT-FORMAT-001 / PMAT-CODE-INPUT-FORMAT-001:
38    // accepted as &str ("text" | "json") to keep this crate's public API
39    // independent of apr-cli's ValueEnum types. Unknown values fall back
40    // to "text" — the legacy behavior — under Poka-Yoke.
41    output_format: &str,
42    input_format: &str,
43) -> anyhow::Result<()> {
44    // --project: change working directory for project instructions
45    if project.as_os_str() != "." && project.is_dir() {
46        std::env::set_current_dir(&project)?;
47    }
48
49    // Load manifest or build default. When `--manifest` is set it short-
50    // circuits the settings ladder (the manifest is treated as a complete
51    // agent specification); otherwise we fold in
52    // `~/.config/apr/settings.json` (user-global) and
53    // `<project_root>/.apr/settings.json` (project-local) as Claude-Code
54    // parity defaults (PMAT-CODE-CONFIG-LADDER-001). CLI flags always win.
55    let mut manifest = match manifest_path {
56        Some(ref path) => {
57            let content = std::fs::read_to_string(path)
58                .map_err(|e| anyhow::anyhow!("cannot read manifest {}: {e}", path.display()))?;
59            let m = AgentManifest::from_toml(&content)
60                .map_err(|e| anyhow::anyhow!("invalid manifest: {e}"))?;
61            eprintln!("✓ Loaded manifest: {}", path.display());
62            m
63        }
64        None => {
65            let mut m = build_default_manifest();
66            // PMAT-CODE-CONFIG-LADDER-001: settings.json layered defaults.
67            // Errors are surfaced (Poka-Yoke) — a malformed settings file
68            // is reported rather than silently ignored.
69            let project_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
70            let settings = crate::agent::settings::AprSettings::load_layered(&project_root)?;
71            apply_settings_to_manifest(&mut m, &settings)?;
72            m
73        }
74    };
75
76    // --model flag overrides manifest model_path (and therefore overrides
77    // any settings.json `model` field — CLI always wins, per the parity
78    // ladder contract).
79    if let Some(ref model_path) = model {
80        manifest.model.model_path = Some(model_path.clone());
81    }
82
83    // PMAT-150: discover model with Jidoka validation (broken APR → GGUF fallback)
84    discover_and_set_model(&mut manifest);
85
86    // PMAT-198: Scale system prompt based on model size.
87    // Small models (<2B) degrade with the full tool table + project context.
88    if let Some(ref path) = manifest.model.model_path {
89        let params_b = estimate_model_params_from_name(path);
90        if params_b < 2.0 {
91            manifest.model.system_prompt = scale_prompt_for_model(params_b);
92        }
93    }
94
95    // Contract: no_model_error — never silently use MockDriver
96    if manifest.model.resolve_model_path().is_none() && manifest_path.is_none() {
97        print_no_model_error();
98        std::process::exit(exit_code::NO_MODEL);
99    }
100
101    // PMAT-160: Try AprServeDriver first (apr serve has full CUDA/GPU).
102    // Falls back to embedded RealizarDriver if `apr` binary not found.
103    // PMAT-CODE-SPAWN-PARITY-001: driver stored as Arc so TaskTool can
104    // share it with the AgentPool for sub-agent execution.
105    let driver: Arc<dyn LlmDriver> = if let Some(model_path) = manifest.model.resolve_model_path() {
106        match crate::agent::driver::apr_serve::AprServeDriver::launch(
107            model_path,
108            manifest.model.context_window,
109        ) {
110            Ok(d) => Arc::new(d),
111            Err(e) => {
112                eprintln!("⚠ apr serve unavailable ({e}), using embedded inference");
113                Arc::from(build_fallback_driver(&manifest)?)
114            }
115        }
116    } else {
117        Arc::from(build_fallback_driver(&manifest)?)
118    };
119
120    // PMAT-CODE-MCP-JSON-LOADER-001: merge `<project>/.mcp.json` (Claude-Code-
121    // shape) servers into manifest.mcp_servers BEFORE tool registration. The
122    // manifest's TOML-declared servers always win on name collision (operator-
123    // declared > project-default), matching the settings-ladder semantics.
124    // Missing .mcp.json is a non-error; malformed JSON is a hard error.
125    #[cfg(feature = "agents-mcp")]
126    {
127        let project_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
128        match crate::agent::mcp_json::load_and_merge(&mut manifest, &project_root) {
129            Ok(0) => {}
130            Ok(n) => {
131                eprintln!("✓ Loaded {n} MCP server(s) from .mcp.json");
132            }
133            Err(e) => {
134                anyhow::bail!("invalid .mcp.json: {e}");
135            }
136        }
137    }
138
139    // Build tool registry with coding tools
140    let mut tools = build_code_tools(&manifest);
141
142    // PMAT-CODE-MCP-CLIENT-001: register MCP client tools from manifest.mcp_servers.
143    // Synchronous wrapper over async discover_mcp_tools — a no-op when mcp_servers is
144    // empty (the default for `apr code` without a manifest).
145    register_mcp_client_tools(&mut tools, &manifest);
146
147    // PMAT-CODE-SPAWN-PARITY-001: register Task tool (Claude-Code Agent parity).
148    // `task` lets the agent delegate to typed subagents (general-purpose,
149    // explore, plan) with bounded recursion depth (Jidoka).
150    crate::agent::task_tool::register_task_tool(
151        &mut tools,
152        &manifest,
153        Arc::clone(&driver),
154        /* max_depth */ 3,
155    );
156
157    // PMAT-CODE-HOOKS-001: build hook registry from manifest and fire SessionStart.
158    // Returned Warn messages are surfaced to the user; a Block here aborts session
159    // startup (matching Claude Code's exit-code-2 semantics).
160    let hooks_reg = crate::agent::hooks::HookRegistry::from_configs(manifest.hooks.clone());
161    let hook_cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
162    match hooks_reg.run(crate::agent::hooks::HookEvent::SessionStart, "", &hook_cwd) {
163        crate::agent::hooks::HookDecision::Allow => {}
164        crate::agent::hooks::HookDecision::Warn(msg) => {
165            if !msg.is_empty() {
166                eprintln!("⚠ SessionStart hook: {msg}");
167            }
168        }
169        crate::agent::hooks::HookDecision::Block(reason) => {
170            anyhow::bail!("SessionStart hook blocked session: {reason}");
171        }
172    }
173
174    // Build memory
175    let memory = crate::agent::memory::InMemorySubstrate::new();
176
177    // Non-interactive mode: single prompt
178    // PMAT-161: Return exit code instead of process::exit() so driver Drop
179    // runs and kills the apr serve subprocess (no zombie processes).
180    if print || !prompt.is_empty() {
181        let prompt_text = if prompt.is_empty() {
182            let mut buf = String::new();
183            std::io::Read::read_to_string(&mut std::io::stdin(), &mut buf)?;
184            // PMAT-CODE-INPUT-FORMAT-001: when --input-format=json, parse
185            // a `{"role":"user","content":"..."}` envelope and use `content`
186            // as the prompt. Empty/missing content is a hard error so the
187            // operator notices the malformed envelope.
188            if input_format.eq_ignore_ascii_case("json") {
189                parse_json_input_envelope(&buf)?
190            } else {
191                buf
192            }
193        } else {
194            prompt.join(" ")
195        };
196        let code = run_single_prompt(
197            &manifest,
198            driver.as_ref(),
199            &tools,
200            &memory,
201            &prompt_text,
202            emit_trace.as_deref(),
203            output_format,
204        );
205        drop(driver); // Kill apr serve subprocess before exit
206        std::process::exit(code);
207    }
208
209    // --resume: load previous session
210    // PMAT-165: auto-resume prompt when recent session exists (spec §6.3)
211    let resume_session_id = match resume {
212        Some(Some(id)) => Some(id), // --resume=<session-id>
213        Some(None) => {
214            // --resume (no ID): find most recent for cwd
215            crate::agent::session::SessionStore::find_recent_for_cwd().map(|m| m.id)
216        }
217        None => {
218            // No --resume flag: check for recent session and prompt
219            crate::agent::session::offer_auto_resume()
220        }
221    };
222
223    // Interactive REPL (local inference is free — budget unlimited)
224    crate::agent::repl::run_repl(
225        &manifest,
226        driver.as_ref(),
227        &tools,
228        &memory,
229        max_turns,
230        f64::MAX,
231        resume_session_id.as_deref(),
232    )
233}
234
235/// PMAT-CODE-CONFIG-LADDER-001: fold loaded `~/.config/apr/settings.json` /
236/// `<project>/.apr/settings.json` defaults into the default manifest **before**
237/// CLI flags apply. Each `Some(_)` field on settings overrides the manifest
238/// default; `None` fields leave the manifest alone. The CLI surface is wired
239/// AFTER this so `--model` / `--max-turns` always win over settings.
240///
241/// PMAT-CODE-CONFIG-LADDER-FIELDS-001 (2026-05-07): also honors
242/// `permissionMode` (validated via [`PermissionMode::parse`]; unknown
243/// strings produce a hard error so a typo doesn't run the agent under the
244/// wrong policy) and `allowedHosts` (mapped to [`AgentManifest::allowed_hosts`];
245/// Sovereign privacy tier still wins as a Poka-Yoke).
246fn apply_settings_to_manifest(
247    manifest: &mut AgentManifest,
248    settings: &crate::agent::settings::AprSettings,
249) -> anyhow::Result<()> {
250    if let Some(ref model) = settings.model {
251        // Heuristic: a slash or starts with `hf://` / `./` / `/` → repo or
252        // path. We keep this loose because the same field accepts both
253        // `qwen3:1.7b-q4k` (apr pull alias) and `/abs/path.gguf`.
254        if std::path::Path::new(model).is_absolute()
255            || model.starts_with("./")
256            || model.starts_with("../")
257            || (!model.contains(':') && !model.starts_with("hf://"))
258        {
259            manifest.model.model_path = Some(std::path::PathBuf::from(model));
260        } else {
261            manifest.model.model_repo = Some(model.clone());
262        }
263    }
264    if let Some(extra) = settings.extra_system_prompt.as_deref() {
265        if !extra.trim().is_empty() {
266            // Append, don't replace — base prompt must keep tool-calling
267            // grammar guidance intact.
268            manifest.model.system_prompt.push_str("\n\n");
269            manifest.model.system_prompt.push_str(extra);
270        }
271    }
272    if let Some(mt) = settings.max_turns {
273        manifest.resources.max_iterations = mt;
274    }
275    if let Some(ref pm) = settings.permission_mode {
276        // Parse once at apply time so the operator sees a clear error with
277        // the bad value rather than a generic serde error. Currently only
278        // the parse + validate is enforced — the runtime per-tool verdict
279        // gate is tracked by PMAT-CODE-PERMISSIONS-RUNTIME-001.
280        if crate::agent::permission::PermissionMode::parse(pm).is_none() {
281            anyhow::bail!(
282                "settings.json permissionMode: unknown mode {pm:?} \
283                 (expected default | plan | acceptEdits | bypassPermissions)"
284            );
285        }
286    }
287    if let Some(ref hosts) = settings.allowed_hosts {
288        // Only apply if the operator hasn't already declared an explicit
289        // list via TOML manifest. Keeps manifest > settings precedence.
290        if manifest.allowed_hosts.is_empty() {
291            manifest.allowed_hosts = hosts.clone();
292        }
293    }
294    Ok(())
295}
296
297/// Build fallback driver (embedded RealizarDriver) when AprServeDriver unavailable.
298fn build_fallback_driver(manifest: &AgentManifest) -> anyhow::Result<Box<dyn LlmDriver>> {
299    #[cfg(feature = "inference")]
300    {
301        if let Some(model_path) = manifest.model.resolve_model_path() {
302            let driver = crate::agent::driver::realizar::RealizarDriver::new(
303                model_path,
304                manifest.model.context_window,
305            )?;
306            return Ok(Box::new(driver));
307        }
308    }
309    let _ = manifest;
310    // No model or no inference feature — return MockDriver
311    Ok(Box::new(crate::agent::driver::mock::MockDriver::single_response(
312        "Hello! I'm running in dry-run mode. \
313         Set model_path in your agent manifest or install the `apr` binary.",
314    )))
315}
316
317/// Auto-discover model if none explicitly set (APR preferred over GGUF).
318fn discover_and_set_model(manifest: &mut AgentManifest) {
319    if manifest.model.model_path.is_some() || manifest.model.model_repo.is_some() {
320        return;
321    }
322    let Some(discovered) = ModelConfig::discover_model() else {
323        return;
324    };
325    eprintln!(
326        "Model: {} (auto-discovered)",
327        discovered.file_name().unwrap_or_default().to_string_lossy()
328    );
329    let ext = discovered.extension().and_then(|e| e.to_str()).unwrap_or("");
330    if ext == "gguf" && check_invalid_apr_in_search_dirs() {
331        eprintln!(
332            "⚠ APR model found but invalid (missing tokenizer). Using GGUF fallback: {}",
333            discovered.display()
334        );
335        eprintln!("  Re-convert with: apr convert <source>.gguf -o <output>.apr\n");
336    }
337    manifest.model.model_path = Some(discovered);
338}
339
340/// Print actionable error when no local model is available.
341fn print_no_model_error() {
342    eprintln!("✗ No local model found. apr code requires a local model.\n");
343    if check_invalid_apr_in_search_dirs() {
344        eprintln!("  ⚠ APR model(s) found but invalid (missing embedded tokenizer).");
345        eprintln!("  Re-convert: apr convert <source>.gguf -o <output>.apr\n");
346    }
347    eprintln!("  Download a model (APR format preferred):");
348    eprintln!("    apr pull qwen3:1.7b-q4k            (default — best tool use at 1.2GB)");
349    eprintln!("    apr pull qwen3:8b-q4k              (recommended for complex tasks)");
350    eprintln!();
351    eprintln!("  Or place a .apr/.gguf file in ~/.apr/models/ (auto-discovered)");
352    eprintln!();
353    eprintln!("  Then run: apr code or apr code --model <path>");
354}
355
356/// Check if any APR files in standard model search dirs are invalid.
357fn check_invalid_apr_in_search_dirs() -> bool {
358    for dir in &ModelConfig::model_search_dirs() {
359        if let Ok(entries) = std::fs::read_dir(dir) {
360            for entry in entries.flatten() {
361                let path = entry.path();
362                if path.extension().is_some_and(|e| e == "apr")
363                    && !crate::agent::driver::validate::is_valid_model_file(&path)
364                {
365                    return true;
366                }
367            }
368        }
369    }
370    false
371}
372
/// Load project-level instructions from `APR.md` or `CLAUDE.md` in the
/// current working directory.
///
/// The files are tried in that order; the first one that exists and can be
/// read as UTF-8 wins. Returns `None` when `max_bytes` is 0, when the
/// current directory cannot be determined, or when neither file is
/// readable.
///
/// Content longer than `max_bytes` is cut at the largest UTF-8 character
/// boundary that does not exceed `max_bytes`, and a
/// `...\n(truncated from N bytes)` marker is appended. The marker itself is
/// not counted against the budget.
fn load_project_instructions(max_bytes: usize) -> Option<String> {
    // A zero budget disables loading; bail out before touching the disk.
    if max_bytes == 0 {
        return None;
    }
    let cwd = std::env::current_dir().ok()?;

    // Lazy chain: CLAUDE.md is only probed when APR.md is absent/unreadable.
    let content = ["APR.md", "CLAUDE.md"]
        .iter()
        .map(|filename| cwd.join(filename))
        .filter(|path| path.is_file())
        .find_map(|path| std::fs::read_to_string(path).ok())?;

    if content.len() <= max_bytes {
        return Some(content);
    }

    // Step back to a char boundary so the slice never splits a multi-byte
    // character and never exceeds the budget. Index 0 is always a boundary,
    // so the loop terminates.
    let mut end = max_bytes;
    while !content.is_char_boundary(end) {
        end -= 1;
    }
    Some(format!("{}...\n(truncated from {} bytes)", &content[..end], content.len()))
}
401
/// Derive the instruction budget from the model's context window.
///
/// Windows smaller than 4096 get no budget at all (0); otherwise the budget
/// is a quarter of the window, capped at 4096.
fn instruction_budget(context_window: usize) -> usize {
    const MIN_WINDOW: usize = 4096;
    const BUDGET_CAP: usize = 4096;

    if context_window >= MIN_WINDOW {
        std::cmp::min(context_window / 4, BUDGET_CAP)
    } else {
        0
    }
}
410
411/// PMAT-CODE-ORG-POLICY-RUNTIME-001: assemble the system prompt from
412/// its component blocks in the canonical order (matches PolicyTier
413/// precedence + project-instruction conventions).
414///
415/// Pure function — no I/O, no global state. Each input is `Option`-
416/// wrapped so the caller can pass `None` for a missing block; the
417/// helper is responsible for choosing whether to emit the section
418/// heading at all.
419///
420/// Ordering rationale:
421///
422/// 1. `base` — the always-present `CODE_SYSTEM_PROMPT` (tool table,
423///    grammar, sovereign-by-default reminders).
424/// 2. `## Enforced organization policy` — `PolicyTier::Enforced`,
425///    highest precedence; surfaced FIRST after `base` so downstream
426///    sections cannot override it.
427/// 3. `## Project Context` — git branch, file stats, language.
428/// 4. `## Project Instructions` — CLAUDE.md / APR.md (with @import
429///    expansion + user-level fallback).
430/// 5. `## Auto-memory` — per-project memory directory contents.
431fn assemble_system_prompt(
432    base: &str,
433    project_context: &str,
434    project_instructions: Option<&str>,
435    auto_memory: Option<&str>,
436    org_policy: Option<&crate::agent::org_policy::OrgPolicy>,
437) -> String {
438    let mut out = String::from(base);
439    if let Some(pol) = org_policy {
440        out.push_str(&format!(
441            "\n\n## Enforced organization policy ({source})\n\n{content}",
442            source = pol.source.display(),
443            content = pol.content
444        ));
445    }
446    out.push_str(&format!("\n\n## Project Context\n\n{project_context}"));
447    if let Some(instructions) = project_instructions {
448        out.push_str(&format!("\n## Project Instructions\n\n{instructions}"));
449    }
450    if let Some(mem) = auto_memory {
451        out.push_str(&format!("\n## Auto-memory\n\n{mem}"));
452    }
453    out
454}
455
/// Collect a short textual summary of the project in the current working
/// directory: the directory itself, the git branch and number of dirty
/// files (when `git` succeeds), a language guess based on file extensions
/// directly inside `src/`, and the detected build system.
fn gather_project_context() -> String {
    /// Run `git` with `args`; yields stdout only when the command could be
    /// spawned and exited successfully.
    fn git_stdout(args: &[&str]) -> Option<String> {
        let output = std::process::Command::new("git").args(args).output().ok()?;
        if output.status.success() {
            Some(String::from_utf8_lossy(&output.stdout).into_owned())
        } else {
            None
        }
    }

    let mut summary = String::new();
    let workdir = std::env::current_dir().unwrap_or_default();
    summary.push_str(&format!("Working directory: {}\n", workdir.display()));

    if let Some(stdout) = git_stdout(&["rev-parse", "--abbrev-ref", "HEAD"]) {
        let branch = stdout.trim().to_string();
        summary.push_str(&format!("Git branch: {branch}\n"));
    }
    if let Some(diff) = git_stdout(&["diff", "--stat", "--no-color"]) {
        // One line is subtracted from the `--stat` line count: presumably
        // the trailing summary line.
        let dirty_count = diff.lines().count().saturating_sub(1);
        if dirty_count > 0 {
            summary.push_str(&format!("Dirty files: {dirty_count}\n"));
        }
    }

    // Tally entries directly inside `src/` (non-recursive; every entry
    // counts towards the total, whatever its extension).
    let (mut rust_files, mut python_files, mut total) = (0u32, 0u32, 0u32);
    for entry in std::fs::read_dir("src").into_iter().flatten().flatten() {
        total += 1;
        match entry.path().extension().and_then(|ext| ext.to_str()) {
            Some("rs") => rust_files += 1,
            Some("py") => python_files += 1,
            _ => {}
        }
    }
    let lang = match (rust_files > python_files, python_files > 0) {
        (true, _) => "Rust",
        (false, true) => "Python",
        (false, false) => "unknown",
    };
    summary.push_str(&format!("Language: {lang} ({total} files in src/)\n"));

    // Cargo takes precedence when both manifests are present.
    let build_system = if PathBuf::from("Cargo.toml").exists() {
        Some("Build system: Cargo (Rust)\n")
    } else if PathBuf::from("pyproject.toml").exists() {
        Some("Build system: pyproject.toml (Python)\n")
    } else {
        None
    };
    if let Some(line) = build_system {
        summary.push_str(line);
    }

    summary
}
514
515/// Build a default `AgentManifest` for coding tasks.
516fn build_default_manifest() -> AgentManifest {
517    let ctx_window = 4096_usize;
518    let budget = instruction_budget(ctx_window);
519    // PMAT-CODE-MEMORY-PARITY-001: Use layered loader (user-global → project)
520    // with `@import` resolution. Falls through to legacy single-file load
521    // when nothing matches at either layer.
522    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
523    let mut import_warnings = Vec::new();
524    let project_instructions =
525        crate::agent::instructions::load_layered_instructions(&cwd, budget, &mut import_warnings)
526            .or_else(|| load_project_instructions(budget));
527    for w in &import_warnings {
528        eprintln!("⚠ instructions: {w}");
529    }
530    let project_context = gather_project_context();
531
532    // PMAT-CODE-MEMORY-AUTO-001: load `*.md` files from
533    // `~/.config/apr/projects/<slug>/memory/` into the system prompt
534    // under a `## Auto-memory` section. Slug matches Claude Code's
535    // hyphenated-path convention so `~/.claude/projects/` symlinks
536    // continue to work cross-tool.
537    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
538    let mut auto_warns: Vec<String> = Vec::new();
539    let auto_memory = crate::agent::auto_memory::load_auto_memory(&cwd, &mut auto_warns);
540    for w in &auto_warns {
541        eprintln!("⚠ {w}");
542    }
543
544    // PMAT-CODE-ORG-POLICY-RUNTIME-001: load enforced org policy from
545    // `/etc/apr-code/CLAUDE.md` (native first) or `/etc/claude-code/CLAUDE.md`
546    // (cross-compat). The loader silently skips missing files + I/O errors so
547    // a sandboxed runtime can't ransom REPL boot. PolicyTier::Enforced is the
548    // highest tier — surfaced FIRST in the system prompt so a downstream
549    // project / user / auto-memory section cannot override it. Uses the same
550    // 25%-of-context budget as project_instructions; `max_bytes == 0`
551    // disables the loader entirely (small models).
552    let org_policy = crate::agent::org_policy::load_org_policy(
553        &crate::agent::org_policy::canonical_system_roots(),
554        "CLAUDE.md",
555        budget,
556    );
557
558    let system_prompt = assemble_system_prompt(
559        CODE_SYSTEM_PROMPT,
560        &project_context,
561        project_instructions.as_deref(),
562        auto_memory.as_deref(),
563        org_policy.as_ref(),
564    );
565
566    AgentManifest {
567        name: "apr-code".to_string(),
568        description: "Interactive AI coding assistant".to_string(),
569        privacy: PrivacyTier::Sovereign,
570        model: ModelConfig {
571            system_prompt,
572            max_tokens: 4096,
573            temperature: 0.0,
574            // PMAT-197: Qwen3 supports 32K context. Default 4096 caused
575            // truncate_messages to drop user query (9 tool schemas ~4000 tokens
576            // consumed the entire window). Set to 32K for Qwen3-class models.
577            context_window: Some(32768),
578            ..ModelConfig::default()
579        },
580        resources: ResourceQuota {
581            max_iterations: 50,
582            max_tool_calls: 200,
583            max_cost_usd: 0.0,
584            max_tokens_budget: None,
585        },
586        capabilities: vec![
587            Capability::FileRead { allowed_paths: vec!["*".into()] },
588            Capability::FileWrite { allowed_paths: vec!["*".into()] },
589            Capability::Shell { allowed_commands: vec!["*".into()] },
590            Capability::Memory,
591            Capability::Rag,
592        ],
593        ..AgentManifest::default()
594    }
595}
596
597/// PMAT-CODE-MCP-CLIENT-001 — register external MCP servers declared in
598/// `manifest.mcp_servers[]` as tools in the `apr code` registry. Mirrors
599/// Claude Code's `.mcp.json` → agent-tool-provider wiring. Synchronous
600/// wrapper because `cmd_code` is sync; opens a scoped current-thread
601/// runtime for the discovery handshake. No-op when the feature is off
602/// or the manifest has no servers.
603#[allow(unused_variables)]
604fn register_mcp_client_tools(tools: &mut ToolRegistry, manifest: &AgentManifest) {
605    #[cfg(feature = "agents-mcp")]
606    {
607        if manifest.mcp_servers.is_empty() {
608            return;
609        }
610        let rt = match tokio::runtime::Builder::new_current_thread().enable_all().build() {
611            Ok(rt) => rt,
612            Err(e) => {
613                eprintln!("⚠ failed to create MCP discovery runtime: {e}");
614                return;
615            }
616        };
617        let discovered = rt.block_on(crate::agent::tool::mcp_client::discover_mcp_tools(manifest));
618        let count = discovered.len();
619        for tool in discovered {
620            tools.register(Box::new(tool));
621        }
622        if count > 0 {
623            eprintln!(
624                "✓ Registered {count} MCP tool(s) from {} server(s)",
625                manifest.mcp_servers.len()
626            );
627        }
628    }
629}
630
631/// Register all coding tools.
632fn build_code_tools(manifest: &AgentManifest) -> ToolRegistry {
633    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
634
635    let mut tools = ToolRegistry::new();
636    tools.register(Box::new(FileReadTool::new(vec!["*".into()])));
637    tools.register(Box::new(FileWriteTool::new(vec!["*".into()])));
638    tools.register(Box::new(FileEditTool::new(vec!["*".into()])));
639    tools.register(Box::new(GlobTool::new(vec!["*".into()])));
640    tools.register(Box::new(GrepTool::new(vec!["*".into()])));
641    tools.register(Box::new(ShellTool::new(vec!["*".into()], cwd)));
642
643    let memory_sub = Arc::new(crate::agent::memory::InMemorySubstrate::new());
644    tools.register(Box::new(crate::agent::tool::memory::MemoryTool::new(
645        memory_sub,
646        manifest.name.clone(),
647    )));
648
649    // PMAT-163: dedicated pmat_query tool
650    tools.register(Box::new(crate::agent::tool::pmat_query::PmatQueryTool::new()));
651
652    #[cfg(feature = "rag")]
653    {
654        let oracle = Arc::new(crate::oracle::rag::RagOracle::new());
655        tools.register(Box::new(crate::agent::tool::rag::RagTool::new(oracle, 5)));
656    }
657
658    // PMAT-CODE-WEB-TOOLS-001: register NetworkTool behind the privacy-tier
659    // gate. Sovereign tier always blocks (Poka-Yoke); Standard/Private
660    // tiers register iff `allowed_hosts` is non-empty (explicit opt-in).
661    register_web_tools(&mut tools, manifest);
662
663    tools
664}
665
666/// Register NetworkTool (+ BrowserTool when the `agents-browser` feature is
667/// on) when the manifest declares a non-Sovereign privacy tier and a
668/// non-empty `allowed_hosts` list.
669fn register_web_tools(tools: &mut ToolRegistry, manifest: &AgentManifest) {
670    use crate::serve::backends::PrivacyTier;
671
672    if matches!(manifest.privacy, PrivacyTier::Sovereign) {
673        return;
674    }
675    if manifest.allowed_hosts.is_empty() {
676        return;
677    }
678
679    tools.register(Box::new(crate::agent::tool::network::NetworkTool::new(
680        manifest.allowed_hosts.clone(),
681    )));
682
683    #[cfg(feature = "agents-browser")]
684    {
685        tools.register(Box::new(crate::agent::tool::browser::BrowserTool::new(manifest.privacy)));
686    }
687}
688
689pub use super::code_prompts::exit_code;
690
691/// Run a single prompt (non-interactive). PMAT-172: cap iterations at 10.
692fn run_single_prompt(
693    manifest: &AgentManifest,
694    driver: &dyn LlmDriver,
695    tools: &ToolRegistry,
696    memory: &dyn crate::agent::memory::MemorySubstrate,
697    prompt: &str,
698    emit_trace: Option<&std::path::Path>,
699    // PMAT-CODE-OUTPUT-FORMAT-001: "text" (default) or "json".
700    output_format: &str,
701) -> i32 {
702    let mut single_manifest = manifest.clone();
703    single_manifest.resources.max_iterations = single_manifest.resources.max_iterations.min(10);
704    // PMAT-197: Use compact system prompt for -p mode.
705    // The full CODE_SYSTEM_PROMPT (9-tool table + project context + CLAUDE.md)
706    // overwhelms Qwen3 1.7B causing </think> loops. For -p mode, use a minimal
707    // prompt that lets the model answer directly. Tools still available if needed.
708    single_manifest.model.system_prompt = COMPACT_SYSTEM_PROMPT.to_string();
709    // Note: context_window is set at driver launch time (build_default_manifest),
710    // not here. See PMAT-197 fix in build_default_manifest.
711
712    let rt = match tokio::runtime::Builder::new_current_thread().enable_all().build() {
713        Ok(rt) => rt,
714        Err(e) => {
715            eprintln!("Error: failed to create tokio runtime: {e}");
716            return exit_code::AGENT_ERROR;
717        }
718    };
719
720    let started = std::time::Instant::now();
721
722    // PMAT-197: Use non-nudge loop for -p mode. The nudge ("Use a tool!") forces
723    // small models to make tool calls even for simple questions like "What is 2+2?"
724    // which causes stuck loops. Let the model decide whether to use tools.
725    let result = rt.block_on(crate::agent::runtime::run_agent_loop(
726        &single_manifest,
727        prompt,
728        driver,
729        tools,
730        memory,
731        None,
732    ));
733
734    match result {
735        Ok(r) => {
736            let elapsed = started.elapsed();
737            if r.text.is_empty() {
738                // PMAT-190: Empty response — model may be emitting only thinking tokens
739                // that get stripped by strip_thinking_blocks(). Common with Qwen3 when
740                // the serve backend doesn't use Qwen3NoThinkTemplate.
741                eprintln!(
742                    "⚠ Empty response ({} iterations, {} tool calls). \
743                     Model may be in thinking mode — rebuild apr from source for Qwen3NoThinkTemplate fix.",
744                    r.iterations, r.tool_calls
745                );
746                if output_format.eq_ignore_ascii_case("json") {
747                    println!("{}", build_json_result_envelope(&r, elapsed, /*is_error*/ true));
748                }
749            } else if output_format.eq_ignore_ascii_case("json") {
750                // PMAT-CODE-OUTPUT-FORMAT-001: structured envelope mirroring
751                // Claude Code's `claude -p --output-format json` shape.
752                println!("{}", build_json_result_envelope(&r, elapsed, /*is_error*/ false));
753            } else {
754                println!("{}", r.text);
755            }
756
757            // PMAT-CODE-EMIT-TRACE-001 (M28): write a ccpa-trace.jsonl
758            // describing this run. Used by `ccpa measure` to score
759            // apr code against canonical Claude Code reference fixtures.
760            if let Some(trace_path) = emit_trace {
761                let model = single_manifest
762                    .model
763                    .resolve_model_path()
764                    .map(|p| p.display().to_string())
765                    .unwrap_or_else(|| "apr-code-unknown".to_owned());
766                if let Err(e) = emit_ccpa_trace(trace_path, prompt, &r, started.elapsed(), &model) {
767                    eprintln!("⚠ failed to write ccpa-trace to {}: {e}", trace_path.display());
768                }
769            }
770
771            exit_code::SUCCESS
772        }
773        Err(e) => {
774            eprintln!("Error: {e}");
775            map_error_to_exit_code(&e)
776        }
777    }
778}
779
780/// Emit a `ccpa-trace.jsonl` (M28) describing a single apr-code run.
781///
782/// Schema mirrors `claude-code-parity-apr-v1.yaml § trace_schema`. For
783/// the M28 minimum-viable scope we emit four records:
784///
785///   1. `session_start`  with a synthetic `session_id` derived from
786///      `started`'s wall-clock ts so re-runs differ; `cwd_sha256`
787///      placeholder is normalized at compare time by the differ.
788///   2. `user_prompt`    turn 0, verbatim text.
789///   3. `assistant_turn` turn 1, single `Block::Text` carrying
790///      `result.text`. Tool dispatch + hook + skill records are
791///      M29+ enrichment follow-ups.
792///   4. `session_end`    real elapsed_ms + token counts from
793///      `result.usage`.
794fn emit_ccpa_trace(
795    path: &std::path::Path,
796    prompt: &str,
797    result: &super::result::AgentLoopResult,
798    elapsed: std::time::Duration,
799    model: &str,
800) -> std::io::Result<()> {
801    use std::time::{SystemTime, UNIX_EPOCH};
802
803    let ts_micros =
804        SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_micros()).unwrap_or(0);
805    // session_id: UUIDv7-shaped hex string of the start ts. Normalized
806    // by the differ at compare time so this only needs to be stable
807    // across teacher and student of the SAME fixture (re-running the
808    // same fixture produces a different session_id, which is fine).
809    let session_id = format!(
810        "{:08x}-{:04x}-7000-{:04x}-{:012x}",
811        (ts_micros >> 64) as u32 & 0xFFFF_FFFF,
812        ((ts_micros >> 48) & 0xFFFF) as u16,
813        ((ts_micros >> 32) & 0xFFFF) as u16,
814        (ts_micros & 0xFFFF_FFFF_FFFF) as u64
815    );
816    // ts in ISO 8601 — not strictly RFC 3339, but the differ
817    // normalizes ts at compare time.
818    let secs = SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0);
819    let ts = format!("@{secs}");
820    let cwd_sha256 = "0".repeat(64);
821
822    let session_start = serde_json::json!({
823        "v": 1,
824        "kind": "session_start",
825        "session_id": session_id,
826        "ts": ts,
827        "actor": "apr-code",
828        "model": model,
829        "cwd_sha256": cwd_sha256,
830    });
831    let user_prompt = serde_json::json!({
832        "v": 1,
833        "kind": "user_prompt",
834        "turn": 0,
835        "text": prompt,
836    });
837    let assistant_turn = serde_json::json!({
838        "v": 1,
839        "kind": "assistant_turn",
840        "turn": 1,
841        "blocks": [{"type": "text", "text": result.text}],
842        "stop_reason": "end_turn",
843    });
844    let session_end = serde_json::json!({
845        "v": 1,
846        "kind": "session_end",
847        "turn": 1,
848        "stop_reason": "end_turn",
849        "elapsed_ms": elapsed.as_millis() as u64,
850        "tokens_in": result.usage.input_tokens,
851        "tokens_out": result.usage.output_tokens,
852    });
853
854    let body = format!("{}\n{}\n{}\n{}\n", session_start, user_prompt, assistant_turn, session_end);
855    std::fs::write(path, body)
856}
857
858/// PMAT-CODE-INPUT-FORMAT-001 (M-NON-INT-002): parse a `{"role":"user","content":"..."}`
859/// JSON envelope from stdin and return the prompt text. Mirrors the shape Claude
860/// Code accepts on `claude -p --input-format json`.
861///
862/// Errors are surfaced (not silently downgraded) so a malformed envelope fails
863/// loudly instead of running the agent on garbage. `role` other than `"user"`
864/// is also rejected — the non-interactive surface is single-user-turn only.
865fn parse_json_input_envelope(buf: &str) -> anyhow::Result<String> {
866    let trimmed = buf.trim();
867    if trimmed.is_empty() {
868        anyhow::bail!("--input-format=json: stdin is empty (expected JSON envelope)");
869    }
870    let v: serde_json::Value = serde_json::from_str(trimmed)
871        .map_err(|e| anyhow::anyhow!("--input-format=json: invalid JSON on stdin: {e}"))?;
872    let role = v.get("role").and_then(|r| r.as_str()).unwrap_or("user");
873    if role != "user" {
874        anyhow::bail!("--input-format=json: only role=\"user\" supported, got \"{role}\"");
875    }
876    let content = v
877        .get("content")
878        .and_then(|c| c.as_str())
879        .ok_or_else(|| anyhow::anyhow!("--input-format=json: missing string field `content`"))?;
880    Ok(content.to_owned())
881}
882
883/// PMAT-CODE-OUTPUT-FORMAT-001 (M-NON-INT-001): build a structured JSON
884/// envelope mirroring Claude Code's `claude -p --output-format json` shape:
885///
886/// ```json
887/// {
888///   "type": "result",
889///   "subtype": "success",
890///   "is_error": false,
891///   "duration_ms": 1234,
892///   "result": "the assistant text",
893///   "session_id": "<uuidv7-shaped>",
894///   "num_turns": 1,
895///   "total_cost_usd": 0
896/// }
897/// ```
898fn build_json_result_envelope(
899    result: &super::result::AgentLoopResult,
900    elapsed: std::time::Duration,
901    is_error: bool,
902) -> String {
903    use std::time::{SystemTime, UNIX_EPOCH};
904    let ts_micros =
905        SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_micros()).unwrap_or(0);
906    // Same UUIDv7-shaped stable-per-run session id used by emit_ccpa_trace.
907    let session_id = format!(
908        "{:08x}-{:04x}-7000-{:04x}-{:012x}",
909        (ts_micros >> 64) as u32 & 0xFFFF_FFFF,
910        ((ts_micros >> 48) & 0xFFFF) as u16,
911        ((ts_micros >> 32) & 0xFFFF) as u16,
912        (ts_micros & 0xFFFF_FFFF_FFFF) as u64
913    );
914    let envelope = serde_json::json!({
915        "type": "result",
916        "subtype": if is_error { "error" } else { "success" },
917        "is_error": is_error,
918        "duration_ms": elapsed.as_millis() as u64,
919        "result": result.text,
920        "session_id": session_id,
921        "num_turns": result.iterations,
922        "tokens_in": result.usage.input_tokens,
923        "tokens_out": result.usage.output_tokens,
924        // Local sovereign inference: cost is always zero by construction.
925        "total_cost_usd": 0,
926    });
927    envelope.to_string()
928}
929
930// Prompts and exit codes extracted to code_prompts.rs
931use super::code_prompts::{
932    estimate_model_params_from_name, map_error_to_exit_code, scale_prompt_for_model,
933    CODE_SYSTEM_PROMPT, COMPACT_SYSTEM_PROMPT,
934};
935
936#[cfg(test)]
937#[path = "code_tests.rs"]
938mod tests;
batuta/agent/code.rs

batuta/agent/
code.rs