batuta/agent/
code.rs

1//! Public entry point for `apr code` / `batuta code`.
2//!
3//! This module provides the library-level API that both the `batuta` binary
4//! and `apr-cli` use to launch the coding assistant. All logic lives here;
5//! CLI wrappers are thin dispatchers.
6//!
7//! PMAT-162: Phase 6 — makes `cmd_code` accessible from the library crate
8//! so `apr-cli` can call `batuta::agent::code::cmd_code()` directly.
9
10use std::path::PathBuf;
11use std::sync::Arc;
12
13use crate::agent::capability::Capability;
14use crate::agent::driver::LlmDriver;
15use crate::agent::manifest::{AgentManifest, ModelConfig, ResourceQuota};
16use crate::agent::tool::file::{FileEditTool, FileReadTool, FileWriteTool};
17use crate::agent::tool::search::{GlobTool, GrepTool};
18use crate::agent::tool::shell::ShellTool;
19use crate::agent::tool::ToolRegistry;
20use crate::serve::backends::PrivacyTier;
21
22/// Entry point for `batuta code` / `apr code`.
23///
24/// This is the public library API — callable from both the batuta binary
25/// and apr-cli (PMAT-162). Handles model discovery, driver selection,
26/// tool registration, and REPL launch.
27pub fn cmd_code(
28    model: Option<PathBuf>,
29    project: PathBuf,
30    resume: Option<Option<String>>,
31    prompt: Vec<String>,
32    print: bool,
33    max_turns: u32,
34    manifest_path: Option<PathBuf>,
35) -> anyhow::Result<()> {
36    // --project: change working directory for project instructions
37    if project.as_os_str() != "." && project.is_dir() {
38        std::env::set_current_dir(&project)?;
39    }
40
41    // Load manifest or build default
42    let mut manifest = match manifest_path {
43        Some(ref path) => {
44            let content = std::fs::read_to_string(path)
45                .map_err(|e| anyhow::anyhow!("cannot read manifest {}: {e}", path.display()))?;
46            let m = AgentManifest::from_toml(&content)
47                .map_err(|e| anyhow::anyhow!("invalid manifest: {e}"))?;
48            eprintln!("✓ Loaded manifest: {}", path.display());
49            m
50        }
51        None => build_default_manifest(),
52    };
53
54    // --model flag overrides manifest model_path
55    if let Some(ref model_path) = model {
56        manifest.model.model_path = Some(model_path.clone());
57    }
58
59    // PMAT-150: discover model with Jidoka validation (broken APR → GGUF fallback)
60    discover_and_set_model(&mut manifest);
61
62    // PMAT-198: Scale system prompt based on model size.
63    // Small models (<2B) degrade with the full tool table + project context.
64    if let Some(ref path) = manifest.model.model_path {
65        let params_b = estimate_model_params_from_name(path);
66        if params_b < 2.0 {
67            manifest.model.system_prompt = scale_prompt_for_model(params_b);
68        }
69    }
70
71    // Contract: no_model_error — never silently use MockDriver
72    if manifest.model.resolve_model_path().is_none() && manifest_path.is_none() {
73        print_no_model_error();
74        std::process::exit(exit_code::NO_MODEL);
75    }
76
77    // PMAT-160: Try AprServeDriver first (apr serve has full CUDA/GPU).
78    // Falls back to embedded RealizarDriver if `apr` binary not found.
79    let driver: Box<dyn LlmDriver> = if let Some(model_path) = manifest.model.resolve_model_path() {
80        match crate::agent::driver::apr_serve::AprServeDriver::launch(
81            model_path,
82            manifest.model.context_window,
83        ) {
84            Ok(d) => Box::new(d),
85            Err(e) => {
86                eprintln!("⚠ apr serve unavailable ({e}), using embedded inference");
87                build_fallback_driver(&manifest)?
88            }
89        }
90    } else {
91        build_fallback_driver(&manifest)?
92    };
93
94    // Build tool registry with coding tools
95    let tools = build_code_tools(&manifest);
96
97    // Build memory
98    let memory = crate::agent::memory::InMemorySubstrate::new();
99
100    // Non-interactive mode: single prompt
101    // PMAT-161: Return exit code instead of process::exit() so driver Drop
102    // runs and kills the apr serve subprocess (no zombie processes).
103    if print || !prompt.is_empty() {
104        let prompt_text = if prompt.is_empty() {
105            let mut buf = String::new();
106            std::io::Read::read_to_string(&mut std::io::stdin(), &mut buf)?;
107            buf
108        } else {
109            prompt.join(" ")
110        };
111        let code = run_single_prompt(&manifest, driver.as_ref(), &tools, &memory, &prompt_text);
112        drop(driver); // Kill apr serve subprocess before exit
113        std::process::exit(code);
114    }
115
116    // --resume: load previous session
117    // PMAT-165: auto-resume prompt when recent session exists (spec §6.3)
118    let resume_session_id = match resume {
119        Some(Some(id)) => Some(id), // --resume=<session-id>
120        Some(None) => {
121            // --resume (no ID): find most recent for cwd
122            crate::agent::session::SessionStore::find_recent_for_cwd().map(|m| m.id)
123        }
124        None => {
125            // No --resume flag: check for recent session and prompt
126            crate::agent::session::offer_auto_resume()
127        }
128    };
129
130    // Interactive REPL (local inference is free — budget unlimited)
131    crate::agent::repl::run_repl(
132        &manifest,
133        driver.as_ref(),
134        &tools,
135        &memory,
136        max_turns,
137        f64::MAX,
138        resume_session_id.as_deref(),
139    )
140}
141
142/// Build fallback driver (embedded RealizarDriver) when AprServeDriver unavailable.
143fn build_fallback_driver(manifest: &AgentManifest) -> anyhow::Result<Box<dyn LlmDriver>> {
144    #[cfg(feature = "inference")]
145    {
146        if let Some(model_path) = manifest.model.resolve_model_path() {
147            let driver = crate::agent::driver::realizar::RealizarDriver::new(
148                model_path,
149                manifest.model.context_window,
150            )?;
151            return Ok(Box::new(driver));
152        }
153    }
154    let _ = manifest;
155    // No model or no inference feature — return MockDriver
156    Ok(Box::new(crate::agent::driver::mock::MockDriver::single_response(
157        "Hello! I'm running in dry-run mode. \
158         Set model_path in your agent manifest or install the `apr` binary.",
159    )))
160}
161
162/// Auto-discover model if none explicitly set (APR preferred over GGUF).
163fn discover_and_set_model(manifest: &mut AgentManifest) {
164    if manifest.model.model_path.is_some() || manifest.model.model_repo.is_some() {
165        return;
166    }
167    let Some(discovered) = ModelConfig::discover_model() else {
168        return;
169    };
170    eprintln!(
171        "Model: {} (auto-discovered)",
172        discovered.file_name().unwrap_or_default().to_string_lossy()
173    );
174    let ext = discovered.extension().and_then(|e| e.to_str()).unwrap_or("");
175    if ext == "gguf" && check_invalid_apr_in_search_dirs() {
176        eprintln!(
177            "⚠ APR model found but invalid (missing tokenizer). Using GGUF fallback: {}",
178            discovered.display()
179        );
180        eprintln!("  Re-convert with: apr convert <source>.gguf -o <output>.apr\n");
181    }
182    manifest.model.model_path = Some(discovered);
183}
184
185/// Print actionable error when no local model is available.
186fn print_no_model_error() {
187    eprintln!("✗ No local model found. apr code requires a local model.\n");
188    if check_invalid_apr_in_search_dirs() {
189        eprintln!("  ⚠ APR model(s) found but invalid (missing embedded tokenizer).");
190        eprintln!("  Re-convert: apr convert <source>.gguf -o <output>.apr\n");
191    }
192    eprintln!("  Download a model (APR format preferred):");
193    eprintln!("    apr pull qwen3:1.7b-q4k            (default — best tool use at 1.2GB)");
194    eprintln!("    apr pull qwen3:8b-q4k              (recommended for complex tasks)");
195    eprintln!();
196    eprintln!("  Or place a .apr/.gguf file in ~/.apr/models/ (auto-discovered)");
197    eprintln!();
198    eprintln!("  Then run: apr code or apr code --model <path>");
199}
200
201/// Check if any APR files in standard model search dirs are invalid.
202fn check_invalid_apr_in_search_dirs() -> bool {
203    for dir in &ModelConfig::model_search_dirs() {
204        if let Ok(entries) = std::fs::read_dir(dir) {
205            for entry in entries.flatten() {
206                let path = entry.path();
207                if path.extension().is_some_and(|e| e == "apr")
208                    && !crate::agent::driver::validate::is_valid_model_file(&path)
209                {
210                    return true;
211                }
212            }
213        }
214    }
215    false
216}
217
/// Load project-level instructions from `APR.md` or `CLAUDE.md`.
///
/// Looks in the current working directory, trying `APR.md` first and
/// `CLAUDE.md` second; the first one that is a regular file and reads as UTF-8
/// wins. A file that exists but cannot be read is skipped in favour of the next
/// candidate.
///
/// `max_bytes` caps how much file content is returned:
/// * `0` — always returns `None` (no file system access is performed);
/// * content no longer than `max_bytes` — returned unchanged;
/// * longer content — cut at the last UTF-8 character boundary at or below
///   `max_bytes`, then suffixed with `"...\n(truncated from N bytes)"`, where
///   `N` is the original length. The cap applies to the file content only; the
///   suffix is added on top.
///
/// Returns `None` when the working directory is unavailable or neither file
/// could be loaded.
fn load_project_instructions(max_bytes: usize) -> Option<String> {
    // A zero budget can never produce instructions, so decide before any I/O.
    if max_bytes == 0 {
        return None;
    }
    let cwd = std::env::current_dir().ok()?;

    for filename in &["APR.md", "CLAUDE.md"] {
        let path = cwd.join(filename);
        if !path.is_file() {
            continue;
        }
        let Ok(content) = std::fs::read_to_string(&path) else {
            continue;
        };
        if content.len() <= max_bytes {
            return Some(content);
        }

        // Back off to a character boundary so the slice below cannot panic and
        // never exceeds the budget. Terminates because offset 0 is always a
        // boundary, and `max_bytes < content.len()` keeps the index in range.
        let mut end = max_bytes;
        while !content.is_char_boundary(end) {
            end -= 1;
        }
        return Some(format!(
            "{}...\n(truncated from {} bytes)",
            &content[..end],
            content.len()
        ));
    }
    None
}
246
/// Derive the project-instruction budget from a model's context window.
///
/// Windows smaller than 4096 get no budget at all; otherwise the budget is a
/// quarter of the window, capped at 4096.
fn instruction_budget(context_window: usize) -> usize {
    const MIN_WINDOW: usize = 4096;
    const MAX_BUDGET: usize = 4096;

    if context_window >= MIN_WINDOW {
        (context_window / 4).min(MAX_BUDGET)
    } else {
        0
    }
}
255
/// Summarize the current project as a short multi-line text block.
///
/// Lines, in order: the working directory; the git branch (only if
/// `git rev-parse` succeeds); a dirty-file count (only if `git diff --stat`
/// succeeds and reports changes); a language guess from the extensions of the
/// entries directly under `src/`; and the build system when `Cargo.toml` or
/// `pyproject.toml` exists. Every failure along the way is tolerated silently.
fn gather_project_context() -> String {
    // Runs `git <args>` and yields its stdout only when it exits successfully.
    let git_stdout = |args: &[&str]| -> Option<String> {
        let out = std::process::Command::new("git").args(args).output().ok()?;
        if out.status.success() {
            Some(String::from_utf8_lossy(&out.stdout).into_owned())
        } else {
            None
        }
    };

    let cwd = std::env::current_dir().unwrap_or_default();
    let mut report = format!("Working directory: {}\n", cwd.display());

    if let Some(stdout) = git_stdout(&["rev-parse", "--abbrev-ref", "HEAD"]) {
        let branch = stdout.trim();
        report.push_str(&format!("Git branch: {branch}\n"));
    }
    if let Some(stat) = git_stdout(&["diff", "--stat", "--no-color"]) {
        // One line is subtracted from the `--stat` line count before reporting.
        let dirty_count = stat.lines().count().saturating_sub(1);
        if dirty_count != 0 {
            report.push_str(&format!("Dirty files: {dirty_count}\n"));
        }
    }

    // Tally the direct children of `src/`; `total` counts every entry,
    // whatever its extension.
    let (mut rust_files, mut python_files, mut total) = (0u32, 0u32, 0u32);
    for entry in std::fs::read_dir("src").into_iter().flatten().flatten() {
        total += 1;
        match entry.path().extension().and_then(|ext| ext.to_str()) {
            Some("rs") => rust_files += 1,
            Some("py") => python_files += 1,
            _ => {}
        }
    }
    let lang = match (rust_files, python_files) {
        (rs, py) if rs > py => "Rust",
        (_, py) if py > 0 => "Python",
        _ => "unknown",
    };
    report.push_str(&format!("Language: {lang} ({total} files in src/)\n"));

    // Cargo takes precedence when both build files are present.
    let build_system = if std::path::Path::new("Cargo.toml").exists() {
        Some("Build system: Cargo (Rust)\n")
    } else if std::path::Path::new("pyproject.toml").exists() {
        Some("Build system: pyproject.toml (Python)\n")
    } else {
        None
    };
    if let Some(line) = build_system {
        report.push_str(line);
    }

    report
}
314
315/// Build a default `AgentManifest` for coding tasks.
316fn build_default_manifest() -> AgentManifest {
317    let ctx_window = 4096_usize;
318    let budget = instruction_budget(ctx_window);
319    let project_instructions = load_project_instructions(budget);
320    let project_context = gather_project_context();
321
322    let mut system_prompt = CODE_SYSTEM_PROMPT.to_string();
323    system_prompt.push_str(&format!("\n\n## Project Context\n\n{project_context}"));
324    if let Some(ref instructions) = project_instructions {
325        system_prompt.push_str(&format!("\n## Project Instructions\n\n{instructions}"));
326    }
327
328    AgentManifest {
329        name: "apr-code".to_string(),
330        description: "Interactive AI coding assistant".to_string(),
331        privacy: PrivacyTier::Sovereign,
332        model: ModelConfig {
333            system_prompt,
334            max_tokens: 4096,
335            temperature: 0.0,
336            // PMAT-197: Qwen3 supports 32K context. Default 4096 caused
337            // truncate_messages to drop user query (9 tool schemas ~4000 tokens
338            // consumed the entire window). Set to 32K for Qwen3-class models.
339            context_window: Some(32768),
340            ..ModelConfig::default()
341        },
342        resources: ResourceQuota {
343            max_iterations: 50,
344            max_tool_calls: 200,
345            max_cost_usd: 0.0,
346            max_tokens_budget: None,
347        },
348        capabilities: vec![
349            Capability::FileRead { allowed_paths: vec!["*".into()] },
350            Capability::FileWrite { allowed_paths: vec!["*".into()] },
351            Capability::Shell { allowed_commands: vec!["*".into()] },
352            Capability::Memory,
353            Capability::Rag,
354        ],
355        ..AgentManifest::default()
356    }
357}
358
359/// Register all coding tools.
360fn build_code_tools(manifest: &AgentManifest) -> ToolRegistry {
361    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
362
363    let mut tools = ToolRegistry::new();
364    tools.register(Box::new(FileReadTool::new(vec!["*".into()])));
365    tools.register(Box::new(FileWriteTool::new(vec!["*".into()])));
366    tools.register(Box::new(FileEditTool::new(vec!["*".into()])));
367    tools.register(Box::new(GlobTool::new(vec!["*".into()])));
368    tools.register(Box::new(GrepTool::new(vec!["*".into()])));
369    tools.register(Box::new(ShellTool::new(vec!["*".into()], cwd)));
370
371    let memory_sub = Arc::new(crate::agent::memory::InMemorySubstrate::new());
372    tools.register(Box::new(crate::agent::tool::memory::MemoryTool::new(
373        memory_sub,
374        manifest.name.clone(),
375    )));
376
377    // PMAT-163: dedicated pmat_query tool
378    tools.register(Box::new(crate::agent::tool::pmat_query::PmatQueryTool::new()));
379
380    #[cfg(feature = "rag")]
381    {
382        let oracle = Arc::new(crate::oracle::rag::RagOracle::new());
383        tools.register(Box::new(crate::agent::tool::rag::RagTool::new(oracle, 5)));
384    }
385
386    tools
387}
388
389pub use super::code_prompts::exit_code;
390
391/// Run a single prompt (non-interactive). PMAT-172: cap iterations at 10.
392fn run_single_prompt(
393    manifest: &AgentManifest,
394    driver: &dyn LlmDriver,
395    tools: &ToolRegistry,
396    memory: &dyn crate::agent::memory::MemorySubstrate,
397    prompt: &str,
398) -> i32 {
399    let mut single_manifest = manifest.clone();
400    single_manifest.resources.max_iterations = single_manifest.resources.max_iterations.min(10);
401    // PMAT-197: Use compact system prompt for -p mode.
402    // The full CODE_SYSTEM_PROMPT (9-tool table + project context + CLAUDE.md)
403    // overwhelms Qwen3 1.7B causing </think> loops. For -p mode, use a minimal
404    // prompt that lets the model answer directly. Tools still available if needed.
405    single_manifest.model.system_prompt = COMPACT_SYSTEM_PROMPT.to_string();
406    // Note: context_window is set at driver launch time (build_default_manifest),
407    // not here. See PMAT-197 fix in build_default_manifest.
408
409    let rt = match tokio::runtime::Builder::new_current_thread().enable_all().build() {
410        Ok(rt) => rt,
411        Err(e) => {
412            eprintln!("Error: failed to create tokio runtime: {e}");
413            return exit_code::AGENT_ERROR;
414        }
415    };
416
417    // PMAT-197: Use non-nudge loop for -p mode. The nudge ("Use a tool!") forces
418    // small models to make tool calls even for simple questions like "What is 2+2?"
419    // which causes stuck loops. Let the model decide whether to use tools.
420    let result = rt.block_on(crate::agent::runtime::run_agent_loop(
421        &single_manifest,
422        prompt,
423        driver,
424        tools,
425        memory,
426        None,
427    ));
428
429    match result {
430        Ok(r) => {
431            if r.text.is_empty() {
432                // PMAT-190: Empty response — model may be emitting only thinking tokens
433                // that get stripped by strip_thinking_blocks(). Common with Qwen3 when
434                // the serve backend doesn't use Qwen3NoThinkTemplate.
435                eprintln!(
436                    "⚠ Empty response ({} iterations, {} tool calls). \
437                     Model may be in thinking mode — rebuild apr from source for Qwen3NoThinkTemplate fix.",
438                    r.iterations, r.tool_calls
439                );
440            } else {
441                println!("{}", r.text);
442            }
443            exit_code::SUCCESS
444        }
445        Err(e) => {
446            eprintln!("Error: {e}");
447            map_error_to_exit_code(&e)
448        }
449    }
450}
451
452// Prompts and exit codes extracted to code_prompts.rs
453use super::code_prompts::{
454    estimate_model_params_from_name, map_error_to_exit_code, scale_prompt_for_model,
455    CODE_SYSTEM_PROMPT, COMPACT_SYSTEM_PROMPT,
456};
457
458#[cfg(test)]
459#[path = "code_tests.rs"]
460mod tests;
batuta/agent/code.rs

batuta/agent/
code.rs