aprender-orchestrate 0.36.0

Sovereign AI orchestration: autonomous agents, ML serving, code analysis, and transpilation pipelines
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
//! Public entry point for `apr code` / `batuta code`.
//!
//! This module provides the library-level API that both the `batuta` binary
//! and `apr-cli` use to launch the coding assistant. All logic lives here;
//! CLI wrappers are thin dispatchers.
//!
//! PMAT-162: Phase 6 — makes `cmd_code` accessible from the library crate
//! so `apr-cli` can call `batuta::agent::code::cmd_code()` directly.

use std::path::PathBuf;
use std::sync::Arc;

use crate::agent::capability::Capability;
use crate::agent::driver::LlmDriver;
use crate::agent::manifest::{AgentManifest, ModelConfig, ResourceQuota};
use crate::agent::tool::file::{FileEditTool, FileReadTool, FileWriteTool};
use crate::agent::tool::search::{GlobTool, GrepTool};
use crate::agent::tool::shell::ShellTool;
use crate::agent::tool::ToolRegistry;
use crate::serve::backends::PrivacyTier;

/// Entry point for `batuta code` / `apr code`.
///
/// This is the public library API — callable from both the batuta binary
/// and apr-cli (PMAT-162). Handles model discovery, driver selection,
/// tool registration, and REPL launch.
///
/// # Arguments
///
/// * `model` — when `Some`, overrides the manifest's `model_path`.
/// * `project` — when it is not `"."` and is an existing directory, becomes
///   the process working directory before anything else is loaded.
/// * `resume` — `Some(Some(id))` resumes that session, `Some(None)` resumes
///   the most recent session for the working directory, `None` defers to
///   the auto-resume offer.
/// * `prompt` — words joined with a single space into a one-shot prompt;
///   when empty and `print` is set, the prompt is read from stdin.
/// * `print` — forces the non-interactive (single prompt) path.
/// * `max_turns` — forwarded to the interactive REPL.
/// * `manifest_path` — TOML manifest to load instead of the default
///   manifest + settings ladder.
/// * `emit_trace` — forwarded to the single-prompt runner.
/// * `output_format` — forwarded to the single-prompt runner.
/// * `input_format` — `"json"` (ASCII case-insensitive) makes the stdin
///   prompt go through `parse_json_input_envelope`; anything else is used
///   verbatim.
///
/// # Errors
///
/// Returns an error when the working directory cannot be changed, the
/// manifest cannot be read or parsed, layered settings fail to load or
/// validate, the fallback driver cannot be built, `.mcp.json` is invalid
/// (with the `agents-mcp` feature), a `SessionStart` hook blocks the
/// session, stdin cannot be read, or the REPL itself fails.
///
/// # Process exit
///
/// This function terminates the process directly in two cases: with
/// `exit_code::NO_MODEL` when no model resolves and no manifest was given,
/// and with the single-prompt runner's code on the non-interactive path.
#[allow(clippy::too_many_arguments)]
pub fn cmd_code(
    model: Option<PathBuf>,
    project: PathBuf,
    resume: Option<Option<String>>,
    prompt: Vec<String>,
    print: bool,
    max_turns: u32,
    manifest_path: Option<PathBuf>,
    emit_trace: Option<PathBuf>,
    // PMAT-CODE-OUTPUT-FORMAT-001 / PMAT-CODE-INPUT-FORMAT-001:
    // accepted as &str ("text" | "json") to keep this crate's public API
    // independent of apr-cli's ValueEnum types. Unknown values fall back
    // to "text" — the legacy behavior — under Poka-Yoke.
    output_format: &str,
    input_format: &str,
) -> anyhow::Result<()> {
    // --project: change working directory for project instructions
    // NOTE(review): a `project` that is not a directory is silently ignored
    // and the session runs in the caller's cwd — confirm this is intended.
    if project.as_os_str() != "." && project.is_dir() {
        std::env::set_current_dir(&project)?;
    }

    // Load manifest or build default. When `--manifest` is set it short-
    // circuits the settings ladder (the manifest is treated as a complete
    // agent specification); otherwise we fold in
    // `~/.config/apr/settings.json` (user-global) and
    // `<project_root>/.apr/settings.json` (project-local) as Claude-Code
    // parity defaults (PMAT-CODE-CONFIG-LADDER-001). CLI flags always win.
    let mut manifest = match manifest_path {
        Some(ref path) => {
            let content = std::fs::read_to_string(path)
                .map_err(|e| anyhow::anyhow!("cannot read manifest {}: {e}", path.display()))?;
            let m = AgentManifest::from_toml(&content)
                .map_err(|e| anyhow::anyhow!("invalid manifest: {e}"))?;
            eprintln!("✓ Loaded manifest: {}", path.display());
            m
        }
        None => {
            let mut m = build_default_manifest();
            // PMAT-CODE-CONFIG-LADDER-001: settings.json layered defaults.
            // Errors are surfaced (Poka-Yoke) — a malformed settings file
            // is reported rather than silently ignored.
            let project_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
            let settings = crate::agent::settings::AprSettings::load_layered(&project_root)?;
            apply_settings_to_manifest(&mut m, &settings)?;
            m
        }
    };

    // --model flag overrides manifest model_path (and therefore overrides
    // any settings.json `model` field — CLI always wins, per the parity
    // ladder contract).
    if let Some(ref model_path) = model {
        manifest.model.model_path = Some(model_path.clone());
    }

    // PMAT-150: discover model with Jidoka validation (broken APR → GGUF fallback)
    discover_and_set_model(&mut manifest);

    // PMAT-198: Scale system prompt based on model size.
    // Small models (<2B) degrade with the full tool table + project context.
    if let Some(ref path) = manifest.model.model_path {
        let params_b = estimate_model_params_from_name(path);
        if params_b < 2.0 {
            manifest.model.system_prompt = scale_prompt_for_model(params_b);
        }
    }

    // Contract: no_model_error — never silently use MockDriver
    // No driver or tool has been created yet, so exiting here leaks nothing.
    if manifest.model.resolve_model_path().is_none() && manifest_path.is_none() {
        print_no_model_error();
        std::process::exit(exit_code::NO_MODEL);
    }

    // PMAT-160: Try AprServeDriver first (apr serve has full CUDA/GPU).
    // Falls back to embedded RealizarDriver if `apr` binary not found.
    // PMAT-CODE-SPAWN-PARITY-001: driver stored as Arc so TaskTool can
    // share it with the AgentPool for sub-agent execution.
    let driver: Arc<dyn LlmDriver> = if let Some(model_path) = manifest.model.resolve_model_path() {
        match crate::agent::driver::apr_serve::AprServeDriver::launch(
            model_path,
            manifest.model.context_window,
        ) {
            Ok(d) => Arc::new(d),
            Err(e) => {
                eprintln!("⚠ apr serve unavailable ({e}), using embedded inference");
                Arc::from(build_fallback_driver(&manifest)?)
            }
        }
    } else {
        Arc::from(build_fallback_driver(&manifest)?)
    };

    // PMAT-CODE-MCP-JSON-LOADER-001: merge `<project>/.mcp.json` (Claude-Code-
    // shape) servers into manifest.mcp_servers BEFORE tool registration. The
    // manifest's TOML-declared servers always win on name collision (operator-
    // declared > project-default), matching the settings-ladder semantics.
    // Missing .mcp.json is a non-error; malformed JSON is a hard error.
    #[cfg(feature = "agents-mcp")]
    {
        let project_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
        match crate::agent::mcp_json::load_and_merge(&mut manifest, &project_root) {
            Ok(0) => {}
            Ok(n) => {
                eprintln!("✓ Loaded {n} MCP server(s) from .mcp.json");
            }
            Err(e) => {
                anyhow::bail!("invalid .mcp.json: {e}");
            }
        }
    }

    // Build tool registry with coding tools
    let mut tools = build_code_tools(&manifest);

    // PMAT-CODE-MCP-CLIENT-001: register MCP client tools from manifest.mcp_servers.
    // Synchronous wrapper over async discover_mcp_tools — a no-op when mcp_servers is
    // empty (the default for `apr code` without a manifest).
    register_mcp_client_tools(&mut tools, &manifest);

    // PMAT-CODE-SPAWN-PARITY-001: register Task tool (Claude-Code Agent parity).
    // `task` lets the agent delegate to typed subagents (general-purpose,
    // explore, plan) with bounded recursion depth (Jidoka).
    crate::agent::task_tool::register_task_tool(
        &mut tools,
        &manifest,
        Arc::clone(&driver),
        /* max_depth */ 3,
    );

    // PMAT-CODE-HOOKS-001: build hook registry from manifest and fire SessionStart.
    // Returned Warn messages are surfaced to the user; a Block here aborts session
    // startup (matching Claude Code's exit-code-2 semantics).
    let hooks_reg = crate::agent::hooks::HookRegistry::from_configs(manifest.hooks.clone());
    let hook_cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    match hooks_reg.run(crate::agent::hooks::HookEvent::SessionStart, "", &hook_cwd) {
        crate::agent::hooks::HookDecision::Allow => {}
        crate::agent::hooks::HookDecision::Warn(msg) => {
            if !msg.is_empty() {
                eprintln!("⚠ SessionStart hook: {msg}");
            }
        }
        crate::agent::hooks::HookDecision::Block(reason) => {
            anyhow::bail!("SessionStart hook blocked session: {reason}");
        }
    }

    // Build memory
    let memory = crate::agent::memory::InMemorySubstrate::new();

    // Non-interactive mode: single prompt
    // PMAT-161: `run_single_prompt` returns an exit code instead of calling
    // process::exit() itself, so the driver can be torn down here first and
    // the apr serve subprocess is killed (no zombie processes).
    if print || !prompt.is_empty() {
        let prompt_text = if prompt.is_empty() {
            let mut buf = String::new();
            std::io::Read::read_to_string(&mut std::io::stdin(), &mut buf)?;
            // PMAT-CODE-INPUT-FORMAT-001: when --input-format=json, parse
            // a `{"role":"user","content":"..."}` envelope and use `content`
            // as the prompt. Empty/missing content is a hard error so the
            // operator notices the malformed envelope.
            if input_format.eq_ignore_ascii_case("json") {
                parse_json_input_envelope(&buf)?
            } else {
                buf
            }
        } else {
            prompt.join(" ")
        };
        let code = run_single_prompt(
            &manifest,
            driver.as_ref(),
            &tools,
            &memory,
            &prompt_text,
            emit_trace.as_deref(),
            output_format,
        );
        // `std::process::exit` does not run destructors, so every owner of
        // the driver has to be dropped by hand. `register_task_tool` was
        // handed an `Arc` clone of the driver, so the registry may still be
        // keeping it alive: drop the registry first, otherwise dropping the
        // local handle alone leaves the strong count above zero and the
        // driver's `Drop` never runs.
        drop(tools);
        drop(driver); // Kill apr serve subprocess before exit
        std::process::exit(code);
    }

    // --resume: load previous session
    // PMAT-165: auto-resume prompt when recent session exists (spec §6.3)
    let resume_session_id = match resume {
        Some(Some(id)) => Some(id), // --resume=<session-id>
        Some(None) => {
            // --resume (no ID): find most recent for cwd
            crate::agent::session::SessionStore::find_recent_for_cwd().map(|m| m.id)
        }
        None => {
            // No --resume flag: check for recent session and prompt
            crate::agent::session::offer_auto_resume()
        }
    };

    // Interactive REPL (local inference is free — budget unlimited)
    crate::agent::repl::run_repl(
        &manifest,
        driver.as_ref(),
        &tools,
        &memory,
        max_turns,
        f64::MAX,
        resume_session_id.as_deref(),
    )
}

/// PMAT-CODE-CONFIG-LADDER-001: layer values loaded from `settings.json`
/// onto a manifest. Every field that is `Some(_)` in `settings` is applied;
/// `None` fields leave the manifest untouched.
///
/// Field handling:
///
/// * `model` — stored as `model_path` when it is an absolute path, starts
///   with `./` or `../`, or contains no `:`; otherwise stored as
///   `model_repo`.
/// * `extra_system_prompt` — appended (after a blank line) to the existing
///   system prompt when it is not whitespace-only.
/// * `max_turns` — written to `resources.max_iterations`.
/// * `permission_mode` — validated only (PMAT-CODE-CONFIG-LADDER-FIELDS-001);
///   nothing is written to the manifest.
/// * `allowed_hosts` — copied only when the manifest's list is empty.
///
/// # Errors
///
/// Fails when `permission_mode` is set to a string that
/// `PermissionMode::parse` does not recognise.
fn apply_settings_to_manifest(
    manifest: &mut AgentManifest,
    settings: &crate::agent::settings::AprSettings,
) -> anyhow::Result<()> {
    if let Some(model) = settings.model.as_ref() {
        // The same field accepts both an `apr pull` alias such as
        // `qwen3:1.7b-q4k` and a filesystem path such as `/abs/path.gguf`,
        // so the classification is deliberately loose.
        let explicit_path = std::path::Path::new(model).is_absolute()
            || model.starts_with("./")
            || model.starts_with("../");
        let bare_name = !model.contains(':') && !model.starts_with("hf://");
        if explicit_path || bare_name {
            manifest.model.model_path = Some(std::path::PathBuf::from(model));
        } else {
            manifest.model.model_repo = Some(model.clone());
        }
    }

    match settings.extra_system_prompt.as_deref() {
        Some(extra) if !extra.trim().is_empty() => {
            // Extend rather than overwrite: the base prompt carries the
            // tool-calling grammar guidance and must survive.
            let prompt = &mut manifest.model.system_prompt;
            prompt.push_str("\n\n");
            prompt.push_str(extra);
        }
        _ => {}
    }

    if let Some(mt) = settings.max_turns {
        manifest.resources.max_iterations = mt;
    }

    if let Some(pm) = settings.permission_mode.as_ref() {
        // Validation happens here so the operator gets an error naming the
        // offending value. Only parse + validate is enforced at this point;
        // the runtime per-tool verdict gate is tracked by
        // PMAT-CODE-PERMISSIONS-RUNTIME-001.
        let recognised = crate::agent::permission::PermissionMode::parse(pm).is_some();
        if !recognised {
            anyhow::bail!(
                "settings.json permissionMode: unknown mode {pm:?} \
                 (expected default | plan | acceptEdits | bypassPermissions)"
            );
        }
    }

    if let Some(hosts) = settings.allowed_hosts.as_ref() {
        // A non-empty list already on the manifest takes precedence over
        // the settings value.
        if manifest.allowed_hosts.is_empty() {
            manifest.allowed_hosts = hosts.clone();
        }
    }

    Ok(())
}

/// Construct the driver used when `AprServeDriver` is not available.
///
/// With the `inference` feature enabled and a resolvable model path this is
/// an embedded `RealizarDriver`. In every other case it is a `MockDriver`
/// that answers with a single fixed dry-run message.
///
/// # Errors
///
/// Propagates the error from `RealizarDriver::new`.
fn build_fallback_driver(manifest: &AgentManifest) -> anyhow::Result<Box<dyn LlmDriver>> {
    #[cfg(feature = "inference")]
    {
        if let Some(resolved) = manifest.model.resolve_model_path() {
            let embedded = crate::agent::driver::realizar::RealizarDriver::new(
                resolved,
                manifest.model.context_window,
            )?;
            return Ok(Box::new(embedded));
        }
    }
    // Keeps `manifest` "used" when the `inference` feature is compiled out.
    let _ = manifest;
    // Neither a model nor the inference feature: fall back to the mock.
    let dry_run = crate::agent::driver::mock::MockDriver::single_response(
        "Hello! I'm running in dry-run mode. \
         Set model_path in your agent manifest or install the `apr` binary.",
    );
    Ok(Box::new(dry_run))
}

/// Fill in `manifest.model.model_path` from `ModelConfig::discover_model`
/// when the manifest names neither a model path nor a model repo.
///
/// Announces the discovered file on stderr. When the discovered file has a
/// `gguf` extension and the search directories also contain an invalid
/// `.apr` file, an extra warning explains that GGUF is being used as a
/// fallback and how to re-convert.
fn discover_and_set_model(manifest: &mut AgentManifest) {
    let already_configured =
        manifest.model.model_path.is_some() || manifest.model.model_repo.is_some();
    if already_configured {
        return;
    }

    let discovered = match ModelConfig::discover_model() {
        Some(found) => found,
        None => return,
    };

    eprintln!(
        "Model: {} (auto-discovered)",
        discovered.file_name().unwrap_or_default().to_string_lossy()
    );

    let is_gguf = discovered.extension().and_then(|e| e.to_str()) == Some("gguf");
    // The directory scan only runs for GGUF discoveries (short-circuit).
    if is_gguf && check_invalid_apr_in_search_dirs() {
        eprintln!(
            "⚠ APR model found but invalid (missing tokenizer). Using GGUF fallback: {}",
            discovered.display()
        );
        eprintln!("  Re-convert with: apr convert <source>.gguf -o <output>.apr\n");
    }

    manifest.model.model_path = Some(discovered);
}

/// Write the "no local model" guidance to stderr.
///
/// The message always contains the download / placement instructions; a
/// re-conversion hint is inserted after the headline when an invalid `.apr`
/// file is present in the model search directories.
fn print_no_model_error() {
    const INVALID_APR_HINT: [&str; 2] = [
        "  ⚠ APR model(s) found but invalid (missing embedded tokenizer).",
        "  Re-convert: apr convert <source>.gguf -o <output>.apr\n",
    ];
    // Empty entries produce the blank separator lines.
    const GUIDANCE: [&str; 7] = [
        "  Download a model (APR format preferred):",
        "    apr pull qwen3:1.7b-q4k            (default — best tool use at 1.2GB)",
        "    apr pull qwen3:8b-q4k              (recommended for complex tasks)",
        "",
        "  Or place a .apr/.gguf file in ~/.apr/models/ (auto-discovered)",
        "",
        "  Then run: apr code or apr code --model <path>",
    ];

    eprintln!("✗ No local model found. apr code requires a local model.\n");
    if check_invalid_apr_in_search_dirs() {
        for line in &INVALID_APR_HINT {
            eprintln!("{line}");
        }
    }
    for line in &GUIDANCE {
        eprintln!("{line}");
    }
}

/// Report whether any `.apr` file directly inside one of the model search
/// directories fails `is_valid_model_file`.
///
/// Directories that cannot be listed and entries that cannot be read are
/// skipped. The scan stops at the first invalid file.
fn check_invalid_apr_in_search_dirs() -> bool {
    let search_dirs = ModelConfig::model_search_dirs();
    (&search_dirs)
        .into_iter()
        .filter_map(|dir| std::fs::read_dir(dir).ok())
        .flat_map(|listing| listing.flatten())
        .map(|entry| entry.path())
        .any(|candidate| {
            candidate.extension().is_some_and(|e| e == "apr")
                && !crate::agent::driver::validate::is_valid_model_file(&candidate)
        })
}

/// Load project-level instructions from `APR.md` or `CLAUDE.md` in the
/// current working directory.
///
/// The files are tried in that order; the first one that exists as a
/// regular file and can be read as UTF-8 wins. A file that exists but
/// cannot be read is skipped in favour of the next candidate.
///
/// `max_bytes` is the byte budget for the returned excerpt:
///
/// * `0` disables loading entirely and returns `None` without touching the
///   filesystem.
/// * Content that fits is returned unchanged.
/// * Longer content is cut at the largest UTF-8 character boundary that
///   does not exceed `max_bytes`, then suffixed with
///   `"...\n(truncated from N bytes)"`. The excerpt itself (before the
///   suffix) is never longer than `max_bytes`.
///
/// Returns `None` when the working directory is unavailable or neither file
/// could be loaded.
fn load_project_instructions(max_bytes: usize) -> Option<String> {
    // A zero budget can never yield instructions, so skip the I/O.
    if max_bytes == 0 {
        return None;
    }
    let cwd = std::env::current_dir().ok()?;

    for filename in &["APR.md", "CLAUDE.md"] {
        let path = cwd.join(filename);
        if !path.is_file() {
            continue;
        }
        let Ok(content) = std::fs::read_to_string(&path) else {
            continue;
        };
        if content.len() <= max_bytes {
            return Some(content);
        }
        // Back off to a character boundary so the slice below cannot split
        // a multi-byte character and the excerpt stays within the budget.
        // Index 0 is always a boundary, so the loop terminates.
        let mut end = max_bytes;
        while !content.is_char_boundary(end) {
            end -= 1;
        }
        return Some(format!(
            "{}...\n(truncated from {} bytes)",
            &content[..end],
            content.len()
        ));
    }
    None
}

/// Derive the instruction budget from a model's context window.
///
/// Windows smaller than 4096 get a budget of `0`. Larger windows get one
/// quarter of the window, capped at 4096.
fn instruction_budget(context_window: usize) -> usize {
    match context_window {
        0..=4095 => 0,
        window => (window / 4).min(4096),
    }
}

/// PMAT-CODE-ORG-POLICY-RUNTIME-001: concatenate the system prompt from its
/// component blocks.
///
/// Pure string assembly — no I/O, no global state. Optional blocks passed
/// as `None` are omitted together with their heading.
///
/// Emission order:
///
/// 1. `base`, verbatim.
/// 2. `## Enforced organization policy (<source>)` followed by the policy
///    content, when `org_policy` is present. It is placed directly after
///    `base`, ahead of every project-supplied section.
/// 3. `## Project Context` followed by `project_context` (always emitted).
/// 4. `## Project Instructions`, when `project_instructions` is present.
/// 5. `## Auto-memory`, when `auto_memory` is present.
///
/// Sections 4 and 5 are introduced by a single newline, sections 2 and 3
/// by a blank line.
fn assemble_system_prompt(
    base: &str,
    project_context: &str,
    project_instructions: Option<&str>,
    auto_memory: Option<&str>,
    org_policy: Option<&crate::agent::org_policy::OrgPolicy>,
) -> String {
    let mut sections: Vec<String> = vec![base.to_owned()];

    sections.extend(org_policy.map(|policy| {
        format!(
            "\n\n## Enforced organization policy ({source})\n\n{content}",
            source = policy.source.display(),
            content = policy.content
        )
    }));

    sections.push(format!("\n\n## Project Context\n\n{project_context}"));

    sections.extend(
        project_instructions
            .map(|instructions| format!("\n## Project Instructions\n\n{instructions}")),
    );

    sections.extend(auto_memory.map(|mem| format!("\n## Auto-memory\n\n{mem}")));

    sections.concat()
}

/// Build a short, newline-terminated description of the current project.
///
/// The report always contains the working directory and a language line.
/// It additionally contains the git branch and a dirty-file count when the
/// corresponding `git` commands succeed, and a build-system line when
/// `Cargo.toml` or `pyproject.toml` exists in the working directory.
fn gather_project_context() -> String {
    let here = std::env::current_dir().unwrap_or_default();
    let mut report = format!("Working directory: {}\n", here.display());

    // Current branch name; skipped when git is missing or the command fails.
    let head = std::process::Command::new("git")
        .args(["rev-parse", "--abbrev-ref", "HEAD"])
        .output();
    match head {
        Ok(out) if out.status.success() => {
            let branch = String::from_utf8_lossy(&out.stdout).trim().to_string();
            report.push_str(&format!("Git branch: {branch}\n"));
        }
        _ => {}
    }

    // `git diff --stat` output lines minus one; only reported when positive.
    let stat = std::process::Command::new("git")
        .args(["diff", "--stat", "--no-color"])
        .output();
    match stat {
        Ok(out) if out.status.success() => {
            let dirty_count =
                String::from_utf8_lossy(&out.stdout).lines().count().saturating_sub(1);
            if dirty_count > 0 {
                report.push_str(&format!("Dirty files: {dirty_count}\n"));
            }
        }
        _ => {}
    }

    // Tally the direct children of `src/` by extension.
    let (mut rust_files, mut python_files, mut total) = (0u32, 0u32, 0u32);
    for entry in std::fs::read_dir("src").into_iter().flatten().flatten() {
        total += 1;
        match entry.path().extension().and_then(|ext| ext.to_str()) {
            Some("rs") => rust_files += 1,
            Some("py") => python_files += 1,
            _ => {}
        }
    }
    let lang = match (rust_files > python_files, python_files > 0) {
        (true, _) => "Rust",
        (false, true) => "Python",
        (false, false) => "unknown",
    };
    report.push_str(&format!("Language: {lang} ({total} files in src/)\n"));

    // Cargo takes precedence when both manifests are present.
    if std::path::Path::new("Cargo.toml").exists() {
        report.push_str("Build system: Cargo (Rust)\n");
    } else if std::path::Path::new("pyproject.toml").exists() {
        report.push_str("Build system: pyproject.toml (Python)\n");
    }

    report
}

/// Produce the `AgentManifest` used by `apr code` when no manifest file is
/// supplied.
///
/// The system prompt is assembled from `CODE_SYSTEM_PROMPT`, the gathered
/// project context, project instructions, auto-memory and the enforced
/// organization policy. Loader warnings are printed to stderr. The manifest
/// is Sovereign-tier with wildcard file-read, file-write and shell
/// capabilities plus memory and RAG.
fn build_default_manifest() -> AgentManifest {
    // NOTE(review): the budget is derived from a 4096-token window although
    // the manifest below advertises a 32768-token context window — confirm
    // the smaller figure is intentional.
    let budget = instruction_budget(4096_usize);
    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));

    // PMAT-CODE-MEMORY-PARITY-001: the layered loader (user-global →
    // project, with `@import` resolution) is tried first; the legacy
    // single-file loader is the fallback when it yields nothing.
    let mut import_warnings = Vec::new();
    let layered =
        crate::agent::instructions::load_layered_instructions(&cwd, budget, &mut import_warnings);
    let project_instructions = layered.or_else(|| load_project_instructions(budget));
    import_warnings.iter().for_each(|w| eprintln!("⚠ instructions: {w}"));

    let project_context = gather_project_context();

    // PMAT-CODE-MEMORY-AUTO-001: `*.md` files from
    // `~/.config/apr/projects/<slug>/memory/` are surfaced under the
    // `## Auto-memory` section. The slug follows Claude Code's
    // hyphenated-path convention so `~/.claude/projects/` symlinks keep
    // working cross-tool.
    let mut auto_warns: Vec<String> = Vec::new();
    let auto_memory = crate::agent::auto_memory::load_auto_memory(&cwd, &mut auto_warns);
    auto_warns.iter().for_each(|w| eprintln!("{w}"));

    // PMAT-CODE-ORG-POLICY-RUNTIME-001: enforced org policy comes from
    // `/etc/apr-code/CLAUDE.md` (native first) or
    // `/etc/claude-code/CLAUDE.md` (cross-compat). Missing files and I/O
    // errors are skipped silently by the loader so a sandboxed runtime
    // can't ransom REPL boot. It shares the instruction budget; a budget of
    // zero disables the loader entirely (small models).
    let policy_roots = crate::agent::org_policy::canonical_system_roots();
    let org_policy = crate::agent::org_policy::load_org_policy(&policy_roots, "CLAUDE.md", budget);

    let system_prompt = assemble_system_prompt(
        CODE_SYSTEM_PROMPT,
        &project_context,
        project_instructions.as_deref(),
        auto_memory.as_deref(),
        org_policy.as_ref(),
    );

    let model = ModelConfig {
        system_prompt,
        max_tokens: 4096,
        temperature: 0.0,
        // PMAT-197: Qwen3 supports 32K context. A 4096 default made
        // truncate_messages drop the user query (9 tool schemas, ~4000
        // tokens, consumed the whole window), hence 32K for Qwen3-class
        // models.
        context_window: Some(32768),
        ..ModelConfig::default()
    };

    let resources = ResourceQuota {
        max_iterations: 50,
        max_tool_calls: 200,
        max_cost_usd: 0.0,
        max_tokens_budget: None,
    };

    let capabilities = vec![
        Capability::FileRead { allowed_paths: vec!["*".into()] },
        Capability::FileWrite { allowed_paths: vec!["*".into()] },
        Capability::Shell { allowed_commands: vec!["*".into()] },
        Capability::Memory,
        Capability::Rag,
    ];

    AgentManifest {
        name: "apr-code".to_string(),
        description: "Interactive AI coding assistant".to_string(),
        privacy: PrivacyTier::Sovereign,
        model,
        resources,
        capabilities,
        ..AgentManifest::default()
    }
}

/// PMAT-CODE-MCP-CLIENT-001 — add the tools exposed by the servers listed
/// in `manifest.mcp_servers` to the `apr code` registry.
///
/// `cmd_code` is synchronous, so discovery is driven to completion on a
/// current-thread tokio runtime created just for this call. If that runtime
/// cannot be built, a warning is printed and no tools are registered.
///
/// Does nothing when the `agents-mcp` feature is disabled or the manifest
/// lists no servers.
#[allow(unused_variables)]
fn register_mcp_client_tools(tools: &mut ToolRegistry, manifest: &AgentManifest) {
    #[cfg(feature = "agents-mcp")]
    {
        if manifest.mcp_servers.is_empty() {
            return;
        }

        let built = tokio::runtime::Builder::new_current_thread().enable_all().build();
        let rt = match built {
            Err(e) => {
                eprintln!("⚠ failed to create MCP discovery runtime: {e}");
                return;
            }
            Ok(runtime) => runtime,
        };

        let discovered = rt.block_on(crate::agent::tool::mcp_client::discover_mcp_tools(manifest));
        let count = discovered.len();
        discovered.into_iter().for_each(|tool| {
            tools.register(Box::new(tool));
        });

        if count > 0 {
            eprintln!(
                "✓ Registered {count} MCP tool(s) from {} server(s)",
                manifest.mcp_servers.len()
            );
        }
    }
}

/// Create the tool registry for a coding session.
///
/// Registers, in order: file read / write / edit, glob, grep, shell (rooted
/// at the current working directory), the memory tool (backed by its own
/// in-memory substrate and keyed by the manifest name), `pmat_query`, the
/// RAG tool (only with the `rag` feature), and finally the web tools
/// subject to the privacy gate in `register_web_tools`.
fn build_code_tools(manifest: &AgentManifest) -> ToolRegistry {
    let shell_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let mut registry = ToolRegistry::new();

    // Filesystem and search tools, all constructed with a wildcard pattern.
    registry.register(Box::new(FileReadTool::new(vec!["*".into()])));
    registry.register(Box::new(FileWriteTool::new(vec!["*".into()])));
    registry.register(Box::new(FileEditTool::new(vec!["*".into()])));
    registry.register(Box::new(GlobTool::new(vec!["*".into()])));
    registry.register(Box::new(GrepTool::new(vec!["*".into()])));
    registry.register(Box::new(ShellTool::new(vec!["*".into()], shell_dir)));

    let tool_memory = Arc::new(crate::agent::memory::InMemorySubstrate::new());
    let memory_tool =
        crate::agent::tool::memory::MemoryTool::new(tool_memory, manifest.name.clone());
    registry.register(Box::new(memory_tool));

    // PMAT-163: dedicated pmat_query tool
    registry.register(Box::new(crate::agent::tool::pmat_query::PmatQueryTool::new()));

    #[cfg(feature = "rag")]
    {
        let oracle = Arc::new(crate::oracle::rag::RagOracle::new());
        registry.register(Box::new(crate::agent::tool::rag::RagTool::new(oracle, 5)));
    }

    // PMAT-CODE-WEB-TOOLS-001: NetworkTool sits behind the privacy-tier
    // gate. Sovereign tier always blocks (Poka-Yoke); other tiers register
    // only when `allowed_hosts` is non-empty (explicit opt-in).
    register_web_tools(&mut registry, manifest);

    registry
}

/// Add the network-facing tools to `tools` when the manifest permits them.
///
/// Nothing is registered if the privacy tier is Sovereign or if
/// `allowed_hosts` is empty. Otherwise a `NetworkTool` restricted to
/// `allowed_hosts` is registered, followed by a `BrowserTool` when the
/// `agents-browser` feature is enabled.
fn register_web_tools(tools: &mut ToolRegistry, manifest: &AgentManifest) {
    use crate::serve::backends::PrivacyTier;

    let sovereign = matches!(manifest.privacy, PrivacyTier::Sovereign);
    if sovereign || manifest.allowed_hosts.is_empty() {
        return;
    }

    let network = crate::agent::tool::network::NetworkTool::new(manifest.allowed_hosts.clone());
    tools.register(Box::new(network));

    #[cfg(feature = "agents-browser")]
    {
        let browser = crate::agent::tool::browser::BrowserTool::new(manifest.privacy);
        tools.register(Box::new(browser));
    }
}

pub use super::code_prompts::exit_code;

/// Run a single prompt (non-interactive). PMAT-172: cap iterations at 10.
///
/// Works on a clone of `manifest` whose `max_iterations` is clamped to at
/// most 10 and whose system prompt is replaced by `COMPACT_SYSTEM_PROMPT`,
/// then drives `run_agent_loop` to completion on a fresh current-thread
/// tokio runtime.
///
/// * `prompt` — the user prompt handed to the agent loop.
/// * `emit_trace` — when `Some`, a ccpa trace of the run is written to that
///   path; a write failure is reported on stderr and does not change the
///   exit code.
/// * `output_format` — PMAT-CODE-OUTPUT-FORMAT-001: `"json"` (matched
///   ASCII-case-insensitively) prints a JSON result envelope on stdout; any
///   other value prints the plain response text.
///
/// Returns `exit_code::SUCCESS` when the agent loop returns `Ok`,
/// `exit_code::AGENT_ERROR` when the tokio runtime cannot be built, and
/// otherwise whatever `map_error_to_exit_code` yields for the loop error.
fn run_single_prompt(
    manifest: &AgentManifest,
    driver: &dyn LlmDriver,
    tools: &ToolRegistry,
    memory: &dyn crate::agent::memory::MemorySubstrate,
    prompt: &str,
    emit_trace: Option<&std::path::Path>,
    // PMAT-CODE-OUTPUT-FORMAT-001: "text" (default) or "json".
    output_format: &str,
) -> i32 {
    let mut single_manifest = manifest.clone();
    single_manifest.resources.max_iterations = single_manifest.resources.max_iterations.min(10);
    // PMAT-197: Use compact system prompt for -p mode.
    // The full CODE_SYSTEM_PROMPT (9-tool table + project context + CLAUDE.md)
    // overwhelms Qwen3 1.7B causing </think> loops. For -p mode, use a minimal
    // prompt that lets the model answer directly. Tools still available if needed.
    single_manifest.model.system_prompt = COMPACT_SYSTEM_PROMPT.to_string();
    // Note: context_window is set at driver launch time (build_default_manifest),
    // not here. See PMAT-197 fix in build_default_manifest.

    let rt = match tokio::runtime::Builder::new_current_thread().enable_all().build() {
        Ok(rt) => rt,
        Err(e) => {
            eprintln!("Error: failed to create tokio runtime: {e}");
            return exit_code::AGENT_ERROR;
        }
    };

    let started = std::time::Instant::now();

    // PMAT-197: Use non-nudge loop for -p mode. The nudge ("Use a tool!") forces
    // small models to make tool calls even for simple questions like "What is 2+2?"
    // which causes stuck loops. Let the model decide whether to use tools.
    let result = rt.block_on(crate::agent::runtime::run_agent_loop(
        &single_manifest,
        prompt,
        driver,
        tools,
        memory,
        None,
    ));

    let r = match result {
        Ok(r) => r,
        Err(e) => {
            eprintln!("Error: {e}");
            return map_error_to_exit_code(&e);
        }
    };

    // Measured once, right after the loop returns, and shared by the JSON
    // envelope and the trace so both report the same duration for this run
    // (and neither includes the time spent printing below).
    let elapsed = started.elapsed();
    let json_output = output_format.eq_ignore_ascii_case("json");
    let empty_response = r.text.is_empty();

    if empty_response {
        // PMAT-190: Empty response — model may be emitting only thinking tokens
        // that get stripped by strip_thinking_blocks(). Common with Qwen3 when
        // the serve backend doesn't use Qwen3NoThinkTemplate.
        eprintln!(
            "⚠ Empty response ({} iterations, {} tool calls). \
             Model may be in thinking mode — rebuild apr from source for Qwen3NoThinkTemplate fix.",
            r.iterations, r.tool_calls
        );
    }

    if json_output {
        // PMAT-CODE-OUTPUT-FORMAT-001: structured envelope mirroring
        // Claude Code's `claude -p --output-format json` shape. An empty
        // response is flagged as an error inside the envelope.
        println!("{}", build_json_result_envelope(&r, elapsed, /*is_error*/ empty_response));
    } else if !empty_response {
        println!("{}", r.text);
    }

    // PMAT-CODE-EMIT-TRACE-001 (M28): write a ccpa-trace.jsonl
    // describing this run. Used by `ccpa measure` to score
    // apr code against canonical Claude Code reference fixtures.
    if let Some(trace_path) = emit_trace {
        let model = single_manifest
            .model
            .resolve_model_path()
            .map(|p| p.display().to_string())
            .unwrap_or_else(|| "apr-code-unknown".to_owned());
        if let Err(e) = emit_ccpa_trace(trace_path, prompt, &r, elapsed, &model) {
            eprintln!("⚠ failed to write ccpa-trace to {}: {e}", trace_path.display());
        }
    }

    // NOTE(review): an empty response still exits with SUCCESS even though the
    // JSON envelope reports `is_error: true` — confirm this is intentional.
    exit_code::SUCCESS
}

/// Emit a `ccpa-trace.jsonl` (M28) describing a single apr-code run.
///
/// Schema mirrors `claude-code-parity-apr-v1.yaml § trace_schema`. For
/// the M28 minimum-viable scope we emit four records, one JSON object
/// per line:
///
///   1. `session_start`  with a synthetic `session_id` derived from the
///      wall clock at the moment this function runs (so re-runs differ),
///      a `ts` of the form `@<unix-seconds>`, and a `cwd_sha256`
///      placeholder of 64 zeros.
///   2. `user_prompt`    turn 0, verbatim text.
///   3. `assistant_turn` turn 1, single text block carrying
///      `result.text`. Tool dispatch + hook + skill records are
///      M29+ enrichment follow-ups.
///   4. `session_end`    `elapsed` in milliseconds + token counts from
///      `result.usage`.
///
/// `stop_reason` is always written as `"end_turn"`.
///
/// # Errors
///
/// Returns the `std::io::Error` from `std::fs::write` when `path` cannot
/// be written.
fn emit_ccpa_trace(
    path: &std::path::Path,
    prompt: &str,
    result: &super::result::AgentLoopResult,
    elapsed: std::time::Duration,
    model: &str,
) -> std::io::Result<()> {
    use std::time::{SystemTime, UNIX_EPOCH};

    // Read the wall clock exactly once so `session_id` and `ts` describe the
    // same instant. A clock set before the Unix epoch degrades to zero.
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default();
    let ts_micros = since_epoch.as_micros();

    // session_id: UUIDv7-shaped hex string built from slices of the
    // microsecond timestamp. Normalized by the differ at compare time, so
    // re-running the same fixture producing a different id is fine.
    let session_id = format!(
        "{:08x}-{:04x}-7000-{:04x}-{:012x}",
        (ts_micros >> 64) as u32,
        ((ts_micros >> 48) & 0xFFFF) as u16,
        ((ts_micros >> 32) & 0xFFFF) as u16,
        (ts_micros & 0xFFFF_FFFF_FFFF) as u64
    );
    // ts is `@<unix-seconds>` — neither ISO 8601 nor RFC 3339, but the
    // differ normalizes ts at compare time.
    let ts = format!("@{}", since_epoch.as_secs());
    let cwd_sha256 = "0".repeat(64);
    // Saturate rather than wrap if the millisecond count exceeds u64.
    let elapsed_ms = u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX);

    let session_start = serde_json::json!({
        "v": 1,
        "kind": "session_start",
        "session_id": session_id,
        "ts": ts,
        "actor": "apr-code",
        "model": model,
        "cwd_sha256": cwd_sha256,
    });
    let user_prompt = serde_json::json!({
        "v": 1,
        "kind": "user_prompt",
        "turn": 0,
        "text": prompt,
    });
    let assistant_turn = serde_json::json!({
        "v": 1,
        "kind": "assistant_turn",
        "turn": 1,
        "blocks": [{"type": "text", "text": result.text}],
        "stop_reason": "end_turn",
    });
    let session_end = serde_json::json!({
        "v": 1,
        "kind": "session_end",
        "turn": 1,
        "stop_reason": "end_turn",
        "elapsed_ms": elapsed_ms,
        "tokens_in": result.usage.input_tokens,
        "tokens_out": result.usage.output_tokens,
    });

    // JSON Lines: each record on its own line, trailing newline included.
    let body = format!("{}\n{}\n{}\n{}\n", session_start, user_prompt, assistant_turn, session_end);
    std::fs::write(path, body)
}

/// PMAT-CODE-INPUT-FORMAT-001 (M-NON-INT-002): parse a `{"role":"user","content":"..."}`
/// JSON envelope from stdin and return the prompt text. Mirrors the shape Claude
/// Code accepts on `claude -p --input-format json`.
///
/// Surrounding whitespace in `buf` is ignored. A missing (or `null`) `role`
/// defaults to `"user"`.
///
/// # Errors
///
/// Errors are surfaced (not silently downgraded) so a malformed envelope fails
/// loudly instead of running the agent on garbage. An error is returned when:
///
/// * `buf` is empty or whitespace-only;
/// * `buf` is not valid JSON;
/// * `role` is present but is not a string, or is a string other than
///   `"user"` — the non-interactive surface is single-user-turn only;
/// * `content` is missing or is not a string.
fn parse_json_input_envelope(buf: &str) -> anyhow::Result<String> {
    let trimmed = buf.trim();
    if trimmed.is_empty() {
        anyhow::bail!("--input-format=json: stdin is empty (expected JSON envelope)");
    }
    let v: serde_json::Value = serde_json::from_str(trimmed)
        .map_err(|e| anyhow::anyhow!("--input-format=json: invalid JSON on stdin: {e}"))?;

    // Only an absent/null role falls back to "user". A role of any other
    // non-string JSON type (number, bool, array, object) is a malformed
    // envelope and must not be quietly treated as a user turn.
    let role = match v.get("role") {
        None | Some(serde_json::Value::Null) => "user",
        Some(serde_json::Value::String(role)) => role.as_str(),
        Some(other) => {
            anyhow::bail!("--input-format=json: field `role` must be a string, got {other}")
        }
    };
    if role != "user" {
        anyhow::bail!("--input-format=json: only role=\"user\" supported, got \"{role}\"");
    }

    let content = v
        .get("content")
        .and_then(|c| c.as_str())
        .ok_or_else(|| anyhow::anyhow!("--input-format=json: missing string field `content`"))?;
    Ok(content.to_owned())
}

/// PMAT-CODE-OUTPUT-FORMAT-001 (M-NON-INT-001): serialize the outcome of a
/// run as a compact JSON envelope modelled on Claude Code's
/// `claude -p --output-format json` output:
///
/// ```json
/// {
///   "type": "result",
///   "subtype": "success",
///   "is_error": false,
///   "duration_ms": 1234,
///   "result": "the assistant text",
///   "session_id": "<uuidv7-shaped>",
///   "num_turns": 1,
///   "tokens_in": 10,
///   "tokens_out": 20,
///   "total_cost_usd": 0
/// }
/// ```
///
/// `subtype` is `"error"` when `is_error` is set and `"success"` otherwise.
/// `duration_ms` is `elapsed` in milliseconds, `num_turns` is
/// `result.iterations`, and the token counts come from `result.usage`.
fn build_json_result_envelope(
    result: &super::result::AgentLoopResult,
    elapsed: std::time::Duration,
    is_error: bool,
) -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    // Microseconds since the Unix epoch; zero if the clock reads earlier.
    let micros: u128 = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_micros(),
        Err(_) => 0,
    };

    // UUIDv7-shaped id, formatted the same way as the one in
    // emit_ccpa_trace but taken from this function's own clock read.
    let field_a = (micros >> 64) as u32 & 0xFFFF_FFFF;
    let field_b = ((micros >> 48) & 0xFFFF) as u16;
    let field_c = ((micros >> 32) & 0xFFFF) as u16;
    let field_d = (micros & 0xFFFF_FFFF_FFFF) as u64;
    let session_id = format!("{field_a:08x}-{field_b:04x}-7000-{field_c:04x}-{field_d:012x}");

    let subtype = if is_error { "error" } else { "success" };
    let duration_ms = elapsed.as_millis() as u64;

    serde_json::json!({
        "type": "result",
        "subtype": subtype,
        "is_error": is_error,
        "duration_ms": duration_ms,
        "result": result.text,
        "session_id": session_id,
        "num_turns": result.iterations,
        "tokens_in": result.usage.input_tokens,
        "tokens_out": result.usage.output_tokens,
        // Local sovereign inference: cost is always zero by construction.
        "total_cost_usd": 0,
    })
    .to_string()
}

// Prompts and exit codes extracted to code_prompts.rs
use super::code_prompts::{
    estimate_model_params_from_name, map_error_to_exit_code, scale_prompt_for_model,
    CODE_SYSTEM_PROMPT, COMPACT_SYSTEM_PROMPT,
};

#[cfg(test)]
#[path = "code_tests.rs"]
mod tests;