Skip to main content

batuta/agent/
manifest.rs

1//! Agent manifest configuration.
2//!
3//! Defines the TOML-based configuration for agent instances.
4//! Includes model path, resource quotas (Muda elimination),
5//! granted capabilities (Poka-Yoke), and privacy tier.
6
7use serde::{Deserialize, Serialize};
8use std::path::{Path, PathBuf};
9
10use super::capability::Capability;
11use crate::serve::backends::PrivacyTier;
12
/// Agent configuration loaded from TOML.
///
/// The container-level `#[serde(default)]` means any key missing from the
/// manifest is filled from [`AgentManifest::default`], so a partial (or
/// empty) document still deserializes. Call [`AgentManifest::validate`]
/// afterwards to check cross-field consistency.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct AgentManifest {
    /// Human-readable agent name. Must be non-empty to pass `validate()`.
    pub name: String,
    /// Semantic version.
    pub version: String,
    /// Description of what this agent does.
    pub description: String,
    /// LLM model configuration.
    pub model: ModelConfig,
    /// Resource quotas (Muda elimination).
    pub resources: ResourceQuota,
    /// Granted capabilities (Poka-Yoke).
    pub capabilities: Vec<Capability>,
    /// Privacy tier. Default: Sovereign (local-only).
    pub privacy: PrivacyTier,
    /// External MCP servers to connect to (agents-mcp feature). [F-022]
    ///
    /// Only present when the crate is built with the `agents-mcp` feature.
    #[cfg(feature = "agents-mcp")]
    #[serde(default)]
    pub mcp_servers: Vec<McpServerConfig>,
    /// Hooks fired on agent lifecycle events (Claude-Code parity). [PMAT-CODE-HOOKS-001]
    ///
    /// ```toml
    /// [[hooks]]
    /// event = "SessionStart"
    /// command = "date >> ~/.apr/session.log"
    ///
    /// [[hooks]]
    /// event = "PreToolUse"
    /// matcher = "shell"
    /// command = "./scripts/shell-guard.sh"
    /// ```
    #[serde(default)]
    pub hooks: Vec<super::hooks::HookConfig>,
    /// Hostnames agents may reach via `NetworkTool` / `BrowserTool`.
    /// Empty → network tools not registered (Sovereign-by-default).
    /// Ignored when `privacy = Sovereign` (tier always wins — Poka-Yoke).
    /// [PMAT-CODE-WEB-TOOLS-001]
    ///
    /// NOTE(review): the "ignored under Sovereign" rule is enforced outside
    /// this file; `validate()` here does not inspect this field.
    ///
    /// ```toml
    /// privacy = "Standard"
    /// allowed_hosts = ["docs.anthropic.com", "crates.io"]
    /// ```
    #[serde(default)]
    pub allowed_hosts: Vec<String>,
}
61
62impl Default for AgentManifest {
63    fn default() -> Self {
64        Self {
65            name: "unnamed-agent".into(),
66            version: "0.1.0".into(),
67            description: String::new(),
68            model: ModelConfig::default(),
69            resources: ResourceQuota::default(),
70            capabilities: vec![Capability::Rag, Capability::Memory],
71            privacy: PrivacyTier::Sovereign,
72            #[cfg(feature = "agents-mcp")]
73            mcp_servers: Vec::new(),
74            hooks: Vec::new(),
75            allowed_hosts: Vec::new(),
76        }
77    }
78}
79
/// LLM model configuration.
///
/// Every field is optional in TOML: the container-level `#[serde(default)]`
/// fills missing keys from [`ModelConfig::default`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ModelConfig {
    /// Path to local model file (GGUF/APR/SafeTensors).
    /// Takes precedence over `model_repo` in `resolve_model_path()`.
    pub model_path: Option<PathBuf>,
    /// Remote model identifier (Phase 2, for spillover).
    /// Rejected by `AgentManifest::validate()` under the Sovereign tier.
    pub remote_model: Option<String>,
    /// `HuggingFace` repo ID for auto-pull (Phase 2).
    /// When set and `model_path` is None, resolves via `apr pull`.
    pub model_repo: Option<String>,
    /// Quantization variant for auto-pull (e.g., `q4_k_m`).
    /// `resolve_model_path()` assumes `q4_k_m` when this is unset.
    pub model_quantization: Option<String>,
    /// Maximum tokens per completion. Default: 4096; must be > 0.
    pub max_tokens: u32,
    /// Sampling temperature. Default: 0.3; validated against [0.0, 2.0].
    pub temperature: f32,
    /// System prompt injected at start of conversation.
    pub system_prompt: String,
    /// Context window size override (auto-detected if None).
    pub context_window: Option<usize>,
}
102
103impl Default for ModelConfig {
104    fn default() -> Self {
105        Self {
106            model_path: None,
107            remote_model: None,
108            model_repo: None,
109            model_quantization: None,
110            max_tokens: 4096,
111            temperature: 0.3,
112            system_prompt: "You are a helpful assistant.".into(),
113            context_window: None,
114        }
115    }
116}
117
118impl ModelConfig {
119    /// Resolve the effective model path from explicit config only.
120    ///
121    /// Resolution order:
122    /// 1. Explicit `model_path` — return as-is
123    /// 2. `model_repo` — resolve via pacha cache
124    /// 3. Neither — return None
125    ///
126    /// Note: auto-discovery from standard paths is done separately
127    /// in `cmd_code` (via `discover_model()`) to avoid side effects
128    /// in agent manifest validation and tests.
129    pub fn resolve_model_path(&self) -> Option<PathBuf> {
130        if let Some(ref path) = self.model_path {
131            return Some(path.clone());
132        }
133        if let Some(ref repo) = self.model_repo {
134            let quant = self.model_quantization.as_deref().unwrap_or("q4_k_m");
135            let cache_dir = dirs::cache_dir()
136                .unwrap_or_else(|| PathBuf::from("/tmp"))
137                .join("pacha")
138                .join("models");
139            let filename = format!("{}-{}.gguf", repo.replace('/', "--"), quant,);
140            return Some(cache_dir.join(filename));
141        }
142        None
143    }
144
145    /// Resolve model path with auto-discovery fallback.
146    ///
147    /// Same as `resolve_model_path()` but also scans standard paths
148    /// (`~/.apr/models/`, `~/.cache/huggingface/`, `./models/`) for
149    /// APR/GGUF files. Used by `cmd_code` for the interactive REPL.
150    pub fn resolve_model_path_with_discovery(&self) -> Option<PathBuf> {
151        self.resolve_model_path().or_else(Self::discover_model)
152    }
153
154    /// Check if model needs to be downloaded (auto-pull).
155    ///
156    /// Returns `Some(repo)` if `model_repo` is set but the
157    /// resolved cache path does not exist on disk.
158    pub fn needs_pull(&self) -> Option<&str> {
159        if self.model_path.is_some() {
160            return None;
161        }
162        if let Some(ref repo) = self.model_repo {
163            if let Some(path) = self.resolve_model_path() {
164                if !path.exists() {
165                    return Some(repo.as_str());
166                }
167            }
168        }
169        None
170    }
171
172    /// Discover a local model by scanning standard paths.
173    ///
174    /// Search order (per apr-code.md §5.1):
175    /// 1. `~/.apr/models/`
176    /// 2. `~/.cache/huggingface/` (hub models)
177    /// 3. `./models/` (project-local)
178    ///
179    /// Within each directory, prefer `.apr` over `.gguf` (APR is the
180    /// stack's native format — faster loading, row-major layout).
181    /// Files sorted by modification time (newest first).
182    ///
183    /// **PMAT-150 (Jidoka):** APR files are validated at discovery time —
184    /// if an APR file lacks an embedded tokenizer, it is deprioritized
185    /// so GGUF files are tried first. This prevents the user from hitting
186    /// a dead-end error when the only APR model is broken.
187    pub fn discover_model() -> Option<PathBuf> {
188        // (path, mtime, is_apr, is_valid)
189        let mut candidates: Vec<(PathBuf, std::time::SystemTime, bool, bool)> = Vec::new();
190
191        let search_dirs = Self::model_search_dirs();
192        for dir in &search_dirs {
193            if !dir.is_dir() {
194                continue;
195            }
196            if let Ok(entries) = std::fs::read_dir(dir) {
197                for entry in entries.flatten() {
198                    let path = entry.path();
199                    let is_apr = path.extension().is_some_and(|e| e == "apr");
200                    let is_gguf = path.extension().is_some_and(|e| e == "gguf");
201                    if !is_apr && !is_gguf {
202                        continue;
203                    }
204                    let mtime = entry
205                        .metadata()
206                        .ok()
207                        .and_then(|m| m.modified().ok())
208                        .unwrap_or(std::time::UNIX_EPOCH);
209
210                    // PMAT-150: validate APR files at discovery (Jidoka).
211                    // Invalid APR → deprioritize (valid=false) so GGUF wins.
212                    let is_valid = super::driver::validate::is_valid_model_file(&path);
213
214                    candidates.push((path, mtime, is_apr, is_valid));
215                }
216            }
217        }
218
219        if candidates.is_empty() {
220            return None;
221        }
222
223        // Sort: valid → preferred-name → newest mtime → APR format (tiebreaker).
224        //
225        // PMAT-185: mtime before format — the model the user most recently
226        // downloaded is more likely their intended default.
227        //
228        // Default-model preference (added 2026-04-28): when the user has
229        // pulled the recommended `apr code` default
230        // (Qwen3-Coder-30B-A3B-Instruct), prefer it over a newer-but-smaller
231        // model. The 4090's 24 GB capacity rewards the larger model, and the
232        // small fallback hits PMAT-190 thinking-loop bugs that emit gibberish
233        // — bad UX even though mtime says it's "newest". See
234        // `is_preferred_default_model` for the canonical name list.
235        candidates.sort_by(|a, b| {
236            let a_pref = is_preferred_default_model(&a.0);
237            let b_pref = is_preferred_default_model(&b.0);
238            b.3.cmp(&a.3) // valid preferred
239                .then_with(|| b_pref.cmp(&a_pref)) // preferred-name first
240                .then_with(|| b.1.cmp(&a.1)) // newest mtime
241                .then_with(|| b.2.cmp(&a.2)) // APR format (tiebreaker)
242        });
243
244        Some(candidates[0].0.clone())
245    }
246
247    /// Sort model candidates by priority. Extracted for contract testing (PMAT-188).
248    ///
249    /// Sort order: valid > preferred-name > newest mtime > APR format.
250    #[cfg(test)]
251    pub(crate) fn sort_candidates(
252        candidates: &mut [(std::path::PathBuf, std::time::SystemTime, bool, bool)],
253    ) {
254        candidates.sort_by(|a, b| {
255            let a_pref = is_preferred_default_model(&a.0);
256            let b_pref = is_preferred_default_model(&b.0);
257            b.3.cmp(&a.3)
258                .then_with(|| b_pref.cmp(&a_pref))
259                .then_with(|| b.1.cmp(&a.1))
260                .then_with(|| b.2.cmp(&a.2))
261        });
262    }
263
264    /// Standard model search directories.
265    pub fn model_search_dirs() -> Vec<PathBuf> {
266        let mut dirs = Vec::new();
267        if let Some(home) = dirs::home_dir() {
268            dirs.push(home.join(".apr").join("models"));
269            // `apr pull` writes content-addressed files here. Names are
270            // hashes (e.g. `2b88b180a790988f.gguf`) so they won't trip
271            // the preferred-name filter on their own — pair with a
272            // friendly symlink in `~/.apr/models/` for default-discovery
273            // to pick the recommended model.
274            dirs.push(home.join(".cache").join("pacha").join("models"));
275            dirs.push(home.join(".cache").join("huggingface"));
276        }
277        dirs.push(PathBuf::from("./models"));
278        dirs
279    }
280
281    /// Auto-pull model via `apr pull` subprocess.
282    ///
283    /// Invokes `apr pull <repo>` with a configurable timeout.
284    /// The `apr` CLI handles caching internally at
285    /// `~/.cache/pacha/models/`. Returns the resolved cache path
286    /// on success.
287    ///
288    /// Jidoka: stops on subprocess failure rather than continuing
289    /// with a missing model.
290    pub fn auto_pull(&self, timeout_secs: u64) -> Result<PathBuf, AutoPullError> {
291        let repo = self.model_repo.as_deref().ok_or(AutoPullError::NoRepo)?;
292
293        let target_path = self.resolve_model_path().ok_or(AutoPullError::NoRepo)?;
294
295        // Check if `apr` binary is available
296        let apr_path = which_apr()?;
297
298        // Build model reference: repo or repo:quant
299        let model_ref = match self.model_quantization.as_deref() {
300            Some(q) => format!("{repo}:{q}"),
301            None => repo.to_string(),
302        };
303
304        let mut child = std::process::Command::new(&apr_path)
305            .args(["pull", &model_ref])
306            .stdout(std::process::Stdio::inherit())
307            .stderr(std::process::Stdio::piped())
308            .spawn()
309            .map_err(|e| AutoPullError::Subprocess(format!("cannot spawn apr pull: {e}")))?;
310
311        let output = wait_with_timeout(&mut child, timeout_secs)?;
312
313        if !output.status.success() {
314            let stderr = String::from_utf8_lossy(&output.stderr);
315            return Err(AutoPullError::Subprocess(format!(
316                "apr pull exited with {}: {}",
317                output.status,
318                stderr.trim(),
319            )));
320        }
321
322        if !target_path.exists() {
323            return Err(AutoPullError::Subprocess(
324                "apr pull completed but model file not found at expected path".into(),
325            ));
326        }
327
328        Ok(target_path)
329    }
330}
331
/// Tell whether `path` names one of the recommended `apr code` default
/// models. [`ModelConfig::discover_model`] uses this to rank preferred
/// models ahead of newer-but-smaller ones.
///
/// As of 2026-04-28 the canonical default is
/// `Qwen3-Coder-30B-A3B-Instruct` at Q4_K_M (~17–19 GB depending on
/// quant variant) — see the `qwen3-coder` alias in
/// `aprender-registry/src/aliases.rs`.
///
/// The match is an ASCII-case-insensitive substring test against the
/// final path component only; parent directory names never count. A
/// friendly symlink such as
/// `~/.apr/models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf` pointing at a
/// content-hashed file in `~/.cache/pacha/models/` is the idiomatic way
/// to opt in. Paths without a file name, or whose file name is not valid
/// UTF-8, are never preferred.
fn is_preferred_default_model(path: &Path) -> bool {
    const PREFERRED_NAME_TOKENS: &[&str] = &[
        // Primary: Qwen3-Coder-30B-A3B-Instruct (any quant).
        "qwen3-coder-30b-a3b",
        // Secondary fallbacks (still 4090-appropriate; any of these
        // beats a 1-2 B fallback).
        "qwen3-coder-next",
        "qwen2.5-coder-32b",
        "qwen2.5-coder-14b",
    ];

    path.file_name()
        .and_then(|base| base.to_str())
        .map(str::to_ascii_lowercase)
        .is_some_and(|lowered| PREFERRED_NAME_TOKENS.iter().any(|token| lowered.contains(*token)))
}
361
/// Failure modes of the model auto-pull workflow.
#[derive(Debug)]
pub enum AutoPullError {
    /// No `model_repo` configured.
    NoRepo,
    /// `apr` binary not found in PATH.
    NotInstalled,
    /// Subprocess execution failed; the payload is the full message.
    Subprocess(String),
    /// Filesystem I/O error; the payload is the full message.
    Io(String),
}

impl std::fmt::Display for AutoPullError {
    /// Render the user-facing message. The payload-carrying variants print
    /// their message verbatim, with no prefix added.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = match self {
            Self::NoRepo => "no model_repo configured",
            Self::NotInstalled => {
                "apr binary not found in PATH; install with: cargo install apr-cli"
            }
            Self::Subprocess(msg) => msg,
            Self::Io(msg) => msg,
        };
        f.write_str(text)
    }
}

impl std::error::Error for AutoPullError {}
388
389/// Locate the `apr` binary in PATH.
390fn which_apr() -> Result<PathBuf, AutoPullError> {
391    // Check common names: `apr`, `apr-cli`
392    for name in &["apr", "apr-cli"] {
393        if let Ok(path) = which::which(name) {
394            return Ok(path);
395        }
396    }
397    Err(AutoPullError::NotInstalled)
398}
399
400/// Wait for a child process with a polling timeout.
401fn wait_with_timeout(
402    child: &mut std::process::Child,
403    timeout_secs: u64,
404) -> Result<std::process::Output, AutoPullError> {
405    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
406
407    loop {
408        match child.try_wait() {
409            Ok(Some(status)) => {
410                let stderr = child
411                    .stderr
412                    .take()
413                    .map(|mut s| {
414                        let mut buf = Vec::new();
415                        std::io::Read::read_to_end(&mut s, &mut buf).ok();
416                        buf
417                    })
418                    .unwrap_or_default();
419                return Ok(std::process::Output { status, stdout: Vec::new(), stderr });
420            }
421            Ok(None) => {
422                if std::time::Instant::now() >= deadline {
423                    child.kill().ok();
424                    return Err(AutoPullError::Subprocess(format!(
425                        "apr pull timed out after {timeout_secs}s"
426                    )));
427                }
428                std::thread::sleep(std::time::Duration::from_millis(500));
429            }
430            Err(e) => {
431                return Err(AutoPullError::Subprocess(format!("wait error: {e}")));
432            }
433        }
434    }
435}
436
437/// Resource quotas (Muda elimination).
438#[derive(Debug, Clone, Serialize, Deserialize)]
439#[serde(default)]
440pub struct ResourceQuota {
441    /// Maximum loop iterations per invocation.
442    pub max_iterations: u32,
443    /// Maximum tool calls per invocation.
444    pub max_tool_calls: u32,
445    /// Maximum cost in USD (for hybrid deployments).
446    pub max_cost_usd: f64,
447    /// Maximum cumulative token budget (input+output). None = unlimited.
448    #[serde(default)]
449    pub max_tokens_budget: Option<u64>,
450}
451
452impl Default for ResourceQuota {
453    fn default() -> Self {
454        Self { max_iterations: 20, max_tool_calls: 50, max_cost_usd: 0.0, max_tokens_budget: None }
455    }
456}
457
/// Configuration for an external MCP server connection. [F-022]
///
/// Only compiled with the `agents-mcp` feature. Unlike the other manifest
/// structs there is no container-level `#[serde(default)]`, so `name` and
/// `transport` must be present in each `mcp_servers` entry; the remaining
/// fields may be omitted.
#[cfg(feature = "agents-mcp")]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct McpServerConfig {
    /// MCP server name (used for capability matching). Must be non-empty
    /// to pass manifest validation.
    pub name: String,
    /// Transport type (stdio, SSE, WebSocket).
    pub transport: McpTransport,
    /// For stdio: command + args to launch the server process.
    /// Manifest validation rejects a stdio server with an empty command.
    #[serde(default)]
    pub command: Vec<String>,
    /// For SSE/WebSocket: URL to connect to.
    pub url: Option<String>,
    /// Tool names granted from this server. `["*"]` grants all.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Environment variables to set in the MCP subprocess (stdio only).
    /// PMAT-CODE-MCP-ENV-001: threaded from `.mcp.json` `env` field.
    /// Empty map = inherit parent env unchanged. Only stdio transport
    /// honors this; SSE/WebSocket use HTTP, not subprocess spawn.
    #[serde(default)]
    pub env: std::collections::BTreeMap<String, String>,
}
481
/// MCP transport mechanism. [F-022]
///
/// Serialized in `snake_case`, i.e. `"stdio"`, `"sse"`, and `"web_socket"`
/// in a manifest. `Stdio` is the `Default` variant.
#[cfg(feature = "agents-mcp")]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum McpTransport {
    /// Subprocess communication via stdin/stdout.
    #[default]
    Stdio,
    /// Server-Sent Events over HTTP. Blocked by validation under the
    /// Sovereign privacy tier.
    Sse,
    /// WebSocket full-duplex. Blocked by validation under the Sovereign
    /// privacy tier.
    WebSocket,
}
495
496impl AgentManifest {
497    /// Parse an agent manifest from TOML string.
498    pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> {
499        toml::from_str(toml_str)
500    }
501
502    /// Validate the manifest for consistency.
503    pub fn validate(&self) -> Result<(), Vec<String>> {
504        let mut errors = Vec::new();
505
506        if self.name.is_empty() {
507            errors.push("name must not be empty".into());
508        }
509        if self.resources.max_iterations == 0 {
510            errors.push("max_iterations must be > 0".into());
511        }
512        if self.resources.max_tool_calls == 0 {
513            errors.push("max_tool_calls must be > 0".into());
514        }
515        if self.model.max_tokens == 0 {
516            errors.push("max_tokens must be > 0".into());
517        }
518        if self.model.temperature < 0.0 || self.model.temperature > 2.0 {
519            errors.push("temperature must be in [0.0, 2.0]".into());
520        }
521        if self.privacy == PrivacyTier::Sovereign && self.model.remote_model.is_some() {
522            errors.push("sovereign privacy tier cannot use remote_model".into());
523        }
524        if self.model.model_repo.is_some() && self.model.model_path.is_some() {
525            errors.push("model_repo and model_path are mutually exclusive".into());
526        }
527        #[cfg(feature = "agents-mcp")]
528        self.validate_mcp_servers(&mut errors);
529
530        if errors.is_empty() {
531            Ok(())
532        } else {
533            Err(errors)
534        }
535    }
536
537    /// Validate MCP server configurations (Poka-Yoke).
538    #[cfg(feature = "agents-mcp")]
539    fn validate_mcp_servers(&self, errors: &mut Vec<String>) {
540        for server in &self.mcp_servers {
541            if server.name.is_empty() {
542                errors.push("MCP server name must not be empty".into());
543            }
544            if self.privacy == PrivacyTier::Sovereign
545                && matches!(server.transport, McpTransport::Sse | McpTransport::WebSocket)
546            {
547                errors.push(format!(
548                    "sovereign privacy tier blocks network MCP transport for server '{}'",
549                    server.name,
550                ));
551            }
552            if matches!(server.transport, McpTransport::Stdio) && server.command.is_empty() {
553                errors.push(format!(
554                    "MCP server '{}' uses stdio transport but has no command",
555                    server.name,
556                ));
557            }
558        }
559    }
560}
561
562#[cfg(test)]
563#[path = "manifest_tests.rs"]
564mod tests;
565
566#[cfg(test)]
567#[path = "manifest_tests_validation.rs"]
568mod tests_validation;
569
570#[cfg(test)]
571#[path = "manifest_tests_discovery.rs"]
572mod tests_discovery;