Skip to main content

ati/core/
manifest.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::Path;
5use thiserror::Error;
6
/// Separator between provider name and tool name in compound tool identifiers.
/// Example: `"finnhub:quote"`, `"github:search_repositories"`.
pub const TOOL_SEP: char = ':';
/// String form of [`TOOL_SEP`], for call sites that need a `&str`
/// (e.g. when building a compound name with `format!`).
pub const TOOL_SEP_STR: &str = ":";
11
/// Errors produced while locating, reading, parsing, or validating manifests.
#[derive(Error, Debug)]
pub enum ManifestError {
    /// Reading a manifest failed. Carries a description of what was being
    /// read (usually the file path) and the underlying I/O error.
    #[error("Failed to read manifest file {0}: {1}")]
    Io(String, std::io::Error),
    /// The file at the given path could not be deserialized as a manifest.
    #[error("Failed to parse manifest {0}: {1}")]
    Parse(String, toml::de::Error),
    /// The manifests directory does not exist or is not a directory.
    #[error("No manifests directory found at {0}")]
    NoDirectory(String),
    /// The manifest parsed, but failed a load-time consistency check.
    /// Carries the manifest path and a human-readable reason.
    #[error("Manifest {0} is invalid: {1}")]
    Invalid(String, String),
}
23
24#[derive(Debug, Clone, Deserialize)]
25#[serde(rename_all = "snake_case")]
26#[derive(Default)]
27pub enum AuthType {
28    Bearer,
29    Header,
30    Query,
31    Basic,
32    #[default]
33    None,
34    Oauth2,
35    /// API key is embedded in the URL path via `${key_name}` placeholder.
36    /// No auth header is sent — the key is resolved from the keyring and
37    /// interpolated into the URL at connection time by `resolve_env_value`.
38    /// Example: `mcp_url = "https://mcp.serpapi.com/${serpapi_api_key}/mcp"`
39    Url,
40}
41
/// The `provider` section of a manifest: identity, authentication, and
/// handler-specific settings shared by all tools the provider exposes.
///
/// Only `name` and `description` are required; every other field falls back
/// to its serde default when omitted.
#[derive(Debug, Clone, Deserialize)]
pub struct Provider {
    /// Provider name. Used as the prefix of compound tool identifiers for
    /// MCP tools, and as the name of the implicit tool registered for CLI
    /// providers that declare no tools.
    pub name: String,
    /// Human-readable description. Reused as the description of the implicit
    /// tool registered for CLI providers that declare no tools.
    pub description: String,
    /// Base URL for HTTP providers. Optional for MCP providers.
    #[serde(default)]
    pub base_url: String,
    /// Authentication scheme; `AuthType::None` when omitted.
    #[serde(default)]
    pub auth_type: AuthType,
    /// Name of the credential used for authentication.
    /// NOTE(review): presumably a keyring key name, following the
    /// `${key_name}` convention used elsewhere in this file — confirm
    /// against the auth code.
    #[serde(default)]
    pub auth_key_name: Option<String>,
    /// Custom header name for auth_type = "header" (default: "X-Api-Key").
    /// Examples: "X-Finnhub-Token", "X-API-KEY", "Authorization"
    #[serde(default)]
    pub auth_header_name: Option<String>,
    /// Custom query parameter name for auth_type = "query" (default: "api_key").
    #[serde(default)]
    pub auth_query_name: Option<String>,
    /// Optional prefix for auth header value (e.g. "Token ", "Basic ").
    /// Used with auth_type = "header". Value becomes: "{prefix}{key}".
    #[serde(default)]
    pub auth_value_prefix: Option<String>,
    /// Additional headers to include on every request for this provider.
    /// Examples: X-Goog-FieldMask, X-EBAY-C-MARKETPLACE-ID
    #[serde(default)]
    pub extra_headers: HashMap<String, String>,
    /// Token URL for OAuth2 (relative to base_url or absolute)
    #[serde(default)]
    pub oauth2_token_url: Option<String>,
    /// Second key name for OAuth2 client_secret
    #[serde(default)]
    pub auth_secret_name: Option<String>,
    /// Optional override: name of the sandbox env var whose value is sent as
    /// `Authorization: Bearer <value>` to the proxy when calling tools
    /// declared by this provider. Defaults to `ATI_SESSION_TOKEN` when
    /// `None` or when the named env var is unset/empty.
    ///
    /// Used for audience separation through the proxy (issue #121): the
    /// orchestrator mints a per-MCP-audience JWT and stores it in a
    /// dedicated env var (e.g. `PARCHA_TOOLS_SESSION_TOKEN`), the manifest
    /// declares which env var to use for that provider, and the proxy is
    /// configured with `ATI_JWT_ACCEPTED_AUDIENCES` to accept the
    /// alternative audience.
    ///
    /// The same env → `<NAME>_FILE` → default-path resolution that
    /// `ATI_SESSION_TOKEN` enjoys is applied (see [`core::token`]) so the
    /// per-provider token gets the same hot-rotation semantics.
    #[serde(default)]
    pub auth_session_token_env: Option<String>,
    /// If true, send OAuth2 credentials via Basic Auth header instead of form body.
    /// Some providers (e.g. Sovos) require this per RFC 6749 §2.3.1.
    #[serde(default)]
    pub oauth2_basic_auth: bool,
    /// Marks the provider as internal. Tools of internal providers are left
    /// out of `ManifestRegistry::list_public_tools` and do not receive an
    /// auto-assigned scope at load time.
    #[serde(default)]
    pub internal: bool,
    /// Handler kind; "http" when omitted. Other values referenced in this
    /// file are "mcp", "cli", "openapi" and "file_manager".
    #[serde(default = "default_handler")]
    pub handler: String,

    // --- MCP provider fields (handler = "mcp") ---
    /// MCP transport type: "stdio" or "http"
    #[serde(default)]
    pub mcp_transport: Option<String>,
    /// Command to launch stdio MCP server (e.g., "npx", "uvx")
    #[serde(default)]
    pub mcp_command: Option<String>,
    /// Arguments for stdio command (e.g., ["-y", "@modelcontextprotocol/server-github"])
    #[serde(default)]
    pub mcp_args: Vec<String>,
    /// URL for HTTP/Streamable HTTP MCP server
    #[serde(default)]
    pub mcp_url: Option<String>,
    /// Environment variables to pass to stdio subprocess
    #[serde(default)]
    pub mcp_env: HashMap<String, String>,

    // --- CLI provider fields (handler = "cli") ---
    /// Command to run for CLI providers (e.g., "gsutil", "gh", "kubectl")
    #[serde(default)]
    pub cli_command: Option<String>,
    /// Default args prepended to every invocation
    #[serde(default)]
    pub cli_default_args: Vec<String>,
    /// Environment variables for CLI. ${key} = string from keyring, @{key} = credential file
    #[serde(default)]
    pub cli_env: HashMap<String, String>,
    /// Default timeout in seconds (default: 120)
    #[serde(default)]
    pub cli_timeout_secs: Option<u64>,
    /// Named flags whose value is an output file path the proxy must capture.
    /// Example: `["--output", "-o", "--out"]`. When the agent passes one of these
    /// flags + a value, the proxy substitutes a temp path, runs the CLI, then
    /// reads the file back and base64s it into the response. The sandbox-side
    /// CLI writes those bytes to the original path the agent specified.
    #[serde(default)]
    pub cli_output_args: Vec<String>,
    /// Subcommand prefix → 0-based positional argument index that designates
    /// an output file path. Example: `{"browse screenshot": 0}` matches
    /// `bb browse screenshot /tmp/x.png` — arg 0 of the remaining positional
    /// args (after the matched prefix) is the output path.
    #[serde(default)]
    pub cli_output_positional: HashMap<String, usize>,

    // --- file_manager provider fields (handler = "file_manager") ---
    /// Operator-declared allowlist of upload destinations. Each key is a
    /// short name agents can pass via `--destination <key>`; the value is a
    /// typed sink (GCS bucket, fal storage). Anything not in this map is
    /// refused. **An empty map disables uploads entirely.**
    #[serde(default)]
    pub upload_destinations: HashMap<String, crate::core::file_manager::UploadDestination>,
    /// Destination key used when the agent omits `--destination`. Must be
    /// present in `upload_destinations` (validated at load time).
    #[serde(default)]
    pub upload_default_destination: Option<String>,

    // --- OpenAPI provider fields (handler = "openapi") ---
    /// Path (relative to ~/.ati/specs/) or URL to OpenAPI spec (JSON or YAML)
    #[serde(default)]
    pub openapi_spec: Option<String>,
    /// Only include operations with these tags
    #[serde(default)]
    pub openapi_include_tags: Vec<String>,
    /// Exclude operations with these tags
    #[serde(default)]
    pub openapi_exclude_tags: Vec<String>,
    /// Only include operations with these operationIds
    #[serde(default)]
    pub openapi_include_operations: Vec<String>,
    /// Exclude operations with these operationIds
    #[serde(default)]
    pub openapi_exclude_operations: Vec<String>,
    /// Maximum number of operations to register (for huge APIs)
    #[serde(default)]
    pub openapi_max_operations: Option<usize>,
    /// Per-operationId overrides (hint, tags, description, response_extract, etc.)
    #[serde(default)]
    pub openapi_overrides: HashMap<String, OpenApiToolOverride>,

    // --- Auth generator (dynamic credential generation) ---
    /// Optional auth generator for producing short-lived credentials at call time.
    /// Runs where secrets live (proxy server in proxy mode, local machine in local mode).
    #[serde(default)]
    pub auth_generator: Option<AuthGenerator>,

    // --- Optional metadata fields ---
    /// Provider category for discovery (e.g., "finance", "search", "social")
    #[serde(default)]
    pub category: Option<String>,

    /// Associated skill names that teach agents how to use this provider's tools.
    /// Resolved from the SkillRegistry (installed skills or GCS registry).
    #[serde(default)]
    pub skills: Vec<String>,
}
195
/// Serde default for `Provider::handler`: a provider is treated as plain
/// HTTP unless its manifest says otherwise.
fn default_handler() -> String {
    String::from("http")
}
199
/// Per-operationId overrides for OpenAPI-discovered tools.
///
/// Every field is optional; list fields default to empty. How each override
/// is merged into the generated tool is decided by the OpenAPI conversion
/// code, which is not part of this file.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct OpenApiToolOverride {
    /// Override for the tool's hint.
    pub hint: Option<String>,
    /// Tags supplied by the override.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Example invocations supplied by the override.
    #[serde(default)]
    pub examples: Vec<String>,
    /// Override for the tool's description.
    pub description: Option<String>,
    /// Override for the scope required to use the tool.
    pub scope: Option<String>,
    /// Override for the response extraction expression.
    pub response_extract: Option<String>,
    /// Override for the response format, given as a string.
    /// NOTE(review): presumably one of the `ResponseFormat` names — confirm.
    pub response_format: Option<String>,
}
213
/// Dynamic auth generator configuration — produces short-lived credentials at call time.
///
/// Two types:
/// - `command`: runs an external command, captures stdout as the credential
/// - `script`: writes an inline script to a temp file and runs it via an interpreter
///
/// Variable expansion in `args` and `env` values:
/// - `${key_name}` → keyring lookup
/// - `${JWT_SUB}` → agent's JWT `sub` claim
/// - `${JWT_SCOPE}` → agent's JWT `scope` claim
/// - `${TOOL_NAME}` → tool being invoked
/// - `${TIMESTAMP}` → current unix timestamp
#[derive(Debug, Clone, Deserialize)]
pub struct AuthGenerator {
    /// Generator kind, read from the manifest key `type`.
    #[serde(rename = "type")]
    pub gen_type: AuthGenType,
    /// Command to run (for `type = "command"`)
    pub command: Option<String>,
    /// Arguments for the command
    #[serde(default)]
    pub args: Vec<String>,
    /// Interpreter for inline script (for `type = "script"`, e.g. "python3")
    pub interpreter: Option<String>,
    /// Inline script body (for `type = "script"`)
    pub script: Option<String>,
    /// TTL for cached credentials (0 = no cache)
    #[serde(default)]
    pub cache_ttl_secs: u64,
    /// Output format: "text" (trimmed stdout) or "json" (parsed, fields extracted via `inject`)
    #[serde(default)]
    pub output_format: AuthOutputFormat,
    /// Environment variables for the subprocess (values support `${key}` expansion)
    #[serde(default)]
    pub env: HashMap<String, String>,
    /// For JSON output: map dot-notation JSON paths to injection targets
    #[serde(default)]
    pub inject: HashMap<String, InjectTarget>,
    /// Subprocess timeout in seconds (default: 30)
    #[serde(default = "default_gen_timeout")]
    pub timeout_secs: u64,
}
255
/// Serde default for `AuthGenerator::timeout_secs`.
fn default_gen_timeout() -> u64 {
    const DEFAULT_GEN_TIMEOUT_SECS: u64 = 30;
    DEFAULT_GEN_TIMEOUT_SECS
}
259
/// Kind of auth generator, written in manifests as `"command"` or `"script"`.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuthGenType {
    /// Run an external command; its stdout is the credential.
    Command,
    /// Write an inline script to a temp file and run it via an interpreter.
    Script,
}
266
/// How an auth generator's output is interpreted; written in manifests as
/// `"text"` or `"json"`. Defaults to `Text`.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum AuthOutputFormat {
    /// Trimmed stdout is the credential.
    #[default]
    Text,
    /// Stdout is parsed as JSON and fields are extracted via `inject`.
    Json,
}
274
/// Target for injecting a JSON-extracted credential value.
#[derive(Debug, Clone, Deserialize)]
pub struct InjectTarget {
    /// Where to inject: "header", "env", or "query"
    /// (read from the manifest key `type`).
    #[serde(rename = "type")]
    pub inject_type: String,
    /// Name of the header/env var/query param
    pub name: String,
}
284
285#[derive(Debug, Clone, Deserialize)]
286#[serde(rename_all = "UPPERCASE")]
287#[derive(Default)]
288pub enum HttpMethod {
289    #[serde(alias = "get", alias = "Get")]
290    #[default]
291    Get,
292    #[serde(alias = "post", alias = "Post")]
293    Post,
294    #[serde(alias = "put", alias = "Put")]
295    Put,
296    #[serde(alias = "delete", alias = "Delete")]
297    Delete,
298}
299
300impl std::fmt::Display for HttpMethod {
301    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
302        match self {
303            HttpMethod::Get => write!(f, "GET"),
304            HttpMethod::Post => write!(f, "POST"),
305            HttpMethod::Put => write!(f, "PUT"),
306            HttpMethod::Delete => write!(f, "DELETE"),
307        }
308    }
309}
310
/// Output format for extracted response data.
///
/// Written in manifests in snake_case (`"markdown_table"`, `"json"`,
/// `"text"`, `"raw"`). Defaults to `Text`.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ResponseFormat {
    MarkdownTable,
    Json,
    #[default]
    Text,
    Raw,
}
320
/// Per-tool response handling: what to extract from the API response and
/// how to present it. Both fields are optional in the manifest.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ResponseConfig {
    /// JSONPath expression to extract useful content from the API response
    #[serde(default)]
    pub extract: Option<String>,
    /// Output format for the extracted data
    /// (`ResponseFormat::Text` when omitted).
    #[serde(default)]
    pub format: ResponseFormat,
}
330
/// A single tool exposed by a provider.
///
/// Only `name` and `description` are required in a manifest; the remaining
/// fields fall back to their serde defaults.
#[derive(Debug, Clone, Deserialize)]
pub struct Tool {
    /// Tool name; the key under which the tool is indexed in the registry.
    pub name: String,
    /// Human-readable description of what the tool does.
    pub description: String,
    /// Endpoint for the tool. Empty when omitted, and empty for the implicit
    /// tool registered for CLI providers.
    #[serde(default)]
    pub endpoint: String,
    /// HTTP method (`HttpMethod::Get` when omitted).
    #[serde(default)]
    pub method: HttpMethod,
    /// Scope required to use this tool (e.g. "tool:web_search")
    ///
    /// When omitted and the provider is not internal, the registry assigns
    /// `tool:<name>` at load time.
    #[serde(default)]
    pub scope: Option<String>,
    /// JSON Schema for tool input
    #[serde(default)]
    pub input_schema: Option<serde_json::Value>,
    /// Response extraction config
    #[serde(default)]
    pub response: Option<ResponseConfig>,

    // --- Optional metadata fields ---
    /// Tags for discovery (e.g., ["search", "real-time"])
    #[serde(default)]
    pub tags: Vec<String>,
    /// Short hint for the LLM on when to use this tool
    #[serde(default)]
    pub hint: Option<String>,
    /// Example invocations
    #[serde(default)]
    pub examples: Vec<String>,
}
360
/// A parsed manifest file: one provider + multiple tools.
/// For MCP providers, tools may be empty — they're discovered dynamically via tools/list.
#[derive(Debug, Clone, Deserialize)]
pub struct Manifest {
    /// The manifest's `provider` section.
    pub provider: Provider,
    /// Tools declared under the manifest key `tools`; empty when omitted.
    #[serde(default, rename = "tools")]
    pub tools: Vec<Tool>,
}
369
/// A cached (ephemeral) provider, persisted as JSON in `$ATI_DIR/cache/providers/<name>.json`.
/// Used by `ati provider load` to make providers available across process invocations
/// without writing permanent TOML manifests.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedProvider {
    /// Provider name; becomes `Provider::name` when converted.
    pub name: String,
    /// "openapi" or "mcp"
    ///
    /// Any other value is converted to the "http" handler.
    pub provider_type: String,
    /// Base URL; empty when omitted.
    #[serde(default)]
    pub base_url: String,
    /// Auth type name as a string (e.g. "bearer", "header", "query",
    /// "basic", "oauth2"). Names that are not recognized on conversion are
    /// treated as no authentication.
    #[serde(default)]
    pub auth_type: String,
    /// Copied to `Provider::auth_key_name` on conversion.
    #[serde(default)]
    pub auth_key_name: Option<String>,
    /// Copied to `Provider::auth_header_name` on conversion.
    #[serde(default)]
    pub auth_header_name: Option<String>,
    /// Copied to `Provider::auth_query_name` on conversion.
    #[serde(default)]
    pub auth_query_name: Option<String>,
    // OpenAPI fields
    /// Full text of the OpenAPI spec. Parsed at registry load time to
    /// produce the provider's tools when `provider_type` is "openapi".
    #[serde(default)]
    pub spec_content: Option<String>,
    // MCP fields
    #[serde(default)]
    pub mcp_transport: Option<String>,
    #[serde(default)]
    pub mcp_url: Option<String>,
    #[serde(default)]
    pub mcp_command: Option<String>,
    #[serde(default)]
    pub mcp_args: Vec<String>,
    #[serde(default)]
    pub mcp_env: HashMap<String, String>,
    // CLI fields
    #[serde(default)]
    pub cli_command: Option<String>,
    #[serde(default)]
    pub cli_default_args: Vec<String>,
    #[serde(default)]
    pub cli_env: HashMap<String, String>,
    #[serde(default)]
    pub cli_timeout_secs: Option<u64>,
    // MCP/HTTP auth
    // NOTE(review): this field is not read by `to_provider` in this file —
    // confirm where it is consumed.
    #[serde(default)]
    pub auth: Option<String>,
    // Skills
    #[serde(default)]
    pub skills: Vec<String>,
    // Cache metadata
    /// Creation time as an RFC 3339 timestamp. An unparseable value makes
    /// the entry count as expired.
    pub created_at: String,
    /// Lifetime of the entry in seconds, counted from `created_at`.
    pub ttl_seconds: u64,
}
421
422impl CachedProvider {
423    /// Returns true if this cached provider has expired.
424    pub fn is_expired(&self) -> bool {
425        let created = match DateTime::parse_from_rfc3339(&self.created_at) {
426            Ok(dt) => dt.with_timezone(&Utc),
427            Err(_) => return true, // Can't parse → treat as expired
428        };
429        let now = Utc::now();
430        let elapsed = now.signed_duration_since(created);
431        elapsed.num_seconds() as u64 > self.ttl_seconds
432    }
433
434    /// Returns the expiry time as an ISO timestamp.
435    pub fn expires_at(&self) -> Option<String> {
436        let created = DateTime::parse_from_rfc3339(&self.created_at).ok()?;
437        let expires = created + chrono::Duration::seconds(self.ttl_seconds as i64);
438        Some(expires.to_rfc3339())
439    }
440
441    /// Returns remaining TTL in seconds (0 if expired).
442    pub fn remaining_seconds(&self) -> u64 {
443        let created = match DateTime::parse_from_rfc3339(&self.created_at) {
444            Ok(dt) => dt.with_timezone(&Utc),
445            Err(_) => return 0,
446        };
447        let now = Utc::now();
448        let elapsed = now.signed_duration_since(created).num_seconds() as u64;
449        self.ttl_seconds.saturating_sub(elapsed)
450    }
451
452    /// Build a Provider struct from this cached entry.
453    pub fn to_provider(&self) -> Provider {
454        let auth_type = match self.auth_type.as_str() {
455            "bearer" => AuthType::Bearer,
456            "header" => AuthType::Header,
457            "query" => AuthType::Query,
458            "basic" => AuthType::Basic,
459            "oauth2" => AuthType::Oauth2,
460            _ => AuthType::None,
461        };
462
463        let handler = match self.provider_type.as_str() {
464            "mcp" => "mcp".to_string(),
465            "openapi" => "openapi".to_string(),
466            _ => "http".to_string(),
467        };
468
469        Provider {
470            name: self.name.clone(),
471            description: format!("{} (cached)", self.name),
472            base_url: self.base_url.clone(),
473            auth_type,
474            auth_key_name: self.auth_key_name.clone(),
475            auth_header_name: self.auth_header_name.clone(),
476            auth_query_name: self.auth_query_name.clone(),
477            auth_value_prefix: None,
478            extra_headers: HashMap::new(),
479            oauth2_token_url: None,
480            auth_secret_name: None,
481            auth_session_token_env: None,
482            oauth2_basic_auth: false,
483            internal: false,
484            handler,
485            mcp_transport: self.mcp_transport.clone(),
486            mcp_command: self.mcp_command.clone(),
487            mcp_args: self.mcp_args.clone(),
488            mcp_url: self.mcp_url.clone(),
489            mcp_env: self.mcp_env.clone(),
490            openapi_spec: None,
491            openapi_include_tags: Vec::new(),
492            openapi_exclude_tags: Vec::new(),
493            openapi_include_operations: Vec::new(),
494            openapi_exclude_operations: Vec::new(),
495            openapi_max_operations: None,
496            openapi_overrides: HashMap::new(),
497            cli_command: self.cli_command.clone(),
498            cli_default_args: self.cli_default_args.clone(),
499            cli_env: self.cli_env.clone(),
500            cli_timeout_secs: self.cli_timeout_secs,
501            cli_output_args: Vec::new(),
502            cli_output_positional: HashMap::new(),
503            upload_destinations: HashMap::new(),
504            upload_default_destination: None,
505            auth_generator: None,
506            category: None,
507            skills: self.skills.clone(),
508        }
509    }
510}
511
/// A tool discovered from an MCP server via tools/list.
/// Converted into a Tool for the registry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpToolDef {
    /// Tool name as reported by the server (without the provider prefix).
    pub name: String,
    /// Optional description reported by the server.
    #[serde(default)]
    pub description: Option<String>,
    /// Optional JSON Schema for the tool input; serialized as `inputSchema`.
    #[serde(default, rename = "inputSchema")]
    pub input_schema: Option<serde_json::Value>,
}
522
/// Registry holding all loaded manifests, with indexes for fast lookup.
pub struct ManifestRegistry {
    /// Loaded manifests (provider + tools), in the order they were added.
    manifests: Vec<Manifest>,
    /// tool_name -> (manifest_index, tool_index)
    ///
    /// Both indices refer to positions in `manifests` and in that
    /// manifest's `tools` vector.
    tool_index: HashMap<String, (usize, usize)>,
}
529
530impl ManifestRegistry {
531    /// Load all .toml manifests from a directory.
532    /// OpenAPI providers (handler = "openapi") have their specs loaded and tools auto-registered.
533    pub fn load(dir: &Path) -> Result<Self, ManifestError> {
534        if !dir.is_dir() {
535            return Err(ManifestError::NoDirectory(dir.display().to_string()));
536        }
537
538        let mut manifests = Vec::new();
539        let mut tool_index = HashMap::new();
540
541        let pattern = dir.join("*.toml");
542        let entries = glob::glob(pattern.to_str().unwrap_or(""))
543            .map_err(|e| ManifestError::NoDirectory(e.to_string()))?;
544
545        // Resolve specs dir: sibling of manifests dir (e.g., ~/.ati/specs/)
546        let specs_dir = dir.parent().map(|p| p.join("specs"));
547
548        for entry in entries {
549            let path = entry.map_err(|e| {
550                ManifestError::Io(format!("{e}"), std::io::Error::other("glob error"))
551            })?;
552            let contents = std::fs::read_to_string(&path)
553                .map_err(|e| ManifestError::Io(path.display().to_string(), e))?;
554            let mut manifest: Manifest = toml::from_str(&contents)
555                .map_err(|e| ManifestError::Parse(path.display().to_string(), e))?;
556
557            // For OpenAPI providers, load spec and register tools
558            if manifest.provider.is_openapi() {
559                if let Some(spec_ref) = &manifest.provider.openapi_spec {
560                    match crate::core::openapi::load_and_register(
561                        &manifest.provider,
562                        spec_ref,
563                        specs_dir.as_deref(),
564                    ) {
565                        Ok(tools) => {
566                            manifest.tools = tools;
567                        }
568                        Err(e) => {
569                            tracing::warn!(
570                                provider = %manifest.provider.name,
571                                error = %e,
572                                "failed to load OpenAPI spec for provider"
573                            );
574                            // Graceful degradation — continue without tools
575                        }
576                    }
577                }
578            }
579
580            // For file_manager providers, validate that any declared default
581            // destination is actually present in the allowlist. Refuse to load
582            // an inconsistent manifest rather than silently coercing it.
583            if manifest.provider.handler == "file_manager" {
584                if let Some(ref default) = manifest.provider.upload_default_destination {
585                    if !manifest.provider.upload_destinations.contains_key(default) {
586                        return Err(ManifestError::Invalid(
587                            path.display().to_string(),
588                            format!(
589                                "upload_default_destination '{default}' is not present in [provider.upload_destinations]"
590                            ),
591                        ));
592                    }
593                }
594            }
595
596            // For CLI providers with no [[tools]], auto-register one implicit tool
597            if manifest.provider.is_cli() && manifest.tools.is_empty() {
598                let tool_name = manifest.provider.name.clone();
599                manifest.tools.push(Tool {
600                    name: tool_name.clone(),
601                    description: manifest.provider.description.clone(),
602                    endpoint: String::new(),
603                    method: HttpMethod::Get,
604                    scope: Some(format!("tool:{tool_name}")),
605                    input_schema: None,
606                    response: None,
607                    tags: Vec::new(),
608                    hint: None,
609                    examples: Vec::new(),
610                });
611            }
612
613            // Auto-assign scope to tools that don't have one explicitly set.
614            // This ensures all tools participate in JWT scope filtering.
615            let provider_name = &manifest.provider.name;
616            for tool in &mut manifest.tools {
617                if tool.scope.is_none() && !manifest.provider.internal {
618                    tool.scope = Some(format!("tool:{}", tool.name));
619                    tracing::trace!(
620                        tool = %tool.name,
621                        provider = %provider_name,
622                        scope = ?tool.scope,
623                        "auto-assigned scope to tool"
624                    );
625                }
626            }
627
628            let mi = manifests.len();
629            for (ti, tool) in manifest.tools.iter().enumerate() {
630                tool_index.insert(tool.name.clone(), (mi, ti));
631            }
632            manifests.push(manifest);
633        }
634
635        // Load cached providers from cache/providers/*.json
636        // Cache dir is sibling of manifests dir: e.g., ~/.ati/cache/providers/
637        if let Some(parent) = dir.parent() {
638            let cache_dir = parent.join("cache").join("providers");
639            if cache_dir.is_dir() {
640                let cache_pattern = cache_dir.join("*.json");
641                if let Ok(cache_entries) = glob::glob(cache_pattern.to_str().unwrap_or("")) {
642                    for entry in cache_entries {
643                        let path = match entry {
644                            Ok(p) => p,
645                            Err(_) => continue,
646                        };
647                        let content = match std::fs::read_to_string(&path) {
648                            Ok(c) => c,
649                            Err(_) => continue,
650                        };
651                        let cached: CachedProvider = match serde_json::from_str(&content) {
652                            Ok(c) => c,
653                            Err(_) => continue,
654                        };
655
656                        // Skip and delete expired entries
657                        if cached.is_expired() {
658                            let _ = std::fs::remove_file(&path);
659                            continue;
660                        }
661
662                        // Skip if a permanent manifest with same provider name already exists
663                        if manifests.iter().any(|m| m.provider.name == cached.name) {
664                            continue;
665                        }
666
667                        let provider = cached.to_provider();
668
669                        let mut cached_tools = Vec::new();
670                        if cached.provider_type == "openapi" {
671                            if let Some(spec_content) = &cached.spec_content {
672                                if let Ok(spec) = crate::core::openapi::parse_spec(spec_content) {
673                                    let filters = crate::core::openapi::OpenApiFilters {
674                                        include_tags: vec![],
675                                        exclude_tags: vec![],
676                                        include_operations: vec![],
677                                        exclude_operations: vec![],
678                                        max_operations: None,
679                                    };
680                                    let defs = crate::core::openapi::extract_tools(&spec, &filters);
681                                    cached_tools = defs
682                                        .into_iter()
683                                        .map(|def| {
684                                            crate::core::openapi::to_ati_tool(
685                                                def,
686                                                &cached.name,
687                                                &HashMap::new(),
688                                            )
689                                        })
690                                        .collect();
691                                }
692                            }
693                        }
694                        // MCP providers have empty tools — lazy discovery at run time
695
696                        let mi = manifests.len();
697                        for (ti, tool) in cached_tools.iter().enumerate() {
698                            tool_index.insert(tool.name.clone(), (mi, ti));
699                        }
700                        manifests.push(Manifest {
701                            provider,
702                            tools: cached_tools,
703                        });
704                    }
705                }
706            }
707        }
708
709        let mut registry = ManifestRegistry {
710            manifests,
711            tool_index,
712        };
713        register_file_manager_provider(&mut registry);
714        Ok(registry)
715    }
716
717    /// Create an empty registry (no manifests loaded).
718    pub fn empty() -> Self {
719        let mut registry = ManifestRegistry {
720            manifests: Vec::new(),
721            tool_index: HashMap::new(),
722        };
723        register_file_manager_provider(&mut registry);
724        registry
725    }
726
727    /// Look up a tool by name. Returns the provider and tool definition.
728    pub fn get_tool(&self, name: &str) -> Option<(&Provider, &Tool)> {
729        self.tool_index.get(name).map(|(mi, ti)| {
730            let m = &self.manifests[*mi];
731            (&m.provider, &m.tools[*ti])
732        })
733    }
734
735    /// List all tools across all providers.
736    pub fn list_tools(&self) -> Vec<(&Provider, &Tool)> {
737        self.manifests
738            .iter()
739            .flat_map(|m| m.tools.iter().map(move |t| (&m.provider, t)))
740            .collect()
741    }
742
743    /// List all providers.
744    pub fn list_providers(&self) -> Vec<&Provider> {
745        self.manifests.iter().map(|m| &m.provider).collect()
746    }
747
748    /// List all non-internal tools (excludes providers marked internal=true).
749    pub fn list_public_tools(&self) -> Vec<(&Provider, &Tool)> {
750        self.manifests
751            .iter()
752            .filter(|m| !m.provider.internal)
753            .flat_map(|m| m.tools.iter().map(move |t| (&m.provider, t)))
754            .collect()
755    }
756
757    /// Get the number of loaded tools.
758    pub fn tool_count(&self) -> usize {
759        self.tool_index.len()
760    }
761
762    /// Get the number of loaded providers.
763    pub fn provider_count(&self) -> usize {
764        self.manifests.len()
765    }
766
767    /// List all MCP providers (handler = "mcp").
768    pub fn list_mcp_providers(&self) -> Vec<&Provider> {
769        self.manifests
770            .iter()
771            .filter(|m| m.provider.handler == "mcp")
772            .map(|m| &m.provider)
773            .collect()
774    }
775
776    /// If `tool_name` has a `<provider>:<name>` prefix matching an MCP provider, return it.
777    pub fn find_mcp_provider_for_tool(&self, tool_name: &str) -> Option<&Provider> {
778        let prefix = tool_name.split(TOOL_SEP).next()?;
779        self.manifests
780            .iter()
781            .find(|m| m.provider.handler == "mcp" && m.provider.name == prefix)
782            .map(|m| &m.provider)
783    }
784
785    /// List all OpenAPI providers (handler = "openapi").
786    pub fn list_openapi_providers(&self) -> Vec<&Provider> {
787        self.manifests
788            .iter()
789            .filter(|m| m.provider.handler == "openapi")
790            .map(|m| &m.provider)
791            .collect()
792    }
793
794    /// Check if a provider with the given name exists.
795    pub fn has_provider(&self, name: &str) -> bool {
796        self.manifests.iter().any(|m| m.provider.name == name)
797    }
798
799    /// Get tools belonging to a specific provider.
800    pub fn tools_by_provider(&self, provider_name: &str) -> Vec<(&Provider, &Tool)> {
801        self.manifests
802            .iter()
803            .filter(|m| m.provider.name == provider_name)
804            .flat_map(|m| m.tools.iter().map(move |t| (&m.provider, t)))
805            .collect()
806    }
807
808    /// List all CLI providers (handler = "cli").
809    pub fn list_cli_providers(&self) -> Vec<&Provider> {
810        self.manifests
811            .iter()
812            .filter(|m| m.provider.handler == "cli")
813            .map(|m| &m.provider)
814            .collect()
815    }
816
817    /// Register dynamically discovered MCP tools for a provider.
818    /// Tools are prefixed with provider name: `"github:read_file"`.
819    pub fn register_mcp_tools(&mut self, provider_name: &str, mcp_tools: Vec<McpToolDef>) {
820        // Find the manifest for this provider
821        let mi = match self
822            .manifests
823            .iter()
824            .position(|m| m.provider.name == provider_name)
825        {
826            Some(idx) => idx,
827            None => return,
828        };
829
830        for mcp_tool in mcp_tools {
831            let prefixed_name = format!("{}{}{}", provider_name, TOOL_SEP_STR, mcp_tool.name);
832
833            let tool = Tool {
834                name: prefixed_name.clone(),
835                description: mcp_tool.description.unwrap_or_default(),
836                endpoint: String::new(),
837                method: HttpMethod::Post,
838                scope: Some(format!("tool:{prefixed_name}")),
839                input_schema: mcp_tool.input_schema,
840                response: None,
841                tags: Vec::new(),
842                hint: None,
843                examples: Vec::new(),
844            };
845
846            let ti = self.manifests[mi].tools.len();
847            self.manifests[mi].tools.push(tool);
848            self.tool_index.insert(prefixed_name, (mi, ti));
849        }
850    }
851}
852
853impl Provider {
854    /// Returns true if this provider uses MCP protocol.
855    pub fn is_mcp(&self) -> bool {
856        self.handler == "mcp"
857    }
858
859    /// Returns true if this provider uses OpenAPI spec-based tool discovery.
860    pub fn is_openapi(&self) -> bool {
861        self.handler == "openapi"
862    }
863
864    /// Returns true if this provider uses CLI handler.
865    pub fn is_cli(&self) -> bool {
866        self.handler == "cli"
867    }
868
869    /// Returns the MCP transport type, defaulting to "stdio".
870    pub fn mcp_transport_type(&self) -> &str {
871        self.mcp_transport.as_deref().unwrap_or("stdio")
872    }
873
874    /// Returns true if this provider uses the built-in file_manager handler.
875    pub fn is_file_manager(&self) -> bool {
876        self.handler == "file_manager"
877    }
878}
879
880/// Register the virtual `file_manager` provider (download + upload tools).
881///
882/// Three cases:
883/// 1. Operator manifest already declares the `file_manager` provider WITH tools
884///    → leave it alone.
885/// 2. Operator manifest declares it but with no `[[tools]]` (the common case —
886///    they're just declaring the upload allowlist) → attach the built-in tools
887///    so the operator only needs the destinations block.
888/// 3. No manifest at all → register a default provider with the built-in tools
889///    and an empty destinations map (uploads will return UploadNotConfigured).
890pub(crate) fn register_file_manager_provider(registry: &mut ManifestRegistry) {
891    let download_tool = build_file_manager_download_tool();
892    let upload_tool = build_file_manager_upload_tool();
893
894    if let Some(mi) = registry
895        .manifests
896        .iter()
897        .position(|m| m.provider.handler == "file_manager")
898    {
899        // Operator declared it. Backfill tools if they didn't list any.
900        if registry.manifests[mi].tools.is_empty() {
901            let tools = vec![download_tool, upload_tool];
902            for (ti, tool) in tools.iter().enumerate() {
903                registry.tool_index.insert(tool.name.clone(), (mi, ti));
904            }
905            registry.manifests[mi].tools = tools;
906        }
907        return;
908    }
909
910    let provider = Provider {
911        name: "file_manager".to_string(),
912        description: "Generic binary download/upload for agents".to_string(),
913        base_url: String::new(),
914        auth_type: AuthType::None,
915        auth_key_name: None,
916        auth_header_name: None,
917        auth_query_name: None,
918        auth_value_prefix: None,
919        extra_headers: HashMap::new(),
920        oauth2_token_url: None,
921        auth_secret_name: None,
922        auth_session_token_env: None,
923        oauth2_basic_auth: false,
924        internal: false,
925        handler: "file_manager".to_string(),
926        mcp_transport: None,
927        mcp_command: None,
928        mcp_args: Vec::new(),
929        mcp_url: None,
930        mcp_env: HashMap::new(),
931        cli_command: None,
932        cli_default_args: Vec::new(),
933        cli_env: HashMap::new(),
934        cli_timeout_secs: None,
935        cli_output_args: Vec::new(),
936        cli_output_positional: HashMap::new(),
937        upload_destinations: HashMap::new(),
938        upload_default_destination: None,
939        openapi_spec: None,
940        openapi_include_tags: Vec::new(),
941        openapi_exclude_tags: Vec::new(),
942        openapi_include_operations: Vec::new(),
943        openapi_exclude_operations: Vec::new(),
944        openapi_max_operations: None,
945        openapi_overrides: HashMap::new(),
946        auth_generator: None,
947        category: Some("file_manager".to_string()),
948        skills: Vec::new(),
949    };
950
951    let tools = vec![download_tool, upload_tool];
952    let mi = registry.manifests.len();
953    for (ti, tool) in tools.iter().enumerate() {
954        registry.tool_index.insert(tool.name.clone(), (mi, ti));
955    }
956    registry.manifests.push(Manifest { provider, tools });
957}
958
959fn build_file_manager_download_tool() -> Tool {
960    let schema = serde_json::json!({
961        "type": "object",
962        "required": ["url"],
963        "properties": {
964            "url": {"type": "string", "description": "URL to fetch bytes from"},
965            "out": {"type": "string", "description": "Local path to write bytes; if omitted, returns base64 inline"},
966            "inline": {"type": "boolean", "description": "Return bytes as base64 in the response instead of writing to disk"},
967            "max_bytes": {"type": "integer", "description": "Abort if body exceeds this many bytes (default 500 MB)"},
968            "timeout": {"type": "integer", "description": "Request timeout in seconds (default 120)"},
969            "headers": {"type": "object", "description": "Extra request headers, e.g. {\"Authorization\": \"Bearer abc\"}"},
970            "follow_redirects": {"type": "boolean", "description": "Follow 3xx redirects (default true)"}
971        }
972    });
973
974    Tool {
975        name: "file_manager:download".to_string(),
976        description: "Download bytes from a URL. Writes to --out <path> or returns base64 inline."
977            .to_string(),
978        endpoint: String::new(),
979        method: HttpMethod::Post,
980        scope: Some("tool:file_manager:download".to_string()),
981        input_schema: Some(schema),
982        response: None,
983        tags: vec![
984            "file".to_string(),
985            "download".to_string(),
986            "binary".to_string(),
987        ],
988        hint: Some(
989            "Use for 'I have a URL, give me the bytes' — images, video, audio, PDFs, CSVs, ZIPs."
990                .to_string(),
991        ),
992        examples: vec![
993            "ati run file_manager:download --url https://example.com/file.mp4 --out /tmp/clip.mp4"
994                .to_string(),
995            "ati run file_manager:download --url https://example.com/data.csv --inline true"
996                .to_string(),
997        ],
998    }
999}
1000
1001fn build_file_manager_upload_tool() -> Tool {
1002    let schema = serde_json::json!({
1003        "type": "object",
1004        "required": ["path"],
1005        "properties": {
1006            "path": {"type": "string", "description": "Local file path to upload"},
1007            "content_type": {"type": "string", "description": "Override MIME type (default: inferred from extension)"},
1008            "object_name": {"type": "string", "description": "Object key (when destination is GCS-style); default: auto-generated"},
1009            "destination": {"type": "string", "description": "Allowlist key declared in the operator's file_manager.toml manifest (e.g. \"fal\", \"gcs\"). Omit to use the operator default."}
1010        }
1011    });
1012
1013    Tool {
1014        name: "file_manager:upload".to_string(),
1015        description: "Upload a local file to a manifest-declared destination, return a public URL.".to_string(),
1016        endpoint: String::new(),
1017        method: HttpMethod::Post,
1018        scope: Some("tool:file_manager:upload".to_string()),
1019        input_schema: Some(schema),
1020        response: None,
1021        tags: vec!["file".to_string(), "upload".to_string(), "binary".to_string()],
1022        hint: Some("Upload a local file to a manifest-declared destination (GCS, fal_storage, etc.) and get a public URL.".to_string()),
1023        examples: vec![
1024            "ati run file_manager:upload --path /tmp/narration.mp3".to_string(),
1025            "ati run file_manager:upload --path /tmp/report.pdf --destination gcs".to_string(),
1026        ],
1027    }
1028}