Skip to main content

ati/core/
manifest.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::Path;
5use thiserror::Error;
6
/// Separator between provider name and tool name in compound tool identifiers.
/// Example: `"finnhub:quote"`, `"github:search_repositories"`.
pub const TOOL_SEP: char = ':';
/// String form of [`TOOL_SEP`], for APIs that take a `&str` separator.
pub const TOOL_SEP_STR: &str = ":";
11
12#[derive(Error, Debug)]
13pub enum ManifestError {
14    #[error("Failed to read manifest file {0}: {1}")]
15    Io(String, std::io::Error),
16    #[error("Failed to parse manifest {0}: {1}")]
17    Parse(String, toml::de::Error),
18    #[error("No manifests directory found at {0}")]
19    NoDirectory(String),
20    #[error("Manifest {0} is invalid: {1}")]
21    Invalid(String, String),
22}
23
24#[derive(Debug, Clone, Deserialize)]
25#[serde(rename_all = "snake_case")]
26#[derive(Default)]
27pub enum AuthType {
28    Bearer,
29    Header,
30    Query,
31    Basic,
32    #[default]
33    None,
34    Oauth2,
35    /// API key is embedded in the URL path via `${key_name}` placeholder.
36    /// No auth header is sent — the key is resolved from the keyring and
37    /// interpolated into the URL at connection time by `resolve_env_value`.
38    /// Example: `mcp_url = "https://mcp.serpapi.com/${serpapi_api_key}/mcp"`
39    Url,
40}
41
42#[derive(Debug, Clone, Deserialize)]
43pub struct Provider {
44    pub name: String,
45    pub description: String,
46    /// Base URL for HTTP providers. Optional for MCP providers.
47    #[serde(default)]
48    pub base_url: String,
49    #[serde(default)]
50    pub auth_type: AuthType,
51    #[serde(default)]
52    pub auth_key_name: Option<String>,
53    /// Custom header name for auth_type = "header" (default: "X-Api-Key").
54    /// Examples: "X-Finnhub-Token", "X-API-KEY", "Authorization"
55    #[serde(default)]
56    pub auth_header_name: Option<String>,
57    /// Custom query parameter name for auth_type = "query" (default: "api_key").
58    #[serde(default)]
59    pub auth_query_name: Option<String>,
60    /// Optional prefix for auth header value (e.g. "Token ", "Basic ").
61    /// Used with auth_type = "header". Value becomes: "{prefix}{key}".
62    #[serde(default)]
63    pub auth_value_prefix: Option<String>,
64    /// Additional headers to include on every request for this provider.
65    /// Examples: X-Goog-FieldMask, X-EBAY-C-MARKETPLACE-ID
66    #[serde(default)]
67    pub extra_headers: HashMap<String, String>,
68    /// Token URL for OAuth2 (relative to base_url or absolute)
69    #[serde(default)]
70    pub oauth2_token_url: Option<String>,
71    /// Second key name for OAuth2 client_secret
72    #[serde(default)]
73    pub auth_secret_name: Option<String>,
74    /// Optional override: name of the sandbox env var whose value is sent as
75    /// `Authorization: Bearer <value>` to the proxy when calling tools
76    /// declared by this provider. Defaults to `ATI_SESSION_TOKEN` when
77    /// `None` or when the named env var is unset/empty.
78    ///
79    /// Used for audience separation through the proxy (issue #121): the
80    /// orchestrator mints a per-MCP-audience JWT and stores it in a
81    /// dedicated env var (e.g. `PARCHA_TOOLS_SESSION_TOKEN`), the manifest
82    /// declares which env var to use for that provider, and the proxy is
83    /// configured with `ATI_JWT_ACCEPTED_AUDIENCES` to accept the
84    /// alternative audience.
85    ///
86    /// The same env → `<NAME>_FILE` → default-path resolution that
87    /// `ATI_SESSION_TOKEN` enjoys is applied (see [`core::token`]) so the
88    /// per-provider token gets the same hot-rotation semantics.
89    #[serde(default)]
90    pub auth_session_token_env: Option<String>,
91    /// If true, send OAuth2 credentials via Basic Auth header instead of form body.
92    /// Some providers (e.g. Sovos) require this per RFC 6749 §2.3.1.
93    #[serde(default)]
94    pub oauth2_basic_auth: bool,
95    #[serde(default)]
96    pub internal: bool,
97    #[serde(default = "default_handler")]
98    pub handler: String,
99
100    // --- MCP provider fields (handler = "mcp") ---
101    /// MCP transport type: "stdio" or "http"
102    #[serde(default)]
103    pub mcp_transport: Option<String>,
104    /// Command to launch stdio MCP server (e.g., "npx", "uvx")
105    #[serde(default)]
106    pub mcp_command: Option<String>,
107    /// Arguments for stdio command (e.g., ["-y", "@modelcontextprotocol/server-github"])
108    #[serde(default)]
109    pub mcp_args: Vec<String>,
110    /// URL for HTTP/Streamable HTTP MCP server
111    #[serde(default)]
112    pub mcp_url: Option<String>,
113    /// Optional override: name of the sandbox env var whose value the ATI
114    /// client ships to the proxy via the `X-Ati-Upstream-Url` header. The
115    /// proxy validates the URL against a keyring-stored glob allowlist
116    /// (`<provider>_allowed_urls`) and uses it as the MCP upstream for the
117    /// request, overriding [`mcp_url`].
118    ///
119    /// Used for per-environment routing through a shared proxy (issue #124):
120    /// one ATI proxy serves sandboxes from preview/staging/prod; each
121    /// sandbox's backend sets a per-env URL in the named env var; the proxy
122    /// validates against the operator-declared allowlist and dials the
123    /// right upstream.
124    ///
125    /// **Requires `mcp_transport = "http"`** — manifest load fails if
126    /// combined with stdio (stdio MCPs don't have URLs). Falls back to
127    /// `mcp_url` if the env var is unset or the header is absent.
128    #[serde(default)]
129    pub mcp_url_env: Option<String>,
130    /// Environment variables to pass to stdio subprocess
131    #[serde(default)]
132    pub mcp_env: HashMap<String, String>,
133
134    // --- CLI provider fields (handler = "cli") ---
135    /// Command to run for CLI providers (e.g., "gsutil", "gh", "kubectl")
136    #[serde(default)]
137    pub cli_command: Option<String>,
138    /// Default args prepended to every invocation
139    #[serde(default)]
140    pub cli_default_args: Vec<String>,
141    /// Environment variables for CLI. ${key} = string from keyring, @{key} = credential file
142    #[serde(default)]
143    pub cli_env: HashMap<String, String>,
144    /// Default timeout in seconds (default: 120)
145    #[serde(default)]
146    pub cli_timeout_secs: Option<u64>,
147    /// Named flags whose value is an output file path the proxy must capture.
148    /// Example: `["--output", "-o", "--out"]`. When the agent passes one of these
149    /// flags + a value, the proxy substitutes a temp path, runs the CLI, then
150    /// reads the file back and base64s it into the response. The sandbox-side
151    /// CLI writes those bytes to the original path the agent specified.
152    #[serde(default)]
153    pub cli_output_args: Vec<String>,
154    /// Subcommand prefix → 0-based positional argument index that designates
155    /// an output file path. Example: `{"browse screenshot": 0}` matches
156    /// `bb browse screenshot /tmp/x.png` — arg 0 of the remaining positional
157    /// args (after the matched prefix) is the output path.
158    #[serde(default)]
159    pub cli_output_positional: HashMap<String, usize>,
160
161    // --- file_manager provider fields (handler = "file_manager") ---
162    /// Operator-declared allowlist of upload destinations. Each key is a
163    /// short name agents can pass via `--destination <key>`; the value is a
164    /// typed sink (GCS bucket, fal storage). Anything not in this map is
165    /// refused. **An empty map disables uploads entirely.**
166    #[serde(default)]
167    pub upload_destinations: HashMap<String, crate::core::file_manager::UploadDestination>,
168    /// Destination key used when the agent omits `--destination`. Must be
169    /// present in `upload_destinations` (validated at load time).
170    #[serde(default)]
171    pub upload_default_destination: Option<String>,
172
173    // --- OpenAPI provider fields (handler = "openapi") ---
174    /// Path (relative to ~/.ati/specs/) or URL to OpenAPI spec (JSON or YAML)
175    #[serde(default)]
176    pub openapi_spec: Option<String>,
177    /// Only include operations with these tags
178    #[serde(default)]
179    pub openapi_include_tags: Vec<String>,
180    /// Exclude operations with these tags
181    #[serde(default)]
182    pub openapi_exclude_tags: Vec<String>,
183    /// Only include operations with these operationIds
184    #[serde(default)]
185    pub openapi_include_operations: Vec<String>,
186    /// Exclude operations with these operationIds
187    #[serde(default)]
188    pub openapi_exclude_operations: Vec<String>,
189    /// Maximum number of operations to register (for huge APIs)
190    #[serde(default)]
191    pub openapi_max_operations: Option<usize>,
192    /// Per-operationId overrides (hint, tags, description, response_extract, etc.)
193    #[serde(default)]
194    pub openapi_overrides: HashMap<String, OpenApiToolOverride>,
195
196    // --- Auth generator (dynamic credential generation) ---
197    /// Optional auth generator for producing short-lived credentials at call time.
198    /// Runs where secrets live (proxy server in proxy mode, local machine in local mode).
199    #[serde(default)]
200    pub auth_generator: Option<AuthGenerator>,
201
202    // --- Optional metadata fields ---
203    /// Provider category for discovery (e.g., "finance", "search", "social")
204    #[serde(default)]
205    pub category: Option<String>,
206
207    /// Associated skill names that teach agents how to use this provider's tools.
208    /// Resolved from the SkillRegistry (installed skills or GCS registry).
209    #[serde(default)]
210    pub skills: Vec<String>,
211}
212
/// Serde default for `Provider::handler`: providers are plain HTTP unless
/// the manifest says otherwise.
fn default_handler() -> String {
    String::from("http")
}
216
217/// Per-operationId overrides for OpenAPI-discovered tools.
218#[derive(Debug, Clone, Deserialize, Default)]
219pub struct OpenApiToolOverride {
220    pub hint: Option<String>,
221    #[serde(default)]
222    pub tags: Vec<String>,
223    #[serde(default)]
224    pub examples: Vec<String>,
225    pub description: Option<String>,
226    pub scope: Option<String>,
227    pub response_extract: Option<String>,
228    pub response_format: Option<String>,
229}
230
231/// Dynamic auth generator configuration — produces short-lived credentials at call time.
232///
233/// Two types:
234/// - `command`: runs an external command, captures stdout as the credential
235/// - `script`: writes an inline script to a temp file and runs it via an interpreter
236///
237/// Variable expansion in `args` and `env` values:
238/// - `${key_name}` → keyring lookup
239/// - `${JWT_SUB}` → agent's JWT `sub` claim
240/// - `${JWT_SCOPE}` → agent's JWT `scope` claim
241/// - `${TOOL_NAME}` → tool being invoked
242/// - `${TIMESTAMP}` → current unix timestamp
243#[derive(Debug, Clone, Deserialize)]
244pub struct AuthGenerator {
245    #[serde(rename = "type")]
246    pub gen_type: AuthGenType,
247    /// Command to run (for `type = "command"`)
248    pub command: Option<String>,
249    /// Arguments for the command
250    #[serde(default)]
251    pub args: Vec<String>,
252    /// Interpreter for inline script (for `type = "script"`, e.g. "python3")
253    pub interpreter: Option<String>,
254    /// Inline script body (for `type = "script"`)
255    pub script: Option<String>,
256    /// TTL for cached credentials (0 = no cache)
257    #[serde(default)]
258    pub cache_ttl_secs: u64,
259    /// Output format: "text" (trimmed stdout) or "json" (parsed, fields extracted via `inject`)
260    #[serde(default)]
261    pub output_format: AuthOutputFormat,
262    /// Environment variables for the subprocess (values support `${key}` expansion)
263    #[serde(default)]
264    pub env: HashMap<String, String>,
265    /// For JSON output: map dot-notation JSON paths to injection targets
266    #[serde(default)]
267    pub inject: HashMap<String, InjectTarget>,
268    /// Subprocess timeout in seconds (default: 30)
269    #[serde(default = "default_gen_timeout")]
270    pub timeout_secs: u64,
271}
272
/// Serde default for `AuthGenerator::timeout_secs`, in seconds.
fn default_gen_timeout() -> u64 {
    30
}
276
277#[derive(Debug, Clone, Deserialize)]
278#[serde(rename_all = "snake_case")]
279pub enum AuthGenType {
280    Command,
281    Script,
282}
283
284#[derive(Debug, Clone, Deserialize, Default)]
285#[serde(rename_all = "snake_case")]
286pub enum AuthOutputFormat {
287    #[default]
288    Text,
289    Json,
290}
291
292/// Target for injecting a JSON-extracted credential value.
293#[derive(Debug, Clone, Deserialize)]
294pub struct InjectTarget {
295    /// Where to inject: "header", "env", or "query"
296    #[serde(rename = "type")]
297    pub inject_type: String,
298    /// Name of the header/env var/query param
299    pub name: String,
300}
301
302#[derive(Debug, Clone, Deserialize)]
303#[serde(rename_all = "UPPERCASE")]
304#[derive(Default)]
305pub enum HttpMethod {
306    #[serde(alias = "get", alias = "Get")]
307    #[default]
308    Get,
309    #[serde(alias = "post", alias = "Post")]
310    Post,
311    #[serde(alias = "put", alias = "Put")]
312    Put,
313    #[serde(alias = "delete", alias = "Delete")]
314    Delete,
315}
316
317impl std::fmt::Display for HttpMethod {
318    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
319        match self {
320            HttpMethod::Get => write!(f, "GET"),
321            HttpMethod::Post => write!(f, "POST"),
322            HttpMethod::Put => write!(f, "PUT"),
323            HttpMethod::Delete => write!(f, "DELETE"),
324        }
325    }
326}
327
328#[derive(Debug, Clone, Deserialize, Default)]
329#[serde(rename_all = "snake_case")]
330pub enum ResponseFormat {
331    MarkdownTable,
332    Json,
333    #[default]
334    Text,
335    Raw,
336}
337
338#[derive(Debug, Clone, Deserialize, Default)]
339pub struct ResponseConfig {
340    /// JSONPath expression to extract useful content from the API response
341    #[serde(default)]
342    pub extract: Option<String>,
343    /// Output format for the extracted data
344    #[serde(default)]
345    pub format: ResponseFormat,
346}
347
348#[derive(Debug, Clone, Deserialize)]
349pub struct Tool {
350    pub name: String,
351    pub description: String,
352    #[serde(default)]
353    pub endpoint: String,
354    #[serde(default)]
355    pub method: HttpMethod,
356    /// Scope required to use this tool (e.g. "tool:web_search")
357    #[serde(default)]
358    pub scope: Option<String>,
359    /// JSON Schema for tool input
360    #[serde(default)]
361    pub input_schema: Option<serde_json::Value>,
362    /// Response extraction config
363    #[serde(default)]
364    pub response: Option<ResponseConfig>,
365
366    // --- Optional metadata fields ---
367    /// Tags for discovery (e.g., ["search", "real-time"])
368    #[serde(default)]
369    pub tags: Vec<String>,
370    /// Short hint for the LLM on when to use this tool
371    #[serde(default)]
372    pub hint: Option<String>,
373    /// Example invocations
374    #[serde(default)]
375    pub examples: Vec<String>,
376}
377
378/// A parsed manifest file: one provider + multiple tools.
379/// For MCP providers, tools may be empty — they're discovered dynamically via tools/list.
380#[derive(Debug, Clone, Deserialize)]
381pub struct Manifest {
382    pub provider: Provider,
383    #[serde(default, rename = "tools")]
384    pub tools: Vec<Tool>,
385}
386
387/// A cached (ephemeral) provider, persisted as JSON in `$ATI_DIR/cache/providers/<name>.json`.
388/// Used by `ati provider load` to make providers available across process invocations
389/// without writing permanent TOML manifests.
390#[derive(Debug, Clone, Serialize, Deserialize)]
391pub struct CachedProvider {
392    pub name: String,
393    /// "openapi" or "mcp"
394    pub provider_type: String,
395    #[serde(default)]
396    pub base_url: String,
397    #[serde(default)]
398    pub auth_type: String,
399    #[serde(default)]
400    pub auth_key_name: Option<String>,
401    #[serde(default)]
402    pub auth_header_name: Option<String>,
403    #[serde(default)]
404    pub auth_query_name: Option<String>,
405    // OpenAPI fields
406    #[serde(default)]
407    pub spec_content: Option<String>,
408    // MCP fields
409    #[serde(default)]
410    pub mcp_transport: Option<String>,
411    #[serde(default)]
412    pub mcp_url: Option<String>,
413    #[serde(default)]
414    pub mcp_command: Option<String>,
415    #[serde(default)]
416    pub mcp_args: Vec<String>,
417    #[serde(default)]
418    pub mcp_env: HashMap<String, String>,
419    // CLI fields
420    #[serde(default)]
421    pub cli_command: Option<String>,
422    #[serde(default)]
423    pub cli_default_args: Vec<String>,
424    #[serde(default)]
425    pub cli_env: HashMap<String, String>,
426    #[serde(default)]
427    pub cli_timeout_secs: Option<u64>,
428    // MCP/HTTP auth
429    #[serde(default)]
430    pub auth: Option<String>,
431    // Skills
432    #[serde(default)]
433    pub skills: Vec<String>,
434    // Cache metadata
435    pub created_at: String,
436    pub ttl_seconds: u64,
437}
438
439impl CachedProvider {
440    /// Returns true if this cached provider has expired.
441    pub fn is_expired(&self) -> bool {
442        let created = match DateTime::parse_from_rfc3339(&self.created_at) {
443            Ok(dt) => dt.with_timezone(&Utc),
444            Err(_) => return true, // Can't parse → treat as expired
445        };
446        let now = Utc::now();
447        let elapsed = now.signed_duration_since(created);
448        elapsed.num_seconds() as u64 > self.ttl_seconds
449    }
450
451    /// Returns the expiry time as an ISO timestamp.
452    pub fn expires_at(&self) -> Option<String> {
453        let created = DateTime::parse_from_rfc3339(&self.created_at).ok()?;
454        let expires = created + chrono::Duration::seconds(self.ttl_seconds as i64);
455        Some(expires.to_rfc3339())
456    }
457
458    /// Returns remaining TTL in seconds (0 if expired).
459    pub fn remaining_seconds(&self) -> u64 {
460        let created = match DateTime::parse_from_rfc3339(&self.created_at) {
461            Ok(dt) => dt.with_timezone(&Utc),
462            Err(_) => return 0,
463        };
464        let now = Utc::now();
465        let elapsed = now.signed_duration_since(created).num_seconds() as u64;
466        self.ttl_seconds.saturating_sub(elapsed)
467    }
468
469    /// Build a Provider struct from this cached entry.
470    pub fn to_provider(&self) -> Provider {
471        let auth_type = match self.auth_type.as_str() {
472            "bearer" => AuthType::Bearer,
473            "header" => AuthType::Header,
474            "query" => AuthType::Query,
475            "basic" => AuthType::Basic,
476            "oauth2" => AuthType::Oauth2,
477            _ => AuthType::None,
478        };
479
480        let handler = match self.provider_type.as_str() {
481            "mcp" => "mcp".to_string(),
482            "openapi" => "openapi".to_string(),
483            _ => "http".to_string(),
484        };
485
486        Provider {
487            name: self.name.clone(),
488            description: format!("{} (cached)", self.name),
489            base_url: self.base_url.clone(),
490            auth_type,
491            auth_key_name: self.auth_key_name.clone(),
492            auth_header_name: self.auth_header_name.clone(),
493            auth_query_name: self.auth_query_name.clone(),
494            auth_value_prefix: None,
495            extra_headers: HashMap::new(),
496            oauth2_token_url: None,
497            auth_secret_name: None,
498            auth_session_token_env: None,
499            mcp_url_env: None,
500            oauth2_basic_auth: false,
501            internal: false,
502            handler,
503            mcp_transport: self.mcp_transport.clone(),
504            mcp_command: self.mcp_command.clone(),
505            mcp_args: self.mcp_args.clone(),
506            mcp_url: self.mcp_url.clone(),
507            mcp_env: self.mcp_env.clone(),
508            openapi_spec: None,
509            openapi_include_tags: Vec::new(),
510            openapi_exclude_tags: Vec::new(),
511            openapi_include_operations: Vec::new(),
512            openapi_exclude_operations: Vec::new(),
513            openapi_max_operations: None,
514            openapi_overrides: HashMap::new(),
515            cli_command: self.cli_command.clone(),
516            cli_default_args: self.cli_default_args.clone(),
517            cli_env: self.cli_env.clone(),
518            cli_timeout_secs: self.cli_timeout_secs,
519            cli_output_args: Vec::new(),
520            cli_output_positional: HashMap::new(),
521            upload_destinations: HashMap::new(),
522            upload_default_destination: None,
523            auth_generator: None,
524            category: None,
525            skills: self.skills.clone(),
526        }
527    }
528}
529
530/// A tool discovered from an MCP server via tools/list.
531/// Converted into a Tool for the registry.
532#[derive(Debug, Clone, Serialize, Deserialize)]
533pub struct McpToolDef {
534    pub name: String,
535    #[serde(default)]
536    pub description: Option<String>,
537    #[serde(default, rename = "inputSchema")]
538    pub input_schema: Option<serde_json::Value>,
539}
540
541/// Registry holding all loaded manifests, with indexes for fast lookup.
542pub struct ManifestRegistry {
543    manifests: Vec<Manifest>,
544    /// tool_name -> (manifest_index, tool_index)
545    tool_index: HashMap<String, (usize, usize)>,
546}
547
548impl ManifestRegistry {
549    /// Load all .toml manifests from a directory.
550    /// OpenAPI providers (handler = "openapi") have their specs loaded and tools auto-registered.
551    pub fn load(dir: &Path) -> Result<Self, ManifestError> {
552        if !dir.is_dir() {
553            return Err(ManifestError::NoDirectory(dir.display().to_string()));
554        }
555
556        let mut manifests = Vec::new();
557        let mut tool_index = HashMap::new();
558
559        let pattern = dir.join("*.toml");
560        let entries = glob::glob(pattern.to_str().unwrap_or(""))
561            .map_err(|e| ManifestError::NoDirectory(e.to_string()))?;
562
563        // Resolve specs dir: sibling of manifests dir (e.g., ~/.ati/specs/)
564        let specs_dir = dir.parent().map(|p| p.join("specs"));
565
566        for entry in entries {
567            let path = entry.map_err(|e| {
568                ManifestError::Io(format!("{e}"), std::io::Error::other("glob error"))
569            })?;
570            let contents = std::fs::read_to_string(&path)
571                .map_err(|e| ManifestError::Io(path.display().to_string(), e))?;
572            let mut manifest: Manifest = toml::from_str(&contents)
573                .map_err(|e| ManifestError::Parse(path.display().to_string(), e))?;
574
575            // For OpenAPI providers, load spec and register tools
576            if manifest.provider.is_openapi() {
577                if let Some(spec_ref) = &manifest.provider.openapi_spec {
578                    match crate::core::openapi::load_and_register(
579                        &manifest.provider,
580                        spec_ref,
581                        specs_dir.as_deref(),
582                    ) {
583                        Ok(tools) => {
584                            manifest.tools = tools;
585                        }
586                        Err(e) => {
587                            tracing::warn!(
588                                provider = %manifest.provider.name,
589                                error = %e,
590                                "failed to load OpenAPI spec for provider"
591                            );
592                            // Graceful degradation — continue without tools
593                        }
594                    }
595                }
596            }
597
598            // For file_manager providers, validate that any declared default
599            // destination is actually present in the allowlist. Refuse to load
600            // an inconsistent manifest rather than silently coercing it.
601            if manifest.provider.handler == "file_manager" {
602                if let Some(ref default) = manifest.provider.upload_default_destination {
603                    if !manifest.provider.upload_destinations.contains_key(default) {
604                        return Err(ManifestError::Invalid(
605                            path.display().to_string(),
606                            format!(
607                                "upload_default_destination '{default}' is not present in [provider.upload_destinations]"
608                            ),
609                        ));
610                    }
611                }
612            }
613
614            // mcp_url_env is meaningful only for HTTP-transport MCP providers.
615            // stdio MCPs have no URL — silently ignoring the field would leak
616            // a misconfigured manifest through to runtime. Issue #124.
617            if let Some(ref env_name) = manifest.provider.mcp_url_env {
618                let trimmed = env_name.trim();
619                if trimmed.is_empty() {
620                    return Err(ManifestError::Invalid(
621                        path.display().to_string(),
622                        "mcp_url_env must not be empty when set".to_string(),
623                    ));
624                }
625                // POSIX env var names: [A-Z_][A-Z0-9_]*. We accept lowercase
626                // too for permissiveness — the actual env lookup is
627                // case-sensitive and the operator picks the name.
628                let valid_name = trimmed.chars().enumerate().all(|(i, c)| {
629                    if i == 0 {
630                        c.is_ascii_alphabetic() || c == '_'
631                    } else {
632                        c.is_ascii_alphanumeric() || c == '_'
633                    }
634                });
635                if !valid_name {
636                    return Err(ManifestError::Invalid(
637                        path.display().to_string(),
638                        format!("mcp_url_env '{env_name}' is not a valid POSIX env var name"),
639                    ));
640                }
641                let transport = manifest.provider.mcp_transport.as_deref().unwrap_or("");
642                if !manifest.provider.is_mcp() || transport != "http" {
643                    return Err(ManifestError::Invalid(
644                        path.display().to_string(),
645                        format!(
646                            "mcp_url_env requires handler = \"mcp\" and mcp_transport = \"http\" (got handler = \"{}\", transport = \"{}\")",
647                            manifest.provider.handler, transport
648                        ),
649                    ));
650                }
651            }
652
653            // For CLI providers with no [[tools]], auto-register one implicit tool
654            if manifest.provider.is_cli() && manifest.tools.is_empty() {
655                let tool_name = manifest.provider.name.clone();
656                manifest.tools.push(Tool {
657                    name: tool_name.clone(),
658                    description: manifest.provider.description.clone(),
659                    endpoint: String::new(),
660                    method: HttpMethod::Get,
661                    scope: Some(format!("tool:{tool_name}")),
662                    input_schema: None,
663                    response: None,
664                    tags: Vec::new(),
665                    hint: None,
666                    examples: Vec::new(),
667                });
668            }
669
670            // Auto-assign scope to tools that don't have one explicitly set.
671            // This ensures all tools participate in JWT scope filtering.
672            let provider_name = &manifest.provider.name;
673            for tool in &mut manifest.tools {
674                if tool.scope.is_none() && !manifest.provider.internal {
675                    tool.scope = Some(format!("tool:{}", tool.name));
676                    tracing::trace!(
677                        tool = %tool.name,
678                        provider = %provider_name,
679                        scope = ?tool.scope,
680                        "auto-assigned scope to tool"
681                    );
682                }
683            }
684
685            let mi = manifests.len();
686            for (ti, tool) in manifest.tools.iter().enumerate() {
687                tool_index.insert(tool.name.clone(), (mi, ti));
688            }
689            manifests.push(manifest);
690        }
691
692        // Load cached providers from cache/providers/*.json
693        // Cache dir is sibling of manifests dir: e.g., ~/.ati/cache/providers/
694        if let Some(parent) = dir.parent() {
695            let cache_dir = parent.join("cache").join("providers");
696            if cache_dir.is_dir() {
697                let cache_pattern = cache_dir.join("*.json");
698                if let Ok(cache_entries) = glob::glob(cache_pattern.to_str().unwrap_or("")) {
699                    for entry in cache_entries {
700                        let path = match entry {
701                            Ok(p) => p,
702                            Err(_) => continue,
703                        };
704                        let content = match std::fs::read_to_string(&path) {
705                            Ok(c) => c,
706                            Err(_) => continue,
707                        };
708                        let cached: CachedProvider = match serde_json::from_str(&content) {
709                            Ok(c) => c,
710                            Err(_) => continue,
711                        };
712
713                        // Skip and delete expired entries
714                        if cached.is_expired() {
715                            let _ = std::fs::remove_file(&path);
716                            continue;
717                        }
718
719                        // Skip if a permanent manifest with same provider name already exists
720                        if manifests.iter().any(|m| m.provider.name == cached.name) {
721                            continue;
722                        }
723
724                        let provider = cached.to_provider();
725
726                        let mut cached_tools = Vec::new();
727                        if cached.provider_type == "openapi" {
728                            if let Some(spec_content) = &cached.spec_content {
729                                if let Ok(spec) = crate::core::openapi::parse_spec(spec_content) {
730                                    let filters = crate::core::openapi::OpenApiFilters {
731                                        include_tags: vec![],
732                                        exclude_tags: vec![],
733                                        include_operations: vec![],
734                                        exclude_operations: vec![],
735                                        max_operations: None,
736                                    };
737                                    let defs = crate::core::openapi::extract_tools(&spec, &filters);
738                                    cached_tools = defs
739                                        .into_iter()
740                                        .map(|def| {
741                                            crate::core::openapi::to_ati_tool(
742                                                def,
743                                                &cached.name,
744                                                &HashMap::new(),
745                                            )
746                                        })
747                                        .collect();
748                                }
749                            }
750                        }
751                        // MCP providers have empty tools — lazy discovery at run time
752
753                        let mi = manifests.len();
754                        for (ti, tool) in cached_tools.iter().enumerate() {
755                            tool_index.insert(tool.name.clone(), (mi, ti));
756                        }
757                        manifests.push(Manifest {
758                            provider,
759                            tools: cached_tools,
760                        });
761                    }
762                }
763            }
764        }
765
766        let mut registry = ManifestRegistry {
767            manifests,
768            tool_index,
769        };
770        register_file_manager_provider(&mut registry);
771        Ok(registry)
772    }
773
774    /// Create an empty registry (no manifests loaded).
775    pub fn empty() -> Self {
776        let mut registry = ManifestRegistry {
777            manifests: Vec::new(),
778            tool_index: HashMap::new(),
779        };
780        register_file_manager_provider(&mut registry);
781        registry
782    }
783
784    /// Look up a tool by name. Returns the provider and tool definition.
785    pub fn get_tool(&self, name: &str) -> Option<(&Provider, &Tool)> {
786        self.tool_index.get(name).map(|(mi, ti)| {
787            let m = &self.manifests[*mi];
788            (&m.provider, &m.tools[*ti])
789        })
790    }
791
792    /// List all tools across all providers.
793    pub fn list_tools(&self) -> Vec<(&Provider, &Tool)> {
794        self.manifests
795            .iter()
796            .flat_map(|m| m.tools.iter().map(move |t| (&m.provider, t)))
797            .collect()
798    }
799
800    /// List all providers.
801    pub fn list_providers(&self) -> Vec<&Provider> {
802        self.manifests.iter().map(|m| &m.provider).collect()
803    }
804
805    /// List all non-internal tools (excludes providers marked internal=true).
806    pub fn list_public_tools(&self) -> Vec<(&Provider, &Tool)> {
807        self.manifests
808            .iter()
809            .filter(|m| !m.provider.internal)
810            .flat_map(|m| m.tools.iter().map(move |t| (&m.provider, t)))
811            .collect()
812    }
813
814    /// Get the number of loaded tools.
815    pub fn tool_count(&self) -> usize {
816        self.tool_index.len()
817    }
818
819    /// Get the number of loaded providers.
820    pub fn provider_count(&self) -> usize {
821        self.manifests.len()
822    }
823
824    /// List all MCP providers (handler = "mcp").
825    pub fn list_mcp_providers(&self) -> Vec<&Provider> {
826        self.manifests
827            .iter()
828            .filter(|m| m.provider.handler == "mcp")
829            .map(|m| &m.provider)
830            .collect()
831    }
832
833    /// If `tool_name` has a `<provider>:<name>` prefix matching an MCP provider, return it.
834    pub fn find_mcp_provider_for_tool(&self, tool_name: &str) -> Option<&Provider> {
835        let prefix = tool_name.split(TOOL_SEP).next()?;
836        self.manifests
837            .iter()
838            .find(|m| m.provider.handler == "mcp" && m.provider.name == prefix)
839            .map(|m| &m.provider)
840    }
841
842    /// List all OpenAPI providers (handler = "openapi").
843    pub fn list_openapi_providers(&self) -> Vec<&Provider> {
844        self.manifests
845            .iter()
846            .filter(|m| m.provider.handler == "openapi")
847            .map(|m| &m.provider)
848            .collect()
849    }
850
851    /// Check if a provider with the given name exists.
852    pub fn has_provider(&self, name: &str) -> bool {
853        self.manifests.iter().any(|m| m.provider.name == name)
854    }
855
856    /// Get tools belonging to a specific provider.
857    pub fn tools_by_provider(&self, provider_name: &str) -> Vec<(&Provider, &Tool)> {
858        self.manifests
859            .iter()
860            .filter(|m| m.provider.name == provider_name)
861            .flat_map(|m| m.tools.iter().map(move |t| (&m.provider, t)))
862            .collect()
863    }
864
865    /// List all CLI providers (handler = "cli").
866    pub fn list_cli_providers(&self) -> Vec<&Provider> {
867        self.manifests
868            .iter()
869            .filter(|m| m.provider.handler == "cli")
870            .map(|m| &m.provider)
871            .collect()
872    }
873
874    /// Register dynamically discovered MCP tools for a provider.
875    /// Tools are prefixed with provider name: `"github:read_file"`.
876    pub fn register_mcp_tools(&mut self, provider_name: &str, mcp_tools: Vec<McpToolDef>) {
877        // Find the manifest for this provider
878        let mi = match self
879            .manifests
880            .iter()
881            .position(|m| m.provider.name == provider_name)
882        {
883            Some(idx) => idx,
884            None => return,
885        };
886
887        for mcp_tool in mcp_tools {
888            let prefixed_name = format!("{}{}{}", provider_name, TOOL_SEP_STR, mcp_tool.name);
889
890            let tool = Tool {
891                name: prefixed_name.clone(),
892                description: mcp_tool.description.unwrap_or_default(),
893                endpoint: String::new(),
894                method: HttpMethod::Post,
895                scope: Some(format!("tool:{prefixed_name}")),
896                input_schema: mcp_tool.input_schema,
897                response: None,
898                tags: Vec::new(),
899                hint: None,
900                examples: Vec::new(),
901            };
902
903            let ti = self.manifests[mi].tools.len();
904            self.manifests[mi].tools.push(tool);
905            self.tool_index.insert(prefixed_name, (mi, ti));
906        }
907    }
908}
909
910impl Provider {
911    /// Returns true if this provider uses MCP protocol.
912    pub fn is_mcp(&self) -> bool {
913        self.handler == "mcp"
914    }
915
916    /// Returns true if this provider uses OpenAPI spec-based tool discovery.
917    pub fn is_openapi(&self) -> bool {
918        self.handler == "openapi"
919    }
920
921    /// Returns true if this provider uses CLI handler.
922    pub fn is_cli(&self) -> bool {
923        self.handler == "cli"
924    }
925
926    /// Returns the MCP transport type, defaulting to "stdio".
927    pub fn mcp_transport_type(&self) -> &str {
928        self.mcp_transport.as_deref().unwrap_or("stdio")
929    }
930
931    /// Returns true if this provider uses the built-in file_manager handler.
932    pub fn is_file_manager(&self) -> bool {
933        self.handler == "file_manager"
934    }
935}
936
937/// Register the virtual `file_manager` provider (download + upload tools).
938///
939/// Three cases:
940/// 1. Operator manifest already declares the `file_manager` provider WITH tools
941///    → leave it alone.
942/// 2. Operator manifest declares it but with no `[[tools]]` (the common case —
943///    they're just declaring the upload allowlist) → attach the built-in tools
944///    so the operator only needs the destinations block.
945/// 3. No manifest at all → register a default provider with the built-in tools
946///    and an empty destinations map (uploads will return UploadNotConfigured).
947pub(crate) fn register_file_manager_provider(registry: &mut ManifestRegistry) {
948    let download_tool = build_file_manager_download_tool();
949    let upload_tool = build_file_manager_upload_tool();
950
951    if let Some(mi) = registry
952        .manifests
953        .iter()
954        .position(|m| m.provider.handler == "file_manager")
955    {
956        // Operator declared it. Backfill tools if they didn't list any.
957        if registry.manifests[mi].tools.is_empty() {
958            let tools = vec![download_tool, upload_tool];
959            for (ti, tool) in tools.iter().enumerate() {
960                registry.tool_index.insert(tool.name.clone(), (mi, ti));
961            }
962            registry.manifests[mi].tools = tools;
963        }
964        return;
965    }
966
967    let provider = Provider {
968        name: "file_manager".to_string(),
969        description: "Generic binary download/upload for agents".to_string(),
970        base_url: String::new(),
971        auth_type: AuthType::None,
972        auth_key_name: None,
973        auth_header_name: None,
974        auth_query_name: None,
975        auth_value_prefix: None,
976        extra_headers: HashMap::new(),
977        oauth2_token_url: None,
978        auth_secret_name: None,
979        auth_session_token_env: None,
980        mcp_url_env: None,
981        oauth2_basic_auth: false,
982        internal: false,
983        handler: "file_manager".to_string(),
984        mcp_transport: None,
985        mcp_command: None,
986        mcp_args: Vec::new(),
987        mcp_url: None,
988        mcp_env: HashMap::new(),
989        cli_command: None,
990        cli_default_args: Vec::new(),
991        cli_env: HashMap::new(),
992        cli_timeout_secs: None,
993        cli_output_args: Vec::new(),
994        cli_output_positional: HashMap::new(),
995        upload_destinations: HashMap::new(),
996        upload_default_destination: None,
997        openapi_spec: None,
998        openapi_include_tags: Vec::new(),
999        openapi_exclude_tags: Vec::new(),
1000        openapi_include_operations: Vec::new(),
1001        openapi_exclude_operations: Vec::new(),
1002        openapi_max_operations: None,
1003        openapi_overrides: HashMap::new(),
1004        auth_generator: None,
1005        category: Some("file_manager".to_string()),
1006        skills: Vec::new(),
1007    };
1008
1009    let tools = vec![download_tool, upload_tool];
1010    let mi = registry.manifests.len();
1011    for (ti, tool) in tools.iter().enumerate() {
1012        registry.tool_index.insert(tool.name.clone(), (mi, ti));
1013    }
1014    registry.manifests.push(Manifest { provider, tools });
1015}
1016
1017fn build_file_manager_download_tool() -> Tool {
1018    let schema = serde_json::json!({
1019        "type": "object",
1020        "required": ["url"],
1021        "properties": {
1022            "url": {"type": "string", "description": "URL to fetch bytes from"},
1023            "out": {"type": "string", "description": "Local path to write bytes; if omitted, returns base64 inline"},
1024            "inline": {"type": "boolean", "description": "Return bytes as base64 in the response instead of writing to disk"},
1025            "max_bytes": {"type": "integer", "description": "Abort if body exceeds this many bytes (default 500 MB)"},
1026            "timeout": {"type": "integer", "description": "Request timeout in seconds (default 120)"},
1027            "headers": {"type": "object", "description": "Extra request headers, e.g. {\"Authorization\": \"Bearer abc\"}"},
1028            "follow_redirects": {"type": "boolean", "description": "Follow 3xx redirects (default true)"}
1029        }
1030    });
1031
1032    Tool {
1033        name: "file_manager:download".to_string(),
1034        description: "Download bytes from a URL. Writes to --out <path> or returns base64 inline."
1035            .to_string(),
1036        endpoint: String::new(),
1037        method: HttpMethod::Post,
1038        scope: Some("tool:file_manager:download".to_string()),
1039        input_schema: Some(schema),
1040        response: None,
1041        tags: vec![
1042            "file".to_string(),
1043            "download".to_string(),
1044            "binary".to_string(),
1045        ],
1046        hint: Some(
1047            "Use for 'I have a URL, give me the bytes' — images, video, audio, PDFs, CSVs, ZIPs."
1048                .to_string(),
1049        ),
1050        examples: vec![
1051            "ati run file_manager:download --url https://example.com/file.mp4 --out /tmp/clip.mp4"
1052                .to_string(),
1053            "ati run file_manager:download --url https://example.com/data.csv --inline true"
1054                .to_string(),
1055        ],
1056    }
1057}
1058
1059fn build_file_manager_upload_tool() -> Tool {
1060    let schema = serde_json::json!({
1061        "type": "object",
1062        "required": ["path"],
1063        "properties": {
1064            "path": {"type": "string", "description": "Local file path to upload"},
1065            "content_type": {"type": "string", "description": "Override MIME type (default: inferred from extension)"},
1066            "object_name": {"type": "string", "description": "Object key (when destination is GCS-style); default: auto-generated"},
1067            "destination": {"type": "string", "description": "Allowlist key declared in the operator's file_manager.toml manifest (e.g. \"fal\", \"gcs\"). Omit to use the operator default."}
1068        }
1069    });
1070
1071    Tool {
1072        name: "file_manager:upload".to_string(),
1073        description: "Upload a local file to a manifest-declared destination, return a public URL.".to_string(),
1074        endpoint: String::new(),
1075        method: HttpMethod::Post,
1076        scope: Some("tool:file_manager:upload".to_string()),
1077        input_schema: Some(schema),
1078        response: None,
1079        tags: vec!["file".to_string(), "upload".to_string(), "binary".to_string()],
1080        hint: Some("Upload a local file to a manifest-declared destination (GCS, fal_storage, etc.) and get a public URL.".to_string()),
1081        examples: vec![
1082            "ati run file_manager:upload --path /tmp/narration.mp3".to_string(),
1083            "ati run file_manager:upload --path /tmp/report.pdf --destination gcs".to_string(),
1084        ],
1085    }
1086}