// Source path: ai_lib_rust/protocol/v2/manifest.rs

//! V2 three-ring manifest structure — Ring 1 core skeleton / Ring 2 capability
//! mapping / Ring 3 advanced extensions.
//!
//! V2 manifest structure implementing the concentric circle model.
//! Parses the three-ring structure from YAML/JSON and provides typed access
//! to all V2 features including MCP, Computer Use, and Extended Multimodal.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;

use super::capabilities::CapabilitiesV2;
use crate::protocol::config::{
    AccumulatorConfig, CandidateConfig, DecoderConfig, EndpointConfig, ErrorClassification,
    EventMapRule, RateLimitHeaders, RetryPolicy, ServiceConfig, TerminationConfig,
};

// ─── Ring 1: Core Skeleton (Required) ───────────────────────────────────────

18/// V2 authentication configuration (Ring 1).
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct AuthConfigV2 {
21    #[serde(rename = "type")]
22    pub auth_type: String,
23    #[serde(default, skip_serializing_if = "Option::is_none")]
24    pub header: Option<String>,
25    #[serde(default, skip_serializing_if = "Option::is_none")]
26    pub prefix: Option<String>,
27    #[serde(default, skip_serializing_if = "Option::is_none")]
28    pub token_env: Option<String>,
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub param_name: Option<String>,
31    #[serde(default, skip_serializing_if = "Option::is_none")]
32    pub extra_headers: Option<Vec<ExtraHeader>>,
33}
34
35/// Extra header entry for authentication.
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct ExtraHeader {
38    pub name: String,
39    pub value: String,
40}
41
42/// V2 endpoint definition (Ring 1).
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct EndpointV2 {
45    pub base_url: String,
46    #[serde(default, skip_serializing_if = "Option::is_none")]
47    pub chat: Option<String>,
48    #[serde(default, skip_serializing_if = "Option::is_none")]
49    pub embeddings: Option<String>,
50    #[serde(default, skip_serializing_if = "Option::is_none")]
51    pub auth: Option<AuthConfigV2>,
52}

// ─── Ring 2: Capability Mapping (Conditional) ───────────────────────────────

56/// V2 streaming configuration (Ring 2).
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct StreamingV2 {
59    #[serde(default, skip_serializing_if = "Option::is_none")]
60    pub decoder: Option<DecoderConfig>,
61    #[serde(default)]
62    pub event_map: Vec<EventMapRule>,
63    #[serde(default, skip_serializing_if = "Option::is_none")]
64    pub candidate: Option<CandidateConfig>,
65    #[serde(default, skip_serializing_if = "Option::is_none")]
66    pub accumulator: Option<AccumulatorConfig>,
67}
68
69/// V2 parameter definition (Ring 2).
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct ParameterDef {
72    #[serde(rename = "type", default, skip_serializing_if = "Option::is_none")]
73    pub param_type: Option<String>,
74    #[serde(default, skip_serializing_if = "Option::is_none")]
75    pub range: Option<Vec<f64>>,
76    #[serde(default, skip_serializing_if = "Option::is_none")]
77    pub default: Option<serde_json::Value>,
78    #[serde(default, skip_serializing_if = "Option::is_none")]
79    pub min: Option<i64>,
80    #[serde(default, skip_serializing_if = "Option::is_none")]
81    pub max: Option<i64>,
82    #[serde(default, skip_serializing_if = "Option::is_none")]
83    pub alias: Option<String>,
84    #[serde(default, skip_serializing_if = "Option::is_none")]
85    pub required: Option<bool>,
86}

// ─── Ring 2: MCP Integration ────────────────────────────────────────────────

90/// MCP integration configuration (Ring 2).
91#[derive(Debug, Clone, Default, Serialize, Deserialize)]
92pub struct McpConfig {
93    #[serde(default, skip_serializing_if = "Option::is_none")]
94    pub client: Option<McpClientConfig>,
95    #[serde(default, skip_serializing_if = "Option::is_none")]
96    pub server: Option<McpServerConfig>,
97}
98
99/// MCP client configuration.
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct McpClientConfig {
102    #[serde(default)]
103    pub supported: bool,
104    #[serde(default, skip_serializing_if = "Option::is_none")]
105    pub protocol_version: Option<String>,
106    #[serde(default)]
107    pub transports: Vec<String>,
108    #[serde(default)]
109    pub auth_methods: Vec<String>,
110    #[serde(default, skip_serializing_if = "Option::is_none")]
111    pub capabilities: Option<McpCapabilities>,
112    #[serde(default, skip_serializing_if = "Option::is_none")]
113    pub tool_filtering: Option<McpToolFiltering>,
114    #[serde(default)]
115    pub approval_modes: Vec<String>,
116    #[serde(default, skip_serializing_if = "Option::is_none")]
117    pub provider_mapping: Option<HashMap<String, serde_json::Value>>,
118}
119
120/// MCP server capabilities that can be consumed.
121#[derive(Debug, Clone, Default, Serialize, Deserialize)]
122pub struct McpCapabilities {
123    #[serde(default)]
124    pub tools: bool,
125    #[serde(default)]
126    pub resources: bool,
127    #[serde(default)]
128    pub prompts: bool,
129    #[serde(default)]
130    pub sampling: bool,
131    #[serde(default)]
132    pub elicitation: bool,
133}
134
135/// MCP tool filtering configuration.
136#[derive(Debug, Clone, Default, Serialize, Deserialize)]
137pub struct McpToolFiltering {
138    #[serde(default)]
139    pub allowed_tools: bool,
140    #[serde(default)]
141    pub denied_tools: bool,
142}
143
144/// MCP server mode configuration.
145#[derive(Debug, Clone, Serialize, Deserialize)]
146pub struct McpServerConfig {
147    #[serde(default)]
148    pub supported: bool,
149    #[serde(default)]
150    pub transports: Vec<String>,
151    #[serde(default)]
152    pub exposed_capabilities: Vec<String>,
153}

// ─── Ring 2: Computer Use Abstraction ───────────────────────────────────────

157/// Computer Use configuration (Ring 2).
158#[derive(Debug, Clone, Default, Serialize, Deserialize)]
159pub struct ComputerUseConfig {
160    #[serde(default)]
161    pub supported: bool,
162    #[serde(default, skip_serializing_if = "Option::is_none")]
163    pub status: Option<String>,
164    #[serde(default, skip_serializing_if = "Option::is_none")]
165    pub implementation: Option<String>,
166    #[serde(default, skip_serializing_if = "Option::is_none")]
167    pub actions: Option<serde_json::Value>,
168    #[serde(default, skip_serializing_if = "Option::is_none")]
169    pub safety: Option<serde_json::Value>,
170    #[serde(default, skip_serializing_if = "Option::is_none")]
171    pub environment: Option<serde_json::Value>,
172    #[serde(default, skip_serializing_if = "Option::is_none")]
173    pub provider_mapping: Option<HashMap<String, serde_json::Value>>,
174}

// ─── Ring 2: Extended Multimodal ────────────────────────────────────────────

178/// Extended multimodal configuration (Ring 2).
179#[derive(Debug, Clone, Default, Serialize, Deserialize)]
180pub struct MultimodalConfig {
181    #[serde(default, skip_serializing_if = "Option::is_none")]
182    pub input: Option<MultimodalInput>,
183    #[serde(default, skip_serializing_if = "Option::is_none")]
184    pub output: Option<MultimodalOutput>,
185    #[serde(default, skip_serializing_if = "Option::is_none")]
186    pub omni_mode: Option<OmniModeConfig>,
187}
188
189/// Multimodal input modalities.
190#[derive(Debug, Clone, Default, Serialize, Deserialize)]
191pub struct MultimodalInput {
192    #[serde(default, skip_serializing_if = "Option::is_none")]
193    pub vision: Option<VisionConfig>,
194    #[serde(default, skip_serializing_if = "Option::is_none")]
195    pub audio: Option<AudioInputConfig>,
196    #[serde(default, skip_serializing_if = "Option::is_none")]
197    pub video: Option<VideoInputConfig>,
198}
199
200/// Vision input configuration.
201#[derive(Debug, Clone, Default, Serialize, Deserialize)]
202pub struct VisionConfig {
203    #[serde(default)]
204    pub supported: bool,
205    #[serde(default)]
206    pub formats: Vec<String>,
207    #[serde(default)]
208    pub encoding_methods: Vec<String>,
209    #[serde(default)]
210    pub document_understanding: bool,
211    #[serde(default, skip_serializing_if = "Option::is_none")]
212    pub max_file_size: Option<String>,
213    #[serde(default, skip_serializing_if = "Option::is_none")]
214    pub max_resolution: Option<String>,
215}
216
217/// Audio input configuration.
218#[derive(Debug, Clone, Default, Serialize, Deserialize)]
219pub struct AudioInputConfig {
220    #[serde(default)]
221    pub supported: bool,
222    #[serde(default)]
223    pub formats: Vec<String>,
224    #[serde(default)]
225    pub real_time_streaming: bool,
226    #[serde(default)]
227    pub speech_recognition: bool,
228}
229
230/// Video input configuration.
231#[derive(Debug, Clone, Default, Serialize, Deserialize)]
232pub struct VideoInputConfig {
233    #[serde(default)]
234    pub supported: bool,
235    #[serde(default)]
236    pub formats: Vec<String>,
237    #[serde(default)]
238    pub temporal_reasoning: bool,
239    #[serde(default)]
240    pub audio_track: bool,
241}
242
243/// Multimodal output modalities.
244#[derive(Debug, Clone, Default, Serialize, Deserialize)]
245pub struct MultimodalOutput {
246    #[serde(default)]
247    pub text: bool,
248    #[serde(default, skip_serializing_if = "Option::is_none")]
249    pub audio: Option<AudioOutputConfig>,
250    #[serde(default, skip_serializing_if = "Option::is_none")]
251    pub image: Option<ImageOutputConfig>,
252}
253
254/// Audio output configuration.
255#[derive(Debug, Clone, Default, Serialize, Deserialize)]
256pub struct AudioOutputConfig {
257    #[serde(default)]
258    pub supported: bool,
259    #[serde(default)]
260    pub real_time_tts: bool,
261    #[serde(default)]
262    pub natural_voice: bool,
263    #[serde(default)]
264    pub voice_selection: bool,
265}
266
267/// Image generation output configuration.
268#[derive(Debug, Clone, Default, Serialize, Deserialize)]
269pub struct ImageOutputConfig {
270    #[serde(default)]
271    pub supported: bool,
272    #[serde(default)]
273    pub formats: Vec<String>,
274}
275
276/// Omni-mode configuration.
277#[derive(Debug, Clone, Default, Serialize, Deserialize)]
278pub struct OmniModeConfig {
279    #[serde(default)]
280    pub supported: bool,
281    #[serde(default)]
282    pub real_time_voice_chat: bool,
283    #[serde(default)]
284    pub streaming_multimodal: bool,
285}

// ─── Root V2 Manifest ───────────────────────────────────────────────────────

289/// Complete V2 Provider Manifest — three-ring concentric circle structure.
290///
291/// Ring 1 fields are required. Ring 2 fields are conditional on capabilities.
292/// Ring 3 fields are optional advanced extensions.
293#[derive(Debug, Clone, Serialize, Deserialize)]
294pub struct ManifestV2 {
295    // ─── Ring 1: Core Skeleton (Required) ───
296    pub id: String,
297    pub protocol_version: String,
298    pub endpoint: EndpointV2,
299    #[serde(default, skip_serializing_if = "Option::is_none")]
300    pub error_classification: Option<ErrorClassification>,
301
302    // Provider metadata
303    #[serde(default, skip_serializing_if = "Option::is_none")]
304    pub name: Option<String>,
305    #[serde(default, skip_serializing_if = "Option::is_none")]
306    pub version: Option<String>,
307    #[serde(default, skip_serializing_if = "Option::is_none")]
308    pub status: Option<String>,
309    #[serde(default, skip_serializing_if = "Option::is_none")]
310    pub category: Option<String>,
311    #[serde(default, skip_serializing_if = "Option::is_none")]
312    pub official_url: Option<String>,
313
314    // ─── Ring 2: Capability Mapping (Conditional) ───
315    pub capabilities: CapabilitiesV2,
316    #[serde(default, skip_serializing_if = "Option::is_none")]
317    pub parameters: Option<HashMap<String, ParameterDef>>,
318    #[serde(default, skip_serializing_if = "Option::is_none")]
319    pub streaming: Option<StreamingV2>,
320    #[serde(default, skip_serializing_if = "Option::is_none")]
321    pub multimodal: Option<MultimodalConfig>,
322    #[serde(default, skip_serializing_if = "Option::is_none")]
323    pub computer_use: Option<ComputerUseConfig>,
324    #[serde(default, skip_serializing_if = "Option::is_none")]
325    pub mcp: Option<McpConfig>,
326
327    // ─── Ring 3: Advanced Extensions (Optional) ───
328    #[serde(default, skip_serializing_if = "Option::is_none")]
329    pub api_families: Option<Vec<String>>,
330    #[serde(default, skip_serializing_if = "Option::is_none")]
331    pub default_api_family: Option<String>,
332    #[serde(default, skip_serializing_if = "Option::is_none")]
333    pub endpoints: Option<HashMap<String, EndpointConfig>>,
334    #[serde(default, skip_serializing_if = "Option::is_none")]
335    pub services: Option<HashMap<String, ServiceConfig>>,
336    #[serde(default, skip_serializing_if = "Option::is_none")]
337    pub rate_limit_headers: Option<RateLimitHeaders>,
338    #[serde(default, skip_serializing_if = "Option::is_none")]
339    pub retry_policy: Option<RetryPolicy>,
340    #[serde(default, skip_serializing_if = "Option::is_none")]
341    pub termination: Option<TerminationConfig>,
342    #[serde(default, skip_serializing_if = "Option::is_none")]
343    pub metadata: Option<serde_json::Value>,
344
345    // Catch-all for forward compatibility
346    #[serde(flatten)]
347    pub extra: HashMap<String, serde_json::Value>,
348}
349
350impl ManifestV2 {
351    /// Check if the manifest declares support for a given capability.
352    pub fn has_capability(&self, cap: super::capabilities::Capability) -> bool {
353        self.capabilities.has_capability(cap)
354    }
355
356    /// Check if MCP client is supported.
357    pub fn mcp_client_supported(&self) -> bool {
358        self.mcp
359            .as_ref()
360            .and_then(|m| m.client.as_ref())
361            .map(|c| c.supported)
362            .unwrap_or(false)
363    }
364
365    /// Check if Computer Use is supported.
366    pub fn computer_use_supported(&self) -> bool {
367        self.computer_use
368            .as_ref()
369            .map(|cu| cu.supported)
370            .unwrap_or(false)
371    }
372
373    /// Get the base URL for API requests.
374    pub fn base_url(&self) -> &str {
375        &self.endpoint.base_url
376    }
377
378    /// Get the chat endpoint path.
379    pub fn chat_path(&self) -> &str {
380        self.endpoint.chat.as_deref().unwrap_or("/chat/completions")
381    }
382
383    /// Detect the API style from the manifest structure.
384    pub fn detect_api_style(&self) -> ApiStyle {
385        // Heuristic: check streaming decoder strategy or endpoint patterns
386        if let Some(streaming) = &self.streaming {
387            if let Some(decoder) = &streaming.decoder {
388                if let Some(strategy) = &decoder.strategy {
389                    if strategy.starts_with("anthropic") {
390                        return ApiStyle::AnthropicMessages;
391                    }
392                    if strategy.starts_with("gemini") {
393                        return ApiStyle::GeminiGenerate;
394                    }
395                }
396            }
397        }
398        // Check endpoint path for Gemini pattern
399        if self.chat_path().contains(":generateContent") {
400            return ApiStyle::GeminiGenerate;
401        }
402        if self.chat_path().contains("/messages") && !self.chat_path().contains("/chat/") {
403            return ApiStyle::AnthropicMessages;
404        }
405        ApiStyle::OpenAiCompatible
406    }
407
408    /// Determine the protocol version as a semver-like tuple.
409    pub fn protocol_semver(&self) -> (u32, u32) {
410        let parts: Vec<&str> = self.protocol_version.split('.').collect();
411        let major = parts.first().and_then(|s| s.parse().ok()).unwrap_or(1);
412        let minor = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0);
413        (major, minor)
414    }
415
416    /// Check if this is a V2 manifest.
417    pub fn is_v2(&self) -> bool {
418        self.protocol_semver().0 >= 2
419    }
420}
421
/// API style classification for ProviderDriver selection.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ApiStyle {
    /// OpenAI chat completions format (also used by DeepSeek, Moonshot, Zhipu, etc.)
    OpenAiCompatible,
    /// Anthropic messages format
    AnthropicMessages,
    /// Google Gemini generateContent format
    GeminiGenerate,
    /// Custom format requiring a dedicated driver
    Custom,
}

impl ApiStyle {
    /// Returns the snake_case identifier for this style — the same text that
    /// the `Display` implementation writes.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::OpenAiCompatible => "openai_compatible",
            Self::AnthropicMessages => "anthropic_messages",
            Self::GeminiGenerate => "gemini_generate",
            Self::Custom => "custom",
        }
    }
}

impl fmt::Display for ApiStyle {
    /// Writes the snake_case identifier of the style.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `pad` honors width/fill/alignment requested by the caller
        // (e.g. `{:<20}`), which a plain `write!` of a literal ignores.
        f.pad(self.as_str())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes a YAML test manifest, panicking with a readable message if
    /// the fixture itself is malformed.
    fn parse(yaml: &str) -> ManifestV2 {
        serde_yaml::from_str(yaml).expect("test manifest should deserialize")
    }

    /// A full-featured manifest parses and exposes its Ring 1/Ring 2 data
    /// through the typed accessors.
    #[test]
    fn test_parse_v2_manifest_from_yaml() {
        let yaml = r#"
id: openai
protocol_version: "2.0"
name: OpenAI
status: stable
endpoint:
  base_url: https://api.openai.com/v1
  chat: /chat/completions
  auth:
    type: bearer
    header: Authorization
    prefix: Bearer
error_classification:
  by_http_status:
    "400": invalid_request
    "429": rate_limited
capabilities:
  required: [text, streaming, tools]
  optional: [vision, mcp_client, computer_use]
  feature_flags:
    structured_output: true
    parallel_tool_calls: true
mcp:
  client:
    supported: true
    protocol_version: "2025-11-25"
    transports: [streamable_http, sse]
computer_use:
  supported: true
  status: preview
  implementation: screen_based
streaming:
  decoder:
    format: sse
    strategy: openai_chat
"#;
        let manifest = parse(yaml);
        assert_eq!(manifest.id, "openai");
        assert!(manifest.is_v2());
        assert_eq!(manifest.protocol_semver(), (2, 0));
        assert_eq!(manifest.base_url(), "https://api.openai.com/v1");
        assert_eq!(manifest.chat_path(), "/chat/completions");
        assert!(manifest.mcp_client_supported());
        assert!(manifest.computer_use_supported());
        assert_eq!(manifest.detect_api_style(), ApiStyle::OpenAiCompatible);
        assert!(manifest.has_capability(super::super::capabilities::Capability::McpClient));
    }

    /// A decoder strategy starting with `anthropic` selects the Anthropic style.
    #[test]
    fn test_detect_anthropic_style() {
        let yaml = r#"
id: anthropic
protocol_version: "2.0"
endpoint:
  base_url: https://api.anthropic.com/v1
  chat: /messages
capabilities:
  required: [text, streaming]
streaming:
  decoder:
    format: anthropic_sse
    strategy: anthropic_event_stream
"#;
        let manifest = parse(yaml);
        assert_eq!(manifest.detect_api_style(), ApiStyle::AnthropicMessages);
    }

    /// Without a streaming section, a `:generateContent` chat path selects the
    /// Gemini style.
    #[test]
    fn test_detect_gemini_style() {
        let yaml = r#"
id: google
protocol_version: "2.0"
endpoint:
  base_url: https://generativelanguage.googleapis.com/v1beta
  chat: "/models/{model}:generateContent"
capabilities:
  required: [text, streaming]
"#;
        let manifest = parse(yaml);
        assert_eq!(manifest.detect_api_style(), ApiStyle::GeminiGenerate);
    }

    /// A minimal manifest exercises every fallback: default chat path,
    /// OpenAI-compatible style, and absent optional sections.
    #[test]
    fn test_defaults_for_minimal_manifest() {
        let yaml = r#"
id: generic
protocol_version: "1.5"
endpoint:
  base_url: https://example.com/v1
capabilities:
  required: [text, streaming]
"#;
        let manifest = parse(yaml);
        assert_eq!(manifest.protocol_semver(), (1, 5));
        assert!(!manifest.is_v2());
        assert_eq!(manifest.chat_path(), "/chat/completions");
        assert_eq!(manifest.detect_api_style(), ApiStyle::OpenAiCompatible);
        assert!(!manifest.mcp_client_supported());
        assert!(!manifest.computer_use_supported());
    }
}