arcella_types/
manifest.rs

1// arcella/arcella-types/src/manifest/mod.rs
2//
3// Copyright (c) 2025 Arcella Team
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
6// or the MIT license <LICENSE-MIT>, at your option.
7// This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use regex::Regex;
11use serde::{Deserialize, Deserializer, Serialize};
12use std::str::FromStr;
13use std::sync::OnceLock;
14
15use crate::{
16	ArcellaTypeError, 
17	ArcellaTypeResult,
18};
19
20use crate::module_id::*;
21
22use crate::interface_list::*;
23
24/// A portable, human-readable descriptor of a WebAssembly component.
25///
26/// The manifest captures **what a component is**, **what it provides**, and **what it needs** —
27/// independently of any specific runtime. It serves three key purposes:
28///
29/// 1. **Identity**: `id` (`name@version`) uniquely identifies the component.
30/// 2. **Contract**: `imports` and `exports` define its interface boundary (like a WIT package).
31/// 3. **Intent**: `capabilities` express environmental requirements (WASI, FS, network, etc.).
32///
33/// This structure supports **three input formats** during deserialization:
34/// - String: `"name@version"` (e.g., in deployment specs)
35/// - Flat object: `{ "name": "...", "version": "...", ... }` (e.g., in `component.toml`)
36/// - Nested object: `{ "id": { "name": "...", "version": "..." }, ... }` (e.g., in JSON state snapshots)
37#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
38pub struct ComponentManifest {
39    /// Canonical, validated identifier of the component: `name@version`.
40    pub id: ModuleId,
41
42    /// Optional short description for documentation or tooling.
43    #[serde(default, skip_serializing_if = "Option::is_none")]
44    pub description: Option<String>,
45
46    /// Interfaces this component **provides** to others.
47    ///
48    /// Each key must be a valid WIT-style interface name:
49    /// - With version: `"logger:log@1.0"`
50    /// - Without version: `"my:custom"`
51    ///
52    /// Values are structured interface specs (e.g., `ComponentInstance` trees).
53    /// When loaded from a simple config (e.g., TOML array), they default to `Unknown`.
54    #[serde(default)]
55    pub exports: InterfaceList,
56
57    /// Interfaces this component **requires** from its environment.
58    ///
59    /// Same format as `exports`. These must be satisfied at link time by:
60    /// - The runtime (e.g., `wasi:cli/stdio`),
61    /// - Other deployed components (e.g., `"auth:validator@1.0"`).
62    #[serde(default)]
63    pub imports: InterfaceList,
64
65    /// Runtime capabilities and resource requirements.
66    ///
67    /// Used by the Arcella executor to:
68    /// - Grant minimal required permissions,
69    /// - Enforce sandboxing,
70    /// - Allocate resources safely.
71    #[serde(default)]
72    pub capabilities: ComponentCapabilities,
73}
74
// ======================================
// Deserialize supporting THREE formats (tried in this order):
// 1. String:        "name@version"
// 2. Nested object: { "id": { "name": "...", "version": "..." }, ... }
// 3. Flat object:   { "name": "...", "version": "...", ... }
// ======================================
81
82#[derive(Deserialize)]
83#[serde(untagged)]
84enum ComponentManifestDeserializeHelper {
85    // Format 1: just a string ID
86    StringId(String),
87
88    // Format 2: nested with explicit "id"
89    Nested {
90        id: ModuleId,
91        #[serde(default)]
92        description: Option<String>,
93        #[serde(default)]
94        exports: InterfaceList,
95        #[serde(default)]
96        imports: InterfaceList,
97        #[serde(default)]
98        capabilities: ComponentCapabilities,
99    },
100
101    // Format 3: flat with "name" and "version"
102    Flat {
103        name: String,
104        version: String,
105        #[serde(default)]
106        description: Option<String>,
107        #[serde(default)]
108        exports: InterfaceList,
109        #[serde(default)]
110        imports: InterfaceList,
111        #[serde(default)]
112        capabilities: ComponentCapabilities,
113    },
114}
115
116impl<'de> Deserialize<'de> for ComponentManifest {
117    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
118    where
119        D: Deserializer<'de>,
120    {
121        match ComponentManifestDeserializeHelper::deserialize(deserializer)? {
122            // Format 1: string → only id, rest default
123            ComponentManifestDeserializeHelper::StringId(s) => {
124                let id = ModuleId::from_str(&s).map_err(serde::de::Error::custom)?;
125                Ok(ComponentManifest {
126                    id,
127                    description: None,
128                    exports: InterfaceList::default(),
129                    imports: InterfaceList::default(),
130                    capabilities: ComponentCapabilities::default(),
131                })
132            }
133
134            // Format 2: nested object
135            ComponentManifestDeserializeHelper::Nested {
136                id,
137                description,
138                exports,
139                imports,
140                capabilities,
141            } => Ok(ComponentManifest {
142                id,
143                description,
144                exports,
145                imports,
146                capabilities,
147            }),
148
149            // Format 3: flat object (TOML-style)
150            ComponentManifestDeserializeHelper::Flat {
151                name,
152                version,
153                description,
154                exports,
155                imports,
156                capabilities,
157            } => {
158                let id = ModuleId::new(name, version).map_err(serde::de::Error::custom)?;
159                Ok(ComponentManifest {
160                    id,
161                    description,
162                    exports,
163                    imports,
164                    capabilities,
165                })
166            }
167        }
168    }
169}
170
171// ======================================
172// Validation and helpers
173// ======================================
174
175impl ComponentManifest {
176    /// Validates the semantic correctness of the entire manifest.
177    ///
178    /// Since `id` is a `ModuleId`, `name` and `version` are already valid.
179    /// This method only checks interface formats.
180    pub fn validate(&self) -> ArcellaTypeResult<()> {
181        for key in self.imports.keys() {
182            if !Self::validate_interface_format(key) {
183                return Err(ArcellaTypeError::Manifest(
184                    format!("Invalid import interface format: {}", key)
185                ));
186            }
187        }
188        for key in self.exports.keys() {
189            if !Self::validate_interface_format(key) {
190                return Err(ArcellaTypeError::Manifest(
191                    format!("Invalid export interface format: {}", key)
192                ));
193            }
194        }
195        Ok(())
196    }
197
198    /// Checks if a string matches the expected WIT interface reference format.
199    ///
200    /// Two forms are accepted:
201    /// - **With version**: `namespace:interface@version` (e.g., `wasi:http@0.2.0`)
202    /// - **Without version**: `namespace:interface` (e.g., `my:custom`)
203    ///
204    /// Interface part may contain `/` for nested paths (e.g., `wasi:cli/stdio`).
205    pub fn validate_interface_format(s: &str) -> bool {
206        static RE_WITH_VERSION: OnceLock<Regex> = OnceLock::new();
207        static RE_WITHOUT_VERSION: OnceLock<Regex> = OnceLock::new();
208
209        let re1 = RE_WITH_VERSION.get_or_init(|| {
210            Regex::new(r"^[a-zA-Z0-9_-]+:[a-zA-Z0-9_/-]+@[a-zA-Z0-9.+_-]+$").unwrap()
211        });
212        let re2 = RE_WITHOUT_VERSION.get_or_init(|| {
213            Regex::new(r"^[a-zA-Z0-9_-]+:[a-zA-Z0-9_/-]+$").unwrap()
214        });
215
216        re1.is_match(s) || re2.is_match(s)
217    }
218}
219
220// ======================================
221// Capabilities and Resources
222// ======================================
223
224/// Runtime capabilities and environmental requirements of a component.
225///
226/// This struct enables **least-privilege sandboxing**: the executor grants only what is declared.
227/// All fields are **opt-in** — an empty `ComponentCapabilities` means "no special needs".
228#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
229pub struct ComponentCapabilities {
230    /// Required WASI preview2 interfaces (e.g., `["wasi:cli/stdio", "wasi:random"]`).
231    #[serde(default)]
232    pub wasi: Vec<String>,
233
234    /// Filesystem paths the component needs to access (e.g., `["/logs", "/config"]`).
235    ///
236    /// Paths are virtualized; actual mapping is runtime-specific.
237    #[serde(default)]
238    pub filesystem: Vec<String>,
239
240    /// Network access patterns (e.g., `["tcp:localhost:8080", "udp:example.com:53"]`).
241    ///
242    /// Format is not yet standardized — currently treated as opaque strings.
243    #[serde(default)]
244    pub network: Vec<String>,
245
246    /// Required environment variables (e.g., `["DATABASE_URL", "DEBUG"]`).
247    #[serde(default)]
248    pub environment: Vec<String>,
249
250    /// CPU and memory resource limits.
251    #[serde(default)]
252    pub resources: ComponentResources,
253
254    /// Security and trusted execution requirements.
255    #[serde(default)]
256    pub security: ComponentSecurity,
257}
258
259/// Resource constraints for sandboxing and QoS.
260#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
261pub struct ComponentResources {
262    /// Maximum memory in bytes (e.g., `67108864` = 64 MiB).
263    ///
264    /// If `None`, the runtime applies a default or unlimited policy.
265    pub memory_max: Option<u64>,
266
267    /// Relative CPU weight (Linux CFS shares equivalent).
268    ///
269    /// Higher values get more CPU time during contention.
270    pub cpu_shares: Option<u32>,
271}
272
273/// Security and isolation requirements.
274#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
275pub struct ComponentSecurity {
276    /// Whether the component **requires** execution inside a TEE (e.g., SGX, TrustZone).
277    ///
278    /// If `true` and TEE is unavailable, deployment must fail.
279    pub requires_tee: bool,
280
281    /// Whitelist of allowed system calls (if runtime supports syscall filtering).
282    ///
283    /// Empty list = no restriction (or unsupported by runtime).
284    pub allowed_syscalls: Vec<String>,
285}
286
287// ======================================
288// Tests
289// ======================================
290
#[cfg(test)]
mod tests {
    use super::*;
    use crate::spec::ComponentItemSpec;
    use serde_json;

    /// Placeholder spec for tests that only care about the interface name.
    fn unknown_spec() -> ComponentItemSpec {
        ComponentItemSpec::Unknown { debug: None }
    }

    /// Interface list holding exactly one placeholder entry under `key`.
    fn single_entry(key: &'static str) -> InterfaceList {
        let mut list = InterfaceList::default();
        list.insert(key.into(), unknown_spec());
        list
    }

    /// All three accepted input shapes must produce the expected manifest.
    #[test]
    fn test_component_manifest_deserialize_three_formats() {
        // Format 1: string
        let from_string: ComponentManifest =
            serde_json::from_str(r#""http-logger@0.1.0""#).unwrap();
        assert_eq!(from_string.id.to_string(), "http-logger@0.1.0");
        assert!(from_string.description.is_none());
        assert!(from_string.imports.is_empty());

        // Format 2: nested object
        let from_nested: ComponentManifest = serde_json::from_str(
            r#"
        {
            "id": {
                "name": "web-handler",
                "version": "2.0.0"
            },
            "description": "Handles HTTP",
            "imports": ["wasi:http@0.2.0"]
        }
        "#,
        )
        .unwrap();
        assert_eq!(from_nested.id.to_string(), "web-handler@2.0.0");
        assert_eq!(from_nested.description, Some("Handles HTTP".to_string()));
        assert!(from_nested.imports.contains_key("wasi:http@0.2.0"));

        // Format 3: flat object (TOML-style)
        let from_flat: ComponentManifest = serde_json::from_str(
            r#"
        {
            "name": "auth-service",
            "version": "1.5.0",
            "exports": ["auth:verify@1.0"]
        }
        "#,
        )
        .unwrap();
        assert_eq!(from_flat.id.to_string(), "auth-service@1.5.0");
        assert!(from_flat.exports.contains_key("auth:verify@1.0"));
    }

    /// A flat TOML document is accepted and passes validation.
    #[test]
    fn test_component_manifest_from_toml_style() {
        let document = r#"
            name = "http-logger"
            version = "0.1.0"
            description = "Logs HTTP requests"
            exports = ["logger:log@1.0"]
            imports = ["wasi:http/incoming-handler@0.2.0"]
        "#;

        let parsed = toml::from_str::<ComponentManifest>(document).unwrap();
        assert_eq!(parsed.id.name, "http-logger");
        assert_eq!(parsed.id.version, "0.1.0");
        assert_eq!(parsed.id.to_string(), "http-logger@0.1.0");
        assert!(parsed.validate().is_ok());
    }

    /// An invalid component name must surface the `ModuleId` error message.
    #[test]
    fn test_invalid_name_rejected() {
        let document = r#"
            name = "invalid name!"
            version = "1.0.0"
        "#;
        let failure = toml::from_str::<ComponentManifest>(document).unwrap_err();
        assert!(failure.to_string().contains("Invalid module ID name"));
    }

    /// `validate` must reject an import whose key is not a valid interface name.
    #[test]
    fn test_invalid_interface_rejected() {
        let manifest = ComponentManifest {
            id: ModuleId::new("test".into(), "1.0.0".into()).unwrap(),
            description: None,
            exports: InterfaceList::default(),
            imports: single_entry("bad::interface"),
            capabilities: ComponentCapabilities::default(),
        };
        assert!(manifest.validate().is_err());
    }

    /// Serializing to JSON and reading it back must give an equal manifest.
    #[test]
    fn test_json_roundtrip() {
        let original = ComponentManifest {
            id: ModuleId::new("test".into(), "1.0.0".into()).unwrap(),
            description: Some("A test component".into()),
            exports: single_entry("logger:log@1.0"),
            imports: single_entry("wasi:http@0.2.0"),
            capabilities: ComponentCapabilities::default(),
        };

        let encoded = serde_json::to_string_pretty(&original).unwrap();
        eprintln!("JSON:\n{}", encoded);

        let decoded: ComponentManifest = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}