Skip to main content

boundary_compiler/
profile.rs

//! Boundary profile definitions.
//!
//! A `BoundaryProfile` defines the constraints and policies under which JCS
//! (the JSON Canonicalization Scheme, RFC 8785) operates:
//! - **Dialect**: The JSON dialect (canonical, compact, or pretty)
//! - **Schema ID & schema version**: Schema identification for validation
//! - **CanonicalizationProfile**: Which transformations are applied
//! - **UnknownFieldPolicy**: How to handle unknown fields during schema validation
//! - **ResourceCeilings**: Limits on object key count, string length, array length,
//!   nesting depth, and float digits
9
10use crate::error::JcsError;
11use serde::{Deserialize, Serialize};
12
13/// JSON dialect variants.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
15#[serde(rename_all = "snake_case")]
16pub enum Dialect {
17    /// Canonical JSON (RFC 8785 JCS).
18    #[default]
19    Canonical,
20    /// Compact form (minified, no extra whitespace).
21    Compact,
22    /// Pretty-printed for human readability.
23    Pretty,
24}
25
26/// Canonicalization profile — what transformations are applied.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
28#[serde(rename_all = "snake_case")]
29pub enum CanonicalizationProfile {
30    /// Strict RFC 8785 (only Unicode escapes, no sorting hints).
31    #[default]
32    Strict,
33    /// RFC 8785 + normalize field ordering hints.
34    Normalized,
35    /// Application-specific profile.
36    Custom,
37}
38
39/// Policy for unknown fields encountered during schema validation.
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
41#[serde(rename_all = "snake_case")]
42pub enum UnknownFieldPolicy {
43    /// Reject with error.
44    #[default]
45    Reject,
46    /// Strip unknown fields silently.
47    Strip,
48    /// Pass unknown fields through unchanged.
49    PassThrough,
50}
51
52/// Resource ceiling limits for a boundary profile.
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[serde(default)]
55pub struct ResourceCeilings {
56    /// Maximum number of top-level keys (default: 128).
57    pub max_object_keys: usize,
58    /// Maximum total string length in bytes (default: 1MB).
59    pub max_string_bytes: usize,
60    /// Maximum array length (default: 1024).
61    pub max_array_len: usize,
62    /// Maximum nesting depth (default: 32).
63    pub max_depth: usize,
64    /// Maximum number of semantically-relevant float digits (default: 17 for f64 precision).
65    pub max_float_digits: usize,
66}
67
68impl Default for ResourceCeilings {
69    fn default() -> Self {
70        Self {
71            max_object_keys: 128,
72            max_string_bytes: 1 << 20, // 1 MiB
73            max_array_len: 1024,
74            max_depth: 32,
75            max_float_digits: 17,
76        }
77    }
78}
79
80/// A boundary profile with all constraints and policies.
81#[derive(Debug, Clone, Serialize, Deserialize)]
82#[serde(default)]
83pub struct BoundaryProfile {
84    /// JSON dialect for this profile.
85    pub dialect: Dialect,
86    /// Schema identifier (e.g., `"https://example.com/schema"`).
87    pub schema_id: Option<String>,
88    /// Schema version string (e.g., `"1.2.0"`).
89    pub schema_version: Option<String>,
90    /// Canonicalization profile.
91    pub canonicalization: CanonicalizationProfile,
92    /// Policy for unknown fields during schema validation.
93    pub unknown_field_policy: UnknownFieldPolicy,
94    /// Resource ceiling limits.
95    pub resource_ceilings: ResourceCeilings,
96}
97
98impl Default for BoundaryProfile {
99    fn default() -> Self {
100        Self {
101            dialect: Dialect::Canonical,
102            schema_id: None,
103            schema_version: None,
104            canonicalization: CanonicalizationProfile::Strict,
105            unknown_field_policy: UnknownFieldPolicy::Reject,
106            resource_ceilings: ResourceCeilings::default(),
107        }
108    }
109}
110
111impl BoundaryProfile {
112    /// Creates a default boundary profile with RFC 8785 canonicalization.
113    pub fn rfc8785() -> Self {
114        Self::default()
115    }
116
117    /// Creates a boundary profile with the given schema ID and version.
118    pub fn with_schema(mut self, id: impl Into<String>, version: impl Into<String>) -> Self {
119        self.schema_id = Some(id.into());
120        self.schema_version = Some(version.into());
121        self
122    }
123
124    /// Validates a parsed JSON value against resource ceilings.
125    pub fn check_resources(&self, value: &serde_json::Value) -> Result<(), JcsError> {
126        self.check_resources_inner(value, 0)
127    }
128
129    fn check_resources_inner(
130        &self,
131        value: &serde_json::Value,
132        depth: usize,
133    ) -> Result<(), JcsError> {
134        use crate::error::JcsError::ResourceCeilingExceeded;
135
136        if depth > self.resource_ceilings.max_depth {
137            return Err(ResourceCeilingExceeded {
138                resource: "depth".to_string(),
139                used: depth,
140                limit: self.resource_ceilings.max_depth,
141            });
142        }
143
144        match value {
145            serde_json::Value::Object(map) => {
146                if map.len() > self.resource_ceilings.max_object_keys {
147                    return Err(ResourceCeilingExceeded {
148                        resource: "object_keys".to_string(),
149                        used: map.len(),
150                        limit: self.resource_ceilings.max_object_keys,
151                    });
152                }
153                for (k, v) in map.iter() {
154                    if k.len() > self.resource_ceilings.max_string_bytes {
155                        return Err(ResourceCeilingExceeded {
156                            resource: "string_bytes".to_string(),
157                            used: k.len(),
158                            limit: self.resource_ceilings.max_string_bytes,
159                        });
160                    }
161                    self.check_resources_inner(v, depth + 1)?;
162                }
163            }
164            serde_json::Value::Array(arr) => {
165                if arr.len() > self.resource_ceilings.max_array_len {
166                    return Err(ResourceCeilingExceeded {
167                        resource: "array_len".to_string(),
168                        used: arr.len(),
169                        limit: self.resource_ceilings.max_array_len,
170                    });
171                }
172                for v in arr.iter() {
173                    self.check_resources_inner(v, depth + 1)?;
174                }
175            }
176            serde_json::Value::String(s) if s.len() > self.resource_ceilings.max_string_bytes => {
177                return Err(ResourceCeilingExceeded {
178                    resource: "string_bytes".to_string(),
179                    used: s.len(),
180                    limit: self.resource_ceilings.max_string_bytes,
181                });
182            }
183            _ => {}
184        }
185        Ok(())
186    }
187
188    /// Human-readable identifier for this profile.
189    pub fn identifier(&self) -> String {
190        match (&self.schema_id, &self.schema_version) {
191            (Some(id), Some(ver)) => format!("{id}:{ver}"),
192            (Some(id), None) => id.clone(),
193            _ => "default".to_string(),
194        }
195    }
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Unpacks a ceiling violation into `(resource, used, limit)`.
    ///
    /// Panics on any other outcome, so each test asserts *which* ceiling
    /// fired rather than merely that some ceiling did.
    fn ceiling_violation(result: Result<(), JcsError>) -> (String, usize, usize) {
        match result {
            Err(JcsError::ResourceCeilingExceeded {
                resource,
                used,
                limit,
            }) => (resource, used, limit),
            other => panic!("expected ResourceCeilingExceeded, got {other:?}"),
        }
    }

    #[test]
    fn test_default_profile() {
        let p = BoundaryProfile::default();
        assert_eq!(p.dialect, Dialect::Canonical);
        assert!(p.schema_id.is_none());
        assert!(p.schema_version.is_none());
        assert_eq!(p.canonicalization, CanonicalizationProfile::Strict);
        assert_eq!(p.unknown_field_policy, UnknownFieldPolicy::Reject);
        assert_eq!(p.resource_ceilings, ResourceCeilings::default());
    }

    #[test]
    fn test_default_ceilings() {
        let c = ResourceCeilings::default();
        assert_eq!(c.max_object_keys, 128);
        assert_eq!(c.max_string_bytes, 1024 * 1024);
        assert_eq!(c.max_array_len, 1024);
        assert_eq!(c.max_depth, 32);
        assert_eq!(c.max_float_digits, 17);
    }

    #[test]
    fn test_rfc8785_matches_default() {
        let p = BoundaryProfile::rfc8785();
        let d = BoundaryProfile::default();
        assert_eq!(p.dialect, d.dialect);
        assert_eq!(p.schema_id, d.schema_id);
        assert_eq!(p.schema_version, d.schema_version);
        assert_eq!(p.canonicalization, d.canonicalization);
        assert_eq!(p.unknown_field_policy, d.unknown_field_policy);
        assert_eq!(p.resource_ceilings, d.resource_ceilings);
    }

    #[test]
    fn test_profile_with_schema() {
        let p = BoundaryProfile::default().with_schema("https://example.com/s", "1.0.0");
        assert_eq!(p.schema_id.as_deref(), Some("https://example.com/s"));
        assert_eq!(p.schema_version.as_deref(), Some("1.0.0"));
        // Attaching a schema must not disturb the other settings.
        assert_eq!(p.dialect, Dialect::Canonical);
        assert_eq!(p.resource_ceilings, ResourceCeilings::default());
    }

    #[test]
    fn test_identifier() {
        assert_eq!(BoundaryProfile::default().identifier(), "default");

        let both = BoundaryProfile::default().with_schema("https://example.com/s", "1.0.0");
        assert_eq!(both.identifier(), "https://example.com/s:1.0.0");

        let mut id_only = BoundaryProfile::default();
        id_only.schema_id = Some("https://example.com/s".to_string());
        assert_eq!(id_only.identifier(), "https://example.com/s");

        // A version without a schema ID still yields the default identifier.
        let mut version_only = BoundaryProfile::default();
        version_only.schema_version = Some("1.0.0".to_string());
        assert_eq!(version_only.identifier(), "default");
    }

    #[test]
    fn test_deserialize_fills_missing_fields_with_defaults() {
        let p: BoundaryProfile = serde_json::from_value(json!({
            "unknown_field_policy": "pass_through",
            "resource_ceilings": { "max_depth": 4 }
        }))
        .unwrap();
        assert_eq!(p.unknown_field_policy, UnknownFieldPolicy::PassThrough);
        assert_eq!(p.dialect, Dialect::Canonical);
        assert_eq!(p.resource_ceilings.max_depth, 4);
        assert_eq!(p.resource_ceilings.max_array_len, 1024);
    }

    #[test]
    fn test_check_resources_ok() {
        let p = BoundaryProfile::default();
        let val = json!({"a": "hello", "b": [1, 2, 3]});
        p.check_resources(&val).unwrap();
    }

    #[test]
    fn test_check_resources_depth_exceeded() {
        let mut p = BoundaryProfile::default();
        p.resource_ceilings.max_depth = 2;

        // The scalar `1` sits inside three objects, i.e. at depth 3 > 2.
        let val = json!({"a": {"b": {"c": 1}}});
        assert_eq!(
            ceiling_violation(p.check_resources(&val)),
            ("depth".to_string(), 3, 2)
        );
    }

    #[test]
    fn test_check_resources_depth_at_limit_ok() {
        let mut p = BoundaryProfile::default();
        p.resource_ceilings.max_depth = 2;

        // The deepest value is at depth 2, which is allowed (the check is strict).
        let val = json!({"a": {"b": 1}});
        p.check_resources(&val).unwrap();
    }

    #[test]
    fn test_check_resources_object_keys_exceeded() {
        let mut p = BoundaryProfile::default();
        p.resource_ceilings.max_object_keys = 2;

        let val = json!({"a": 1, "b": 2, "c": 3});
        assert_eq!(
            ceiling_violation(p.check_resources(&val)),
            ("object_keys".to_string(), 3, 2)
        );

        // Exactly at the ceiling is accepted.
        p.check_resources(&json!({"a": 1, "b": 2})).unwrap();
    }

    #[test]
    fn test_check_resources_nested_object_keys_exceeded() {
        let mut p = BoundaryProfile::default();
        p.resource_ceilings.max_object_keys = 2;

        // The key ceiling applies to nested objects, not just the root.
        let val = json!({"outer": {"a": 1, "b": 2, "c": 3}});
        assert_eq!(
            ceiling_violation(p.check_resources(&val)),
            ("object_keys".to_string(), 3, 2)
        );
    }

    #[test]
    fn test_check_resources_string_bytes_exceeded() {
        let mut p = BoundaryProfile::default();
        p.resource_ceilings.max_string_bytes = 5;

        // Here it is the object key, not the value, that is too long.
        let val = json!({"toolong": "x"});
        assert_eq!(
            ceiling_violation(p.check_resources(&val)),
            ("string_bytes".to_string(), 7, 5)
        );
    }

    #[test]
    fn test_check_resources_string_value_bytes_exceeded() {
        let mut p = BoundaryProfile::default();
        p.resource_ceilings.max_string_bytes = 5;

        let val = json!({"k": "toolong"});
        assert_eq!(
            ceiling_violation(p.check_resources(&val)),
            ("string_bytes".to_string(), 7, 5)
        );

        // Length is measured in bytes: three two-byte characters are 6 bytes.
        let val = json!("ééé");
        assert_eq!(
            ceiling_violation(p.check_resources(&val)),
            ("string_bytes".to_string(), 6, 5)
        );

        // Exactly at the ceiling is accepted.
        p.check_resources(&json!("exact")).unwrap();
    }

    #[test]
    fn test_check_resources_array_len_exceeded() {
        let mut p = BoundaryProfile::default();
        p.resource_ceilings.max_array_len = 2;

        let val = json!([1, 2, 3]);
        assert_eq!(
            ceiling_violation(p.check_resources(&val)),
            ("array_len".to_string(), 3, 2)
        );

        // Exactly at the ceiling is accepted.
        p.check_resources(&json!([1, 2])).unwrap();
    }
}
265}