Skip to main content

a2ui_base/validate/
integrity.rs

//! Component integrity + recursion/path validation.
//!
//! Ports `integrity_checker.py`. Operates on raw `&serde_json::Value` (not
//! `ComponentModel`) so it can run on incoming message payloads before they are
//! parsed into the internal model.
//!
//! v0.9-flat ONLY. Python's v0.8 structured-component shape
//! (`{"component": {"Type": {...}}}`) and its arbitrary-nested `componentId`
//! recursion (`extract_pointers`) are intentionally omitted: `ComponentModel`
//! already rejects the v0.8 shape, so the validator stays consistent with the
//! existing parser. In v0.9-flat, a `componentId` only ever appears inside a
//! `children` `Template` object.
13
14use std::sync::LazyLock;
15
16use regex::Regex;
17use serde_json::Value;
18
19use super::error::{ValidationError, ValidationReport};
20use super::ref_fields::RefFieldSpec;
21
22/// Maximum nesting depth of the whole JSON tree (mirrors Python
23/// `MAX_GLOBAL_DEPTH`). Guards against pathological payloads.
24pub const MAX_GLOBAL_DEPTH: u32 = 50;
25/// Maximum nesting depth of function-call (`{call, args}`) chains (mirrors
26/// Python `MAX_FUNC_CALL_DEPTH`).
27pub const MAX_FUNC_CALL_DEPTH: u32 = 5;
28
29/// Relaxed JSON-Pointer-ish path syntax, ported verbatim from Python
30/// `RELAXED_PATH_PATTERN`. Allows `/seg/seg` and bare `seg` forms, where each
31/// segment char is `[^~/]` or an escape `~0`/`~1`.
32///
33/// Uses `LazyLock` (stable since Rust 1.80). If the toolchain is older, swap
34/// for `OnceLock` + manual `get_or_init`.
35static RELAXED_PATH_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
36    Regex::new(r"^(?:(?:/(?:[^~/]|~[01])*)*|(?:[^~/]|~[01])+(?:/(?:[^~/]|~[01])*)*)$")
37        .expect("RELAXED_PATH_PATTERN is a compile-time-constant regex")
38});
39
40/// Extract every `(ref_id, field_name)` pair from a single v0.9-flat
41/// component object.
42///
43/// For `single_refs` keys: if the value is a string, yield `(string, key)`.
44/// For `list_refs` keys:
45/// - if the value is an array, yield each string item as `(item, "key[i]")`;
46/// - if the value is an object containing `componentId` (the v0.9 `Template`
47///   shape), yield `(obj["componentId"], "key.componentId")`.
48///
49/// Non-matching shapes are silently ignored (graceful).
50pub fn get_component_references(component: &Value, spec: &RefFieldSpec) -> Vec<(String, String)> {
51    let mut refs = Vec::new();
52    let Some(obj) = component.as_object() else {
53        return refs;
54    };
55
56    // single refs: child, activeTab, ...
57    for &key in spec.single_refs {
58        if let Some(s) = obj.get(key).and_then(|v| v.as_str()) {
59            refs.push((s.to_string(), key.to_string()));
60        }
61    }
62
63    // list refs: children (Static array | Template {componentId, path})
64    for &key in spec.list_refs {
65        let Some(val) = obj.get(key) else {
66            continue;
67        };
68        match val {
69            Value::Array(arr) => {
70                for (i, item) in arr.iter().enumerate() {
71                    if let Some(s) = item.as_str() {
72                        refs.push((s.to_string(), format!("{key}[{i}]")));
73                    }
74                }
75            }
76            Value::Object(o) => {
77                // Template shape: { "componentId": "<id>", "path": "..." }
78                if let Some(cid) = o.get("componentId").and_then(|v| v.as_str()) {
79                    refs.push((cid.to_string(), format!("{key}.componentId")));
80                }
81            }
82            _ => {}
83        }
84    }
85
86    refs
87}
88
89/// Validate component integrity: duplicate ids, missing root, dangling
90/// references. Collects ALL errors (does not short-circuit), returning a
91/// `ValidationReport`.
92///
93/// `allow_dangling_references` skips the dangling check entirely (incremental
94/// update refs may live elsewhere). `allow_missing_root` skips the missing-root
95/// check.
96pub fn validate_component_integrity(
97    components: &[Value],
98    spec: &RefFieldSpec,
99    root_id: &str,
100    allow_dangling_references: bool,
101    allow_missing_root: bool,
102) -> ValidationReport {
103    let mut report = ValidationReport::new();
104    let mut ids: std::collections::HashSet<String> = std::collections::HashSet::new();
105
106    // 1. Collect IDs, flag duplicates.
107    for comp in components {
108        let Some(comp_id) = comp.as_object().and_then(|o| o.get("id")).and_then(|v| v.as_str()) else {
109            continue;
110        };
111        if !ids.insert(comp_id.to_string()) {
112            report.push(ValidationError::duplicate_id(comp_id));
113        }
114    }
115
116    // Incremental update: referenced ids may already be on the client — skip
117    // both the root and dangling checks.
118    if allow_dangling_references {
119        return report;
120    }
121
122    // 2. Missing root.
123    if !allow_missing_root && !ids.contains(root_id) {
124        report.push(ValidationError::missing_root(root_id));
125    }
126
127    // 3. Dangling references.
128    for comp in components {
129        let comp_id = comp
130            .as_object()
131            .and_then(|o| o.get("id"))
132            .and_then(|v| v.as_str())
133            .unwrap_or("Unknown");
134        for (ref_id, field) in get_component_references(comp, spec) {
135            if !ids.contains(&ref_id) {
136                report.push(ValidationError::dangling(comp_id, &ref_id, &field));
137            }
138        }
139    }
140
141    report
142}
143
144/// Validate recursion depth and `path` syntax across an arbitrary JSON value.
145///
146/// - Global nesting depth > `MAX_GLOBAL_DEPTH` → `GlobalDepthExceeded`.
147/// - A `path` string that does not match `RELAXED_PATH_PATTERN` →
148///   `InvalidPathSyntax`.
149/// - A v0.9 function-call object (`{call, args}`) nested deeper than
150///   `MAX_FUNC_CALL_DEPTH` → `FuncCallDepthExceeded`.
151///
152/// Collects all errors found (does not stop at the first).
153pub fn validate_recursion_and_paths(data: &Value) -> ValidationReport {
154    let mut report = ValidationReport::new();
155    traverse(data, 0, 0, &mut report);
156    report
157}
158
159fn traverse(item: &Value, global_depth: u32, func_depth: u32, report: &mut ValidationReport) {
160    if global_depth > MAX_GLOBAL_DEPTH {
161        report.push(ValidationError::global_depth("<anon>"));
162        // Don't recurse further — we've already flagged this branch.
163        return;
164    }
165
166    match item {
167        Value::Array(arr) => {
168            for x in arr {
169                traverse(x, global_depth + 1, func_depth, report);
170            }
171        }
172        Value::Object(obj) => {
173            // path syntax check
174            if let Some(p) = obj.get("path").and_then(|v| v.as_str()) {
175                if !RELAXED_PATH_PATTERN.is_match(p) {
176                    report.push(ValidationError::invalid_path(p));
177                }
178            }
179
180            // v0.9 function-call shape: has both "call" and "args".
181            // (Python also handles a v0.8 "functionCall" object; v0.9-flat
182            // payloads don't use it, so it's omitted here.)
183            let is_func_v09 = obj.get("call").is_some() && obj.get("args").is_some();
184            if is_func_v09 {
185                if func_depth >= MAX_FUNC_CALL_DEPTH {
186                    report.push(ValidationError::func_depth());
187                    // Still recurse into args so deeper violations are caught,
188                    // but the limit has already been flagged for this node.
189                    for (k, v) in obj {
190                        if k == "args" {
191                            traverse(v, global_depth + 1, func_depth + 1, report);
192                        } else {
193                            traverse(v, global_depth + 1, func_depth, report);
194                        }
195                    }
196                    return;
197                }
198                for (k, v) in obj {
199                    if k == "args" {
200                        traverse(v, global_depth + 1, func_depth + 1, report);
201                    } else {
202                        traverse(v, global_depth + 1, func_depth, report);
203                    }
204                }
205            } else {
206                for v in obj.values() {
207                    traverse(v, global_depth + 1, func_depth, report);
208                }
209            }
210        }
211        _ => {}
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate::error::ValidationErrorCode;
    use crate::validate::ref_fields::RefFieldSpec;
    use serde_json::json;

    // Note: these tests use the v0.9-flat shape (id + component string at top
    // level), matching how Rust payloads actually arrive.

    fn spec() -> RefFieldSpec {
        RefFieldSpec::DEFAULT
    }

    /// `null` wrapped in `levels` single-element arrays. The `null` leaf ends
    /// up at global depth `levels`.
    fn nested_arrays(levels: usize) -> Value {
        (0..levels).fold(json!(null), |inner, _| json!([inner]))
    }

    /// `{}` wrapped in `levels` nested `{call, args}` objects. The innermost
    /// call sits at function-call depth `levels - 1`.
    fn call_chain(levels: usize) -> Value {
        (0..levels).fold(json!({}), |inner, _| json!({ "call": "func", "args": inner }))
    }

    // -- get_component_references --

    #[test]
    fn refs_extract_child_string() {
        let comp = json!({ "id": "c1", "component": "Box", "child": "child1" });
        let refs = get_component_references(&comp, &spec());
        assert!(refs.iter().any(|(r, _)| r == "child1"));
    }

    #[test]
    fn refs_extract_children_array() {
        let comp = json!({ "id": "c1", "component": "Column", "children": ["a", "b"] });
        let refs = get_component_references(&comp, &spec());
        let ids: Vec<&str> = refs.iter().map(|(r, _)| r.as_str()).collect();
        assert!(ids.contains(&"a"));
        assert!(ids.contains(&"b"));
        // Field labels carry the array index of each item.
        assert!(refs.iter().any(|(r, f)| r == "a" && f == "children[0]"));
        assert!(refs.iter().any(|(r, f)| r == "b" && f == "children[1]"));
    }

    #[test]
    fn refs_extract_children_template_component_id() {
        let comp = json!({
            "id": "c1", "component": "Column",
            "children": { "componentId": "card", "path": "/items" }
        });
        let refs = get_component_references(&comp, &spec());
        assert!(refs.iter().any(|(r, f)| r == "card" && f == "children.componentId"));
    }

    #[test]
    fn refs_extract_active_tab() {
        let comp = json!({ "id": "c1", "component": "Tabs", "activeTab": "tab1" });
        let refs = get_component_references(&comp, &spec());
        assert!(refs.iter().any(|(r, _)| r == "tab1"));
    }

    #[test]
    fn refs_non_object_component_yields_nothing() {
        assert!(get_component_references(&json!("not an object"), &spec()).is_empty());
        assert!(get_component_references(&json!(null), &spec()).is_empty());
    }

    // -- validate_component_integrity --

    #[test]
    fn integrity_valid_no_errors() {
        let components = vec![
            json!({ "id": "root", "component": "Column", "children": ["c1"] }),
            json!({ "id": "c1", "component": "Text", "text": "hi" }),
        ];
        let r = validate_component_integrity(&components, &spec(), "root", false, false);
        assert!(r.is_empty(), "expected no errors, got: {r}");
    }

    #[test]
    fn integrity_duplicate_id() {
        let components = vec![
            json!({ "id": "c1", "component": "Box" }),
            json!({ "id": "c1", "component": "Text" }),
        ];
        let r = validate_component_integrity(&components, &spec(), "root", false, true);
        assert!(r.has_code(&ValidationErrorCode::DuplicateId));
    }

    #[test]
    fn integrity_missing_root() {
        let components = vec![json!({ "id": "c1", "component": "Box" })];
        let r = validate_component_integrity(&components, &spec(), "root", false, false);
        assert!(r.has_code(&ValidationErrorCode::MissingRoot));
    }

    #[test]
    fn integrity_dangling_ref() {
        let components =
            vec![json!({ "id": "root", "component": "Box", "child": "nonexistent" })];
        let r = validate_component_integrity(&components, &spec(), "root", false, false);
        assert!(r.has_code(&ValidationErrorCode::DanglingReference));
    }

    #[test]
    fn integrity_collects_all_error_kinds() {
        // Duplicate id, no "root" component, and a reference to an unknown id:
        // all three must show up in a single report.
        let components = vec![
            json!({ "id": "a", "component": "Box", "child": "ghost" }),
            json!({ "id": "a", "component": "Text" }),
        ];
        let r = validate_component_integrity(&components, &spec(), "root", false, false);
        assert!(r.has_code(&ValidationErrorCode::DuplicateId));
        assert!(r.has_code(&ValidationErrorCode::MissingRoot));
        assert!(r.has_code(&ValidationErrorCode::DanglingReference));
    }

    #[test]
    fn relaxed_allows_dangling_and_missing_root() {
        let components =
            vec![json!({ "id": "root", "component": "Box", "child": "ghost" })];
        // allow_dangling_references=true short-circuits before the root/dangling
        // checks, so neither MissingRoot nor DanglingReference is reported.
        let r = validate_component_integrity(&components, &spec(), "root", true, true);
        assert!(r.is_empty(), "expected no errors under RELAXED, got: {r}");
    }

    #[test]
    fn allow_dangling_also_skips_root_check() {
        // The early return for allow_dangling_references happens before the
        // root check, so MissingRoot is suppressed even though
        // allow_missing_root is false.
        let components = vec![json!({ "id": "c1", "component": "Box", "child": "ghost" })];
        let r = validate_component_integrity(&components, &spec(), "root", true, false);
        assert!(!r.has_code(&ValidationErrorCode::MissingRoot));
        assert!(!r.has_code(&ValidationErrorCode::DanglingReference));
    }

    #[test]
    fn allow_dangling_still_reports_duplicates() {
        let components = vec![
            json!({ "id": "c1", "component": "Box" }),
            json!({ "id": "c1", "component": "Text" }),
        ];
        let r = validate_component_integrity(&components, &spec(), "root", true, true);
        assert!(r.has_code(&ValidationErrorCode::DuplicateId));
    }

    // -- validate_recursion_and_paths --

    #[test]
    fn recursion_valid_path() {
        let data = json!({ "path": "/valid/path", "nested": [{ "path": "/another" }] });
        let r = validate_recursion_and_paths(&data);
        assert!(r.is_empty(), "expected no errors, got: {r}");
    }

    #[test]
    fn recursion_invalid_path_syntax() {
        // Rejected because of the bad `~p` escape; the `//` alone would pass
        // (see `recursion_path_allows_empty_segments`).
        let data = json!({ "path": "invalid~path//double" });
        let r = validate_recursion_and_paths(&data);
        assert!(r.has_code(&ValidationErrorCode::InvalidPathSyntax));
    }

    #[test]
    fn recursion_path_rejects_bad_tilde_escape() {
        for bad in ["~", "/a~", "/a~2b", "a~b"] {
            let r = validate_recursion_and_paths(&json!({ "path": bad }));
            assert!(
                r.has_code(&ValidationErrorCode::InvalidPathSyntax),
                "expected {bad:?} to be rejected"
            );
        }
    }

    #[test]
    fn recursion_path_allows_empty_segments() {
        // The relaxed grammar accepts the empty path, empty segments, bare
        // segments and the `~0` / `~1` escapes.
        for ok in ["", "/", "/a//b", "a//b", "bare", "bare/seg", "/a~0b/c~1d"] {
            let r = validate_recursion_and_paths(&json!({ "path": ok }));
            assert!(r.is_empty(), "expected {ok:?} to be accepted, got: {r}");
        }
    }

    #[test]
    fn recursion_non_string_path_is_ignored() {
        let r = validate_recursion_and_paths(&json!({ "path": 42 }));
        assert!(r.is_empty(), "expected no errors, got: {r}");
    }

    #[test]
    fn recursion_global_depth_exceeded() {
        // 52-deep nested array.
        let r = validate_recursion_and_paths(&nested_arrays(52));
        assert!(r.has_code(&ValidationErrorCode::GlobalDepthExceeded));
    }

    #[test]
    fn recursion_global_depth_boundary() {
        // Leaf at depth == MAX_GLOBAL_DEPTH is still fine ...
        let at_limit = validate_recursion_and_paths(&nested_arrays(MAX_GLOBAL_DEPTH as usize));
        assert!(at_limit.is_empty(), "expected no errors at the limit, got: {at_limit}");
        // ... one level more is flagged.
        let over = validate_recursion_and_paths(&nested_arrays(MAX_GLOBAL_DEPTH as usize + 1));
        assert!(over.has_code(&ValidationErrorCode::GlobalDepthExceeded));
    }

    #[test]
    fn recursion_func_call_depth_exceeded() {
        // 6-deep {call, args} chain.
        let r = validate_recursion_and_paths(&call_chain(6));
        assert!(r.has_code(&ValidationErrorCode::FuncCallDepthExceeded));
    }

    #[test]
    fn recursion_func_call_depth_boundary() {
        // MAX_FUNC_CALL_DEPTH nested calls are allowed ...
        let at_limit = validate_recursion_and_paths(&call_chain(MAX_FUNC_CALL_DEPTH as usize));
        assert!(at_limit.is_empty(), "expected no errors at the limit, got: {at_limit}");
        // ... one more is flagged.
        let over = validate_recursion_and_paths(&call_chain(MAX_FUNC_CALL_DEPTH as usize + 1));
        assert!(over.has_code(&ValidationErrorCode::FuncCallDepthExceeded));
    }

    #[test]
    fn recursion_call_without_args_is_not_a_function_call() {
        // Objects with only `call` do not deepen the function-call chain, so
        // nesting them under other keys never trips the function-call limit.
        let data = (0..10).fold(json!({}), |inner, _| json!({ "call": "func", "next": inner }));
        let r = validate_recursion_and_paths(&data);
        assert!(r.is_empty(), "expected no errors, got: {r}");
    }
}