Skip to main content

hedl_core/
traverse.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Document traversal trait for format converters.
19//!
20//! This module provides a shared abstraction for traversing HEDL documents,
21//! reducing code duplication across format converters (JSON, YAML, XML, etc.).
22//!
23//! # Architecture
24//!
25//! The visitor pattern is used to separate traversal logic from conversion logic.
26//! Format converters implement the `DocumentVisitor` trait to handle each element
27//! type, while the `traverse` function handles the recursive structure.
28//!
29//! # Example
30//!
31//! ```text
32//! use hedl_core::traverse::{DocumentVisitor, traverse, VisitorContext};
33//!
34//! struct JsonEmitter { output: String }
35//!
36//! impl DocumentVisitor for JsonEmitter {
37//!     type Error = String;
38//!
39//!     fn visit_scalar(&mut self, key: &str, value: &Value, ctx: &VisitorContext) -> Result<(), Self::Error> {
40//!         // Emit JSON scalar
41//!         Ok(())
42//!     }
43//!     // ... other methods
44//! }
45//!
46//! let mut emitter = JsonEmitter::default();
47//! traverse(&doc, &mut emitter)?;
48//! ```
49
50use crate::{Document, Item, MatrixList, Node, Value};
51
52/// Context provided to visitors during traversal.
53#[derive(Debug, Clone)]
54pub struct VisitorContext<'a> {
55    /// Current nesting depth (0 = root level).
56    pub depth: usize,
57    /// Path from root to current element (key names).
58    pub path: Vec<&'a str>,
59    /// Reference to the document being traversed.
60    pub document: &'a Document,
61    /// Schema for the current list (if within a list context).
62    pub current_schema: Option<&'a [String]>,
63}
64
65impl<'a> VisitorContext<'a> {
66    /// Create a new context for the root level.
67    pub fn new(document: &'a Document) -> Self {
68        Self {
69            depth: 0,
70            path: Vec::new(),
71            document,
72            current_schema: None,
73        }
74    }
75
76    /// Create a child context with incremented depth.
77    pub fn child(&self, key: &'a str) -> Self {
78        let mut path = self.path.clone();
79        path.push(key);
80        Self {
81            depth: self.depth + 1,
82            path,
83            document: self.document,
84            current_schema: self.current_schema,
85        }
86    }
87
88    /// Create a child context with a list schema.
89    pub fn with_schema(&self, schema: &'a [String]) -> Self {
90        Self {
91            depth: self.depth,
92            path: self.path.clone(),
93            document: self.document,
94            current_schema: Some(schema),
95        }
96    }
97
98    /// Get the current path as a string (for error messages).
99    pub fn path_string(&self) -> String {
100        if self.path.is_empty() {
101            "root".to_string()
102        } else {
103            self.path.join(".")
104        }
105    }
106}
107
108/// Trait for visiting elements of a HEDL document.
109///
110/// Implement this trait to perform format conversion or analysis.
111/// All methods have default implementations that do nothing, allowing
112/// implementations to override only the methods they need.
113pub trait DocumentVisitor {
114    /// Error type returned by visitor methods.
115    type Error;
116
117    /// Called at the start of document traversal.
118    fn begin_document(
119        &mut self,
120        _doc: &Document,
121        _ctx: &VisitorContext<'_>,
122    ) -> Result<(), Self::Error> {
123        Ok(())
124    }
125
126    /// Called at the end of document traversal.
127    fn end_document(
128        &mut self,
129        _doc: &Document,
130        _ctx: &VisitorContext<'_>,
131    ) -> Result<(), Self::Error> {
132        Ok(())
133    }
134
135    /// Called when visiting a scalar value.
136    fn visit_scalar(
137        &mut self,
138        key: &str,
139        value: &Value,
140        ctx: &VisitorContext<'_>,
141    ) -> Result<(), Self::Error>;
142
143    /// Called at the start of an object (before visiting children).
144    fn begin_object(&mut self, _key: &str, _ctx: &VisitorContext<'_>) -> Result<(), Self::Error> {
145        Ok(())
146    }
147
148    /// Called at the end of an object (after visiting children).
149    fn end_object(&mut self, _key: &str, _ctx: &VisitorContext<'_>) -> Result<(), Self::Error> {
150        Ok(())
151    }
152
153    /// Called at the start of a matrix list (before visiting rows).
154    fn begin_list(
155        &mut self,
156        _key: &str,
157        _list: &MatrixList,
158        _ctx: &VisitorContext<'_>,
159    ) -> Result<(), Self::Error> {
160        Ok(())
161    }
162
163    /// Called at the end of a matrix list (after visiting rows).
164    fn end_list(
165        &mut self,
166        _key: &str,
167        _list: &MatrixList,
168        _ctx: &VisitorContext<'_>,
169    ) -> Result<(), Self::Error> {
170        Ok(())
171    }
172
173    /// Called when visiting a node (row) in a matrix list.
174    fn visit_node(
175        &mut self,
176        node: &Node,
177        schema: &[String],
178        ctx: &VisitorContext<'_>,
179    ) -> Result<(), Self::Error>;
180
181    /// Called at the start of a node's children (nested entities).
182    fn begin_node_children(
183        &mut self,
184        _node: &Node,
185        _ctx: &VisitorContext<'_>,
186    ) -> Result<(), Self::Error> {
187        Ok(())
188    }
189
190    /// Called at the end of a node's children.
191    fn end_node_children(
192        &mut self,
193        _node: &Node,
194        _ctx: &VisitorContext<'_>,
195    ) -> Result<(), Self::Error> {
196        Ok(())
197    }
198}
199
200/// Traverse a HEDL document, calling visitor methods for each element.
201///
202/// This function handles the recursive structure of documents, freeing
203/// format converters from duplicating traversal logic.
204pub fn traverse<V: DocumentVisitor>(doc: &Document, visitor: &mut V) -> Result<(), V::Error> {
205    let ctx = VisitorContext::new(doc);
206    visitor.begin_document(doc, &ctx)?;
207
208    for (key, item) in &doc.root {
209        traverse_item(key, item, visitor, &ctx)?;
210    }
211
212    visitor.end_document(doc, &ctx)?;
213    Ok(())
214}
215
216/// Traverse a single item recursively.
217fn traverse_item<V: DocumentVisitor>(
218    key: &str,
219    item: &Item,
220    visitor: &mut V,
221    ctx: &VisitorContext<'_>,
222) -> Result<(), V::Error> {
223    match item {
224        Item::Scalar(value) => {
225            visitor.visit_scalar(key, value, ctx)?;
226        }
227        Item::Object(map) => {
228            visitor.begin_object(key, ctx)?;
229            let child_ctx = ctx.child(key);
230            for (child_key, child_item) in map {
231                traverse_item(child_key, child_item, visitor, &child_ctx)?;
232            }
233            visitor.end_object(key, ctx)?;
234        }
235        Item::List(list) => {
236            visitor.begin_list(key, list, ctx)?;
237            let list_ctx = ctx.child(key).with_schema(&list.schema);
238            for node in &list.rows {
239                traverse_node(node, &list.schema, visitor, &list_ctx)?;
240            }
241            visitor.end_list(key, list, ctx)?;
242        }
243    }
244    Ok(())
245}
246
247/// Traverse a node and its children recursively.
248fn traverse_node<V: DocumentVisitor>(
249    node: &Node,
250    schema: &[String],
251    visitor: &mut V,
252    ctx: &VisitorContext<'_>,
253) -> Result<(), V::Error> {
254    visitor.visit_node(node, schema, ctx)?;
255
256    if let Some(children) = node.children() {
257        if !children.is_empty() {
258            visitor.begin_node_children(node, ctx)?;
259
260            let child_ctx = ctx.child(&node.id);
261            for (child_type, child_nodes) in children {
262                // Get schema for child type from document
263                let child_schema = ctx.document.structs.get(child_type);
264                let child_schema = child_schema.map(|s| s.as_slice()).unwrap_or(&[]);
265
266                for child in child_nodes {
267                    traverse_node(child, child_schema, visitor, &child_ctx)?;
268                }
269            }
270
271            visitor.end_node_children(node, ctx)?;
272        }
273    }
274
275    Ok(())
276}
277
278/// Statistics collector visitor for testing and analysis.
279#[derive(Debug, Default)]
280pub struct StatsCollector {
281    /// Number of scalars visited.
282    pub scalar_count: usize,
283    /// Number of objects visited.
284    pub object_count: usize,
285    /// Number of lists visited.
286    pub list_count: usize,
287    /// Number of nodes visited.
288    pub node_count: usize,
289    /// Maximum depth reached.
290    pub max_depth: usize,
291}
292
293impl DocumentVisitor for StatsCollector {
294    type Error = std::convert::Infallible;
295
296    fn visit_scalar(
297        &mut self,
298        _key: &str,
299        _value: &Value,
300        ctx: &VisitorContext<'_>,
301    ) -> Result<(), Self::Error> {
302        self.scalar_count += 1;
303        self.max_depth = self.max_depth.max(ctx.depth);
304        Ok(())
305    }
306
307    fn begin_object(&mut self, _key: &str, ctx: &VisitorContext<'_>) -> Result<(), Self::Error> {
308        self.object_count += 1;
309        self.max_depth = self.max_depth.max(ctx.depth);
310        Ok(())
311    }
312
313    fn begin_list(
314        &mut self,
315        _key: &str,
316        _list: &MatrixList,
317        ctx: &VisitorContext<'_>,
318    ) -> Result<(), Self::Error> {
319        self.list_count += 1;
320        self.max_depth = self.max_depth.max(ctx.depth);
321        Ok(())
322    }
323
324    fn visit_node(
325        &mut self,
326        _node: &Node,
327        _schema: &[String],
328        ctx: &VisitorContext<'_>,
329    ) -> Result<(), Self::Error> {
330        self.node_count += 1;
331        self.max_depth = self.max_depth.max(ctx.depth);
332        Ok(())
333    }
334}
335
336#[cfg(test)]
337mod tests {
338    use super::*;
339    use crate::{parse, Value};
340
341    #[test]
342    fn test_traverse_empty_document() {
343        let hedl = "%V:2.0\n%NULL:~\n%QUOTE:\"\n---\n";
344        let doc = parse(hedl.as_bytes()).unwrap();
345
346        let mut stats = StatsCollector::default();
347        traverse(&doc, &mut stats).unwrap();
348
349        assert_eq!(stats.scalar_count, 0);
350        assert_eq!(stats.object_count, 0);
351        assert_eq!(stats.list_count, 0);
352        assert_eq!(stats.node_count, 0);
353    }
354
355    #[test]
356    fn test_traverse_scalars() {
357        let hedl = "%V:2.0\n%NULL:~\n%QUOTE:\"\n---\nname: Test\ncount: 42\n";
358        let doc = parse(hedl.as_bytes()).unwrap();
359
360        let mut stats = StatsCollector::default();
361        traverse(&doc, &mut stats).unwrap();
362
363        assert_eq!(stats.scalar_count, 2);
364        assert_eq!(stats.max_depth, 0);
365    }
366
367    #[test]
368    fn test_traverse_nested_objects() {
369        let hedl = "%V:2.0\n%NULL:~\n%QUOTE:\"\n---\nouter:\n inner:\n  value: 42\n";
370        let doc = parse(hedl.as_bytes()).unwrap();
371
372        let mut stats = StatsCollector::default();
373        traverse(&doc, &mut stats).unwrap();
374
375        assert_eq!(stats.object_count, 2);
376        assert_eq!(stats.scalar_count, 1);
377        assert_eq!(stats.max_depth, 2);
378    }
379
380    #[test]
381    fn test_traverse_list() {
382        let hedl = "%V:2.0\n%NULL:~\n%QUOTE:\"\n%S:User:[id,name]\n---\nusers:@User\n |alice, Alice\n |bob, Bob\n";
383        let doc = parse(hedl.as_bytes()).unwrap();
384
385        let mut stats = StatsCollector::default();
386        traverse(&doc, &mut stats).unwrap();
387
388        assert_eq!(stats.list_count, 1);
389        assert_eq!(stats.node_count, 2);
390    }
391
392    #[test]
393    fn test_traverse_nested_nodes() {
394        let hedl = "%V:2.0\n%NULL:~\n%QUOTE:\"\n%S:User:[id]\n%S:Post:[id]\n%N:User>Post\n---\nusers:@User\n |alice\n  |post1\n  |post2\n";
395        let doc = parse(hedl.as_bytes()).unwrap();
396
397        let mut stats = StatsCollector::default();
398        traverse(&doc, &mut stats).unwrap();
399
400        assert_eq!(stats.list_count, 1);
401        assert_eq!(stats.node_count, 3); // 1 user + 2 posts
402    }
403
404    #[test]
405    fn test_visitor_context_path() {
406        let hedl = "%V:2.0\n%NULL:~\n%QUOTE:\"\n---\na:\n b:\n  c: 42\n";
407        let doc = parse(hedl.as_bytes()).unwrap();
408
409        struct PathCollector {
410            paths: Vec<String>,
411        }
412
413        impl DocumentVisitor for PathCollector {
414            type Error = std::convert::Infallible;
415
416            fn visit_scalar(
417                &mut self,
418                _key: &str,
419                _value: &Value,
420                ctx: &VisitorContext<'_>,
421            ) -> Result<(), Self::Error> {
422                self.paths.push(ctx.path_string());
423                Ok(())
424            }
425
426            fn begin_object(
427                &mut self,
428                _key: &str,
429                ctx: &VisitorContext<'_>,
430            ) -> Result<(), Self::Error> {
431                self.paths.push(ctx.path_string());
432                Ok(())
433            }
434
435            fn visit_node(
436                &mut self,
437                _node: &Node,
438                _schema: &[String],
439                ctx: &VisitorContext<'_>,
440            ) -> Result<(), Self::Error> {
441                self.paths.push(ctx.path_string());
442                Ok(())
443            }
444        }
445
446        let mut collector = PathCollector { paths: Vec::new() };
447        traverse(&doc, &mut collector).unwrap();
448
449        assert!(collector.paths.contains(&"root".to_string()));
450        assert!(collector.paths.contains(&"a".to_string()));
451        assert!(collector.paths.contains(&"a.b".to_string()));
452    }
453}