Skip to main content

shape_runtime/stdlib/
xml.rs

1//! Native `xml` module for XML parsing and serialization.
2//!
3//! Exports: xml.parse(text), xml.stringify(value)
4//!
5//! XML nodes are represented as Shape TypedObjects with the `XmlNode`
6//! schema: `{ name: string, attributes: HashMap<string, string>,
7//!            children: Array<XmlNode>, text: string }`
8//!
9//! W17-out-of-bundle-A-followups (2026-05-12): children rewire per the
10//! C+ precedent recorded in `phase-2d-playbook.md` §3
11//! ("Bundle-A checkpoint-2 amendment"). Pre-rewire, each child was an
12//! `Arc<HeapValue::HashMap>` carried inside the deleted
13//! `TypedArrayData::HeapValue` arm. Post-rewire, each child is an
14//! `Arc<HeapValue::TypedObject>` with the registered `XmlNode` schema,
15//! and the outer children array lowers to `TypedArrayData::TypedObject`
16//! per ADR-006 §2.7.24 Q25.A's specialized list.
17//!
18//! User-visible API: `node.children[i].name` / `.attributes` / `.text`
19//! continue to work via TypedObject field access (same shape as the
20//! prior HashMap dispatch). The `text` field is now always present
21//! (empty string when absent); the prior optional-field shape was
22//! already flattened.
23//!
24//! Stage C HashMap-marshal P1(b) historical context (2026-05-07):
25//! - `xml.parse` returns the root element as `TypedReturn::OkObjectPairs`
26//!   per Cluster #4 β shape (mirrors `arrow.metadata` / http.rs precedents).
27//! - `xml.stringify` takes `value: HashMap<string, *>` typed input via
28//!   `Vec<(Arc<String>, Arc<HeapValue>)>` FromSlot from Step 1 P1(b)
29//!   infrastructure (commit `36519f6`). Walks the recursive HeapValue
30//!   tree using direct pattern matching — no marshal-boundary
31//!   re-entry per element. The reader now dispatches the `children`
32//!   field through `TypedArrayData::TypedObject` per the post-rewire
33//!   construction shape.
34//! - Attributes (`HashMap<string, string>`) carried via
35//!   `ConcreteReturn::HashMapStringString` on output and read directly
36//!   from `HeapValue::HashMap(d)` on input.
37//!
38//! Tests deleted along with the legacy ValueWord-based fixtures, mirroring
39//! the csv_module migration (commit `9f6b1d3`). New typed-marshal test
40//! harness arrives with the shape-vm cleanup workstream.
41
42use crate::marshal::{register_typed_fn_1, register_typed_fn_1_full};
43use crate::module_exports::{ModuleExports, ModuleParam};
44use crate::type_schema::register_predeclared_any_schema;
45use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
46use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
47use quick_xml::{Reader, Writer};
48use shape_value::heap_value::{HashMapData, HeapValue, TypedObjectStorage};
49use shape_value::v2::typed_array::TypedArray;
50use shape_value::{HeapKind, NativeKind, ValueSlot};
51use std::io::Cursor;
52use std::sync::Arc;
53
/// Field names of the `XmlNode` schema, listed in slot order.
///
/// `ensure_xml_node_schema` hands this list to
/// `register_predeclared_any_schema`, and `write_typed_object_node` checks a
/// node's slot count against its length, so this constant is the single
/// source of truth for the node layout.
const XML_NODE_FIELDS: &[&str] = &[
    "name",
    "attributes",
    "children",
    "text",
];
59
/// One parsed XML element, held as a plain recursive Rust tree before it is
/// projected into runtime values.
///
/// The derives make the intermediate tree printable in diagnostics
/// (`Debug`), comparable (`PartialEq`/`Eq`), copyable (`Clone`) and
/// constructible as an empty element (`Default`).
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct ElementData {
    /// Tag name exactly as it appears in the source document.
    name: String,
    /// `(key, value)` attribute pairs in document order.
    attributes: Vec<(String, String)>,
    /// Child elements in document order.
    children: Vec<ElementData>,
    /// Concatenated text content, or `None` when the element had none.
    text: Option<String>,
}
68
69impl ElementData {
70    /// Project this element into a `HeapValue::TypedObject(...)` with
71    /// the `XmlNode` schema (W17-out-of-bundle-A-followups, 2026-05-12).
72    /// Children are recursively projected through this method and form
73    /// a `TypedArrayData::TypedObject` array — no polymorphic
74    /// `Array<HashMap>` carrier. Per C+ precedent the schema is
75    /// auto-registered via `register_predeclared_any_schema`.
76    ///
77    /// Field order matches `XML_NODE_FIELDS` (name, attributes,
78    /// children, text). `text` is always present at the slot level
79    /// (empty string when the source XML had no text node) so the
80    /// schema is fixed-arity and the type is exhaustive — no Option
81    /// indirection at the storage layer.
82    fn into_typed_object_arc(self) -> Arc<HeapValue> {
83        // Wave 2 Round 3b C2-joint ckpt-4 (2026-05-14): build the XML
84        // attributes HashMap via the per-V mutation API on
85        // `HashMapData<*const StringObj>` (V = string). Each (k, v) pair
86        // becomes one fresh StringObj insert; the wrapper carries one
87        // refcount share per element. ADR-006 §2.7.24 Q25.B SUPERSEDED.
88        let mut attrs_data: HashMapData<*const shape_value::v2::string_obj::StringObj> =
89            HashMapData::new();
90        for (k, v) in &self.attributes {
91            let v_obj = shape_value::v2::string_obj::StringObj::new(v.as_str())
92                as *const shape_value::v2::string_obj::StringObj;
93            unsafe { attrs_data.insert(k.as_str(), v_obj) };
94        }
95        let attrs_data: shape_value::heap_value::HashMapKindedRef =
96            shape_value::heap_value::HashMapKindedRef::String(Arc::new(attrs_data));
97        // Recurse: each child becomes its own TypedObject. The child raw
98        // `*const TypedObjectStorage` pointers are packed into a
99        // `*mut TypedArray<*const TypedObjectStorage>` flat-struct carrier
100        // per V3-S5 ckpt-5-prime²c Migration shape (a) (the deleted
101        // `TypedArrayData::TypedObject` enum-arm shape). The
102        // `TypedObjectStorage` type impls `v2::heap_element::HeapElement`
103        // (`heap_value.rs:3971`), so per-element retain/release dispatches
104        // through `v2_retain` / `v2_release` on the on-header refcount.
105        //
106        // Each child `into_typed_object_arc()` returns an `Arc<HeapValue>`
107        // wrapping `HeapValue::TypedObject(TypedObjectPtr)` — we extract
108        // the inner raw pointer via `into_raw()` (transferring the
109        // wrapper's one refcount share to the raw pointer, which the
110        // `TypedArray` takes ownership of as an element).
111        let child_ptrs: Vec<*const TypedObjectStorage> = self
112            .children
113            .into_iter()
114            .map(|c| {
115                let child_hv = c.into_typed_object_arc();
116                // Extract inner TypedObjectPtr by cloning out and consuming.
117                let to_ptr = match &*child_hv {
118                    HeapValue::TypedObject(s) => s.clone(),
119                    _ => unreachable!(
120                        "into_typed_object_arc must return HeapValue::TypedObject"
121                    ),
122                };
123                to_ptr.into_raw()
124            })
125            .collect();
126        let children_arr: *mut TypedArray<*const TypedObjectStorage> =
127            TypedArray::<*const TypedObjectStorage>::from_slice(&child_ptrs);
128        // `from_slice` copies each `*const TypedObjectStorage` bit-for-bit
129        // (raw pointers are Copy). The refcount shares were transferred
130        // from the source `TypedObjectPtr` wrappers into raw pointers
131        // already; the source `Vec<*const _>` doesn't own any share, so
132        // ordinary Drop suffices for the source Vec's heap allocation.
133        // Element-share ownership now lives with the array.
134
135        let schema_id = ensure_xml_node_schema();
136        // Field-order: name(0), attributes(1), children(2), text(3).
137        // Heap mask: name(String), attributes(HashMap), children(TypedArray),
138        // text(String) — all 4 fields are heap-resident.
139        let name_arc = Arc::new(self.name);
140        let attrs_arc = Arc::new(attrs_data);
141        let text_arc = Arc::new(self.text.unwrap_or_default());
142
143        let slots: Box<[ValueSlot]> = Box::new([
144            ValueSlot::from_string_arc(name_arc),
145            ValueSlot::from_hashmap(attrs_arc),
146            // V3-S5 ckpt-5-prime²c (2026-05-15) Migration shape (a): the
147            // `ValueSlot::from_typed_array(Arc<TypedArrayData>)` constructor
148            // is deleted; per-element-kind constructors aren't landed yet
149            // (Round 2 follow-up). Store the raw `*mut TypedArray<T>`
150            // pointer directly via `ValueSlot::from_u64` — this is the
151            // canonical slot-bit shape for `NativeKind::Ptr(HeapKind::
152            // TypedArray)` per `docs/runtime-v2-spec.md`. The schema's
153            // field_kinds[2] = `Ptr(HeapKind::TypedArray)` controls
154            // drop dispatch at slot release time.
155            ValueSlot::from_u64(children_arr as u64),
156            ValueSlot::from_string_arc(text_arc),
157        ]);
158        let field_kinds: Arc<[NativeKind]> = Arc::from(
159            vec![
160                NativeKind::String,
161                NativeKind::Ptr(HeapKind::HashMap),
162                NativeKind::Ptr(HeapKind::TypedArray),
163                NativeKind::String,
164            ]
165            .into_boxed_slice(),
166        );
167        let heap_mask: u64 = 0b1111; // all 4 fields heap-resident
168        // Wave 2 Round 4 D4 ckpt-final-prime² (2026-05-14): variant signature
169        // flipped to `HeapValue::TypedObject(TypedObjectPtr)`. The
170        // `_new`-returned raw pointer (refcount=1) is wrapped in
171        // `TypedObjectPtr`, transferring the share to the wrapper.
172        let storage = TypedObjectStorage::_new(
173            schema_id as u64,
174            slots,
175            heap_mask,
176            field_kinds,
177        );
178        Arc::new(HeapValue::TypedObject(
179            shape_value::heap_value::TypedObjectPtr::new(storage),
180        ))
181    }
182
183    /// Project this element's TOP-LEVEL form as a `Vec<(String,
184    /// ConcreteReturn)>` pair-list, suitable for `TypedReturn::OkObjectPairs`.
185    /// Used only for the root element of `xml.parse`'s return value;
186    /// nested elements go through `into_typed_object_arc` instead.
187    fn into_root_pairs(self) -> Vec<(String, ConcreteReturn)> {
188        let attrs_pairs: Vec<(String, String)> = self.attributes;
189        // Each child is now an `Arc<HeapValue::TypedObject>`. The marshal
190        // boundary's `ConcreteReturn::ArrayHeapValue` consumer routes
191        // through `TypedArrayData::build_specialized_from_heap_arcs`,
192        // which already dispatches the `HeapValue::TypedObject` arm to
193        // `TypedArrayData::TypedObject` per ADR-006 §2.7.24 Q25.A. No
194        // out-of-territory follow-up: the rewire is structurally
195        // resolved by C+ precedent.
196        let children_arc: Vec<Arc<HeapValue>> = self
197            .children
198            .into_iter()
199            .map(ElementData::into_typed_object_arc)
200            .collect();
201
202        let mut pairs = vec![
203            ("name".to_string(), ConcreteReturn::String(self.name)),
204            (
205                "attributes".to_string(),
206                ConcreteReturn::HashMapStringString(attrs_pairs),
207            ),
208            (
209                "children".to_string(),
210                ConcreteReturn::ArrayHeapValue(children_arc),
211            ),
212        ];
213        // `text?` follows the regex.rs precedent: emit empty string when
214        // absent. Keeps the schema fixed at 4 fields when text is present
215        // and 3 fields when absent — variable-length pair list per the
216        // ObjectPairs contract.
217        if let Some(text) = self.text {
218            pairs.push(("text".to_string(), ConcreteReturn::String(text)));
219        }
220        pairs
221    }
222}
223
224/// Register the `XmlNode` predeclared schema (auto-registered on first
225/// use; subsequent calls return the cached SchemaId via the registry's
226/// own deduplication). Returns the raw `u32` schema id used by
227/// `TypedObjectStorage::schema_id`.
228fn ensure_xml_node_schema() -> u32 {
229    let owned: Vec<String> = XML_NODE_FIELDS.iter().map(|s| s.to_string()).collect();
230    register_predeclared_any_schema(&owned)
231}
232
233/// Parse an XML element recursively from a quick-xml reader.
234fn parse_element(
235    reader: &mut Reader<&[u8]>,
236    start: &BytesStart,
237) -> Result<ElementData, String> {
238    let name = std::str::from_utf8(start.name().as_ref())
239        .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
240        .to_string();
241
242    let mut attributes = Vec::new();
243    for attr in start.attributes() {
244        let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
245        let key = std::str::from_utf8(attr.key.as_ref())
246            .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
247            .to_string();
248        let value = attr
249            .unescape_value()
250            .map_err(|e| format!("Invalid attribute value: {}", e))?
251            .to_string();
252        attributes.push((key, value));
253    }
254
255    let mut children = Vec::new();
256    let mut text_parts = Vec::new();
257    let mut buf = Vec::new();
258
259    loop {
260        match reader.read_event_into(&mut buf) {
261            Ok(Event::Start(ref e)) => {
262                let child = parse_element(reader, e)?;
263                children.push(child);
264            }
265            Ok(Event::Empty(ref e)) => {
266                let child = parse_empty_element(e)?;
267                children.push(child);
268            }
269            Ok(Event::Text(ref e)) => {
270                let t = e
271                    .unescape()
272                    .map_err(|err| format!("Error unescaping text: {}", err))?
273                    .to_string();
274                let trimmed = t.trim().to_string();
275                if !trimmed.is_empty() {
276                    text_parts.push(trimmed);
277                }
278            }
279            Ok(Event::CData(ref e)) => {
280                let t = std::str::from_utf8(e.as_ref())
281                    .map_err(|err| format!("Invalid UTF-8 in CDATA: {}", err))?
282                    .to_string();
283                if !t.trim().is_empty() {
284                    text_parts.push(t);
285                }
286            }
287            Ok(Event::End(_)) => break,
288            Ok(Event::Eof) => {
289                return Err("Unexpected end of XML".to_string());
290            }
291            Ok(_) => {} // Skip comments, PI, etc.
292            Err(e) => return Err(format!("XML parse error: {}", e)),
293        }
294        buf.clear();
295    }
296
297    Ok(ElementData {
298        name,
299        attributes,
300        children,
301        text: if text_parts.is_empty() {
302            None
303        } else {
304            Some(text_parts.join(""))
305        },
306    })
307}
308
309/// Parse a self-closing XML element (e.g. `<br/>`).
310fn parse_empty_element(start: &BytesStart) -> Result<ElementData, String> {
311    let name = std::str::from_utf8(start.name().as_ref())
312        .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
313        .to_string();
314
315    let mut attributes = Vec::new();
316    for attr in start.attributes() {
317        let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
318        let key = std::str::from_utf8(attr.key.as_ref())
319            .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
320            .to_string();
321        let value = attr
322            .unescape_value()
323            .map_err(|e| format!("Invalid attribute value: {}", e))?
324            .to_string();
325        attributes.push((key, value));
326    }
327
328    Ok(ElementData {
329        name,
330        attributes,
331        children: Vec::new(),
332        text: None,
333    })
334}
335
336/// Walk a top-level node — represented as a `(keys, values)` pair-list
337/// from the marshal boundary — and emit the corresponding XML via the
338/// writer. The top-level input from `xml.stringify` is still keyed by
339/// field name (the `Vec<(Arc<String>, Arc<HeapValue>)>` FromSlot
340/// shape); children recurse through `write_typed_object_node` against
341/// `HeapValue::TypedObject` arms now that `into_typed_object_arc`
342/// produces TypedObject per child (W17-out-of-bundle-A-followups,
343/// 2026-05-12).
344fn write_node_pairs(
345    writer: &mut Writer<Cursor<Vec<u8>>>,
346    pairs: &[(Arc<String>, Arc<HeapValue>)],
347) -> Result<(), String> {
348    // V3-S5 ckpt-5-prime²c (2026-05-15) SURFACE: the top-level pair-list
349    // shape carries `Arc<HeapValue>` values, but the `HeapValue::TypedArray`
350    // outer arm is deleted (V3-S5 ckpt-5). The `children` field now arrives
351    // as a `*mut TypedArray<TypedObjectPtr>` raw pointer, which has no
352    // `HeapValue::*` wrapper — `Vec<(Arc<String>, Arc<HeapValue>)>` cannot
353    // express it. xml.stringify's top-level reader thus requires the Round 2
354    // `Vec<Arc<HeapValue>>` rewire follow-up to add a per-element-T marshal
355    // path (pairs with `from_typed_array_<T>` constructor wave at
356    // `crates/shape-value/src/slot.rs:142`).
357    let _ = (writer, pairs);
358    let _ = write_xml_element; // keep helper reachable
359    Err(
360        "xml.method stringify() -> V3-S5 ckpt-5-prime²c SURFACE — top-level \
361         pair-list reader needs Vec<Arc<HeapValue>> rewire for the deleted \
362         outer-array-arm. Round 2 follow-up (pairs with per-element-kind \
363         constructor wave). ADR-006 §2.7.24 Q25.A SUPERSEDED."
364            .to_string(),
365    )
366}
367
368/// Walk a child node — represented as an `Arc<TypedObjectStorage>` with
369/// the `XmlNode` schema. Reads each field via `field_index_in_schema`
370/// since the schema is auto-registered and field-order is locked to
371/// `XML_NODE_FIELDS`.
372///
373/// W17-out-of-bundle-A-followups (2026-05-12): replaces the previous
374/// `write_node_heap` HashMap-element reader. The construction side
375/// (`ElementData::into_typed_object_arc`) builds TypedObjects per
376/// child, so the array's elements arrive here as TypedObjects, not
377/// HashMaps.
378fn write_typed_object_node(
379    writer: &mut Writer<Cursor<Vec<u8>>>,
380    storage: &TypedObjectStorage,
381) -> Result<(), String> {
382    // Match field order from `XML_NODE_FIELDS`. The construction side
383    // writes slots in this exact order; the schema registration uses
384    // the same field list, so positional access is sound.
385    if storage.slots.len() != XML_NODE_FIELDS.len() {
386        return Err(format!(
387            "xml.stringify(): child TypedObject has {} slots, expected {}",
388            storage.slots.len(),
389            XML_NODE_FIELDS.len()
390        ));
391    }
392    let name_slot = &storage.slots[0];
393    let attrs_slot = &storage.slots[1];
394    let children_slot = &storage.slots[2];
395    let text_slot = &storage.slots[3];
396
397    // SAFETY for each slot: the construction-side contract in
398    // `ElementData::into_typed_object_arc` writes each slot as
399    // `ValueSlot::from_string_arc` / `from_hashmap` / `from_typed_array`
400    // — the bits are `Arc::into_raw::<T>` for the matching `T`. We
401    // bump the strong count, recover via `Arc::from_raw`, then drop the
402    // bumped share after extracting a clone of the payload (the
403    // storage's own share remains intact; this is the canonical
404    // 5-arm receiver-recovery shape from `3ac2f11`).
405    let name: String = unsafe {
406        let bits = name_slot.raw();
407        if bits == 0 {
408            return Err("xml.method stringify() -> TypedObject name slot is null".to_string());
409        }
410        let arc_ptr = bits as *const String;
411        Arc::increment_strong_count(arc_ptr);
412        let arc = Arc::from_raw(arc_ptr);
413        let owned = (*arc).clone();
414        // `arc` Drop here releases our bumped share; storage's share
415        // is untouched.
416        owned
417    };
418    let attrs_kref: Option<shape_value::heap_value::HashMapKindedRef> = unsafe {
419        let bits = attrs_slot.raw();
420        if bits == 0 {
421            None
422        } else {
423            // Wave 2 Round 3b C2-joint ckpt-2 (2026-05-14): the
424            // `ValueSlot::from_hashmap` shape stores
425            // `Arc::into_raw(Arc<HashMapKindedRef>)` per ADR-006
426            // §2.7.24 Q25.B SUPERSEDED. Bump and clone-out the
427            // kinded ref (single Arc share); the storage's share
428            // is untouched (`5-arm receiver-recovery` shape from
429            // `3ac2f11`).
430            let arc_ptr = bits as *const shape_value::heap_value::HashMapKindedRef;
431            Arc::increment_strong_count(arc_ptr);
432            let arc = Arc::from_raw(arc_ptr);
433            let cloned = (*arc).clone();
434            // `arc` Drop here releases our bumped outer Arc share.
435            Some(cloned)
436        }
437    };
438    // V3-S5 ckpt-5-prime²c (2026-05-15): the children slot now holds a
439    // raw `*mut TypedArray<*const TypedObjectStorage>` per Migration
440    // shape (a) — no outer Arc/wrapper. Element-kind enforcement is by
441    // the storage's `field_kinds[2] = Ptr(HeapKind::TypedArray)` +
442    // body-side element-`T` choice. `TypedObjectStorage` impls
443    // `v2::heap_element::HeapElement` (`heap_value.rs:3971`), so the
444    // element pointers carry on-header refcount shares.
445    let children_ptr: *const TypedArray<*const TypedObjectStorage> = {
446        let bits = children_slot.raw();
447        if bits == 0 {
448            std::ptr::null()
449        } else {
450            bits as usize as *const TypedArray<*const TypedObjectStorage>
451        }
452    };
453    let text: Option<String> = unsafe {
454        let bits = text_slot.raw();
455        if bits == 0 {
456            None
457        } else {
458            let arc_ptr = bits as *const String;
459            Arc::increment_strong_count(arc_ptr);
460            let arc = Arc::from_raw(arc_ptr);
461            let owned = (*arc).clone();
462            if owned.is_empty() {
463                None
464            } else {
465                Some(owned)
466            }
467        }
468    };
469
470    write_xml_element(
471        writer,
472        Some(name),
473        attrs_kref.as_ref(),
474        children_ptr,
475        text.as_deref(),
476    )
477}
478
479/// Shared element-writer body — emits the XML representation of a node
480/// given the four parsed XmlNode fields. Pulled out so the top-level
481/// `write_node_pairs` path and the recursive
482/// `write_typed_object_node` path share the same output discipline.
483///
484/// V3-S5 ckpt-5-prime²c (2026-05-15): `children` is the raw
485/// `*const TypedArray<TypedObjectPtr>` carrier per Migration shape (a).
486/// Null pointer means "no children".
487fn write_xml_element(
488    writer: &mut Writer<Cursor<Vec<u8>>>,
489    name: Option<String>,
490    attrs: Option<&shape_value::heap_value::HashMapKindedRef>,
491    children: *const TypedArray<*const TypedObjectStorage>,
492    text: Option<&str>,
493) -> Result<(), String> {
494    let name = name.ok_or_else(|| "xml.stringify(): node missing 'name' field".to_string())?;
495
496    let mut elem = BytesStart::new(name.clone());
497
498    if let Some(attrs) = attrs {
499        // Wave 2 Round 3b C2-joint ckpt-4 (2026-05-14): per-V walk —
500        // attributes are always `HashMap<string, string>` (V = String).
501        // Other V variants are a producer-side type error (xml.stringify
502        // declares the attribute slot type at the marshal boundary).
503        use shape_value::heap_value::HashMapKindedRef;
504        match attrs {
505            HashMapKindedRef::String(arc) => {
506                let n = arc.len();
507                for i in 0..n {
508                    let key: String = unsafe {
509                        let ptr = shape_value::v2::typed_array::TypedArray::get_unchecked(
510                            arc.keys, i as u32,
511                        );
512                        shape_value::v2::string_obj::StringObj::as_str(ptr).to_owned()
513                    };
514                    let val: String = unsafe {
515                        let v_ptr: *const shape_value::v2::string_obj::StringObj =
516                            *(*arc.values).data.add(i);
517                        shape_value::v2::string_obj::StringObj::as_str(v_ptr).to_owned()
518                    };
519                    elem.push_attribute((key.as_bytes(), val.as_bytes()));
520                }
521            }
522            other => {
523                return Err(format!(
524                    "xml.stringify(): attributes HashMap must be HashMap<string, string>, \
525                     got V={:?}",
526                    other.values_kind()
527                ));
528            }
529        }
530    }
531
532    // V3-S5 ckpt-5-prime²c (2026-05-15) Migration shape (a): children
533    // carrier is `*const TypedArray<TypedObjectPtr>`. Null means "no
534    // children". Element discriminator is the body-side `T` choice +
535    // schema's `field_kinds[2] = Ptr(HeapKind::TypedArray)`.
536    let child_count: u32 = if children.is_null() {
537        0
538    } else {
539        unsafe { TypedArray::<*const TypedObjectStorage>::len(children) }
540    };
541    let has_children = child_count > 0;
542    let has_text = text.is_some();
543
544    if !has_children && !has_text {
545        writer
546            .write_event(Event::Empty(elem))
547            .map_err(|e| format!("xml.stringify() write error: {}", e))?;
548    } else {
549        writer
550            .write_event(Event::Start(elem.clone()))
551            .map_err(|e| format!("xml.stringify() write error: {}", e))?;
552
553        if let Some(text) = text {
554            writer
555                .write_event(Event::Text(BytesText::new(text)))
556                .map_err(|e| format!("xml.stringify() write error: {}", e))?;
557        }
558
559        if has_children {
560            // SAFETY: `children` is non-null (checked above) and points to
561            // a live `TypedArray<*const TypedObjectStorage>` per the
562            // construction contract in `ElementData::into_typed_object_arc`.
563            let slice = unsafe {
564                TypedArray::<*const TypedObjectStorage>::as_slice(children)
565            };
566            for &child_ptr in slice.iter() {
567                // SAFETY: per the construction contract each element is a
568                // live `*const TypedObjectStorage` with refcount >= 1 owed
569                // to the array's element slot.
570                unsafe {
571                    write_typed_object_node(writer, &*child_ptr)?;
572                }
573            }
574        }
575
576        writer
577            .write_event(Event::End(BytesEnd::new(name)))
578            .map_err(|e| format!("xml.stringify() write error: {}", e))?;
579    }
580
581    Ok(())
582}
583
584/// Create the `xml` module with XML parsing and serialization functions.
585pub fn create_xml_module() -> ModuleExports {
586    let mut module = ModuleExports::new("std::core::xml");
587    module.description = "XML parsing and serialization".to_string();
588
589    // xml.parse(text: string) -> Result<HashMap>
590    register_typed_fn_1::<_, Arc<String>>(
591        &mut module,
592        "parse",
593        "Parse an XML string into a Shape HashMap node",
594        "text",
595        "string",
596        ConcreteType::Result(Box::new(ConcreteType::HashMap)),
597        |text, _ctx| {
598            let mut reader = Reader::from_str(text.as_str());
599            reader.config_mut().trim_text(true);
600            let mut buf = Vec::new();
601
602            loop {
603                match reader.read_event_into(&mut buf) {
604                    Ok(Event::Start(ref e)) => {
605                        let inner = parse_element(&mut reader, e)?;
606                        return Ok(TypedReturn::OkObjectPairs(inner.into_root_pairs()));
607                    }
608                    Ok(Event::Empty(ref e)) => {
609                        let inner = parse_empty_element(e)?;
610                        return Ok(TypedReturn::OkObjectPairs(inner.into_root_pairs()));
611                    }
612                    Ok(Event::Eof) => {
613                        return Err("xml.parse(): no root element found".to_string());
614                    }
615                    Ok(_) => {} // Skip declaration, comments, PI
616                    Err(e) => {
617                        return Err(format!("xml.parse() failed: {}", e));
618                    }
619                }
620                buf.clear();
621            }
622        },
623    );
624
625    // xml.stringify(value: HashMap<string, any>) -> Result<string>
626    register_typed_fn_1_full::<_, Vec<(Arc<String>, Arc<HeapValue>)>>(
627        &mut module,
628        "stringify",
629        "Serialize a Shape HashMap node to an XML string",
630        [ModuleParam {
631            name: "value".to_string(),
632            type_name: "HashMap<string, any>".to_string(),
633            required: true,
634            description:
635                "Node value to serialize (with name, attributes, children, text? fields)"
636                    .to_string(),
637            ..Default::default()
638        }],
639        ConcreteType::Result(Box::new(ConcreteType::String)),
640        |pairs: Vec<(Arc<String>, Arc<HeapValue>)>, _ctx| {
641            let mut writer = Writer::new(Cursor::new(Vec::new()));
642            write_node_pairs(&mut writer, &pairs)?;
643
644            let output = String::from_utf8(writer.into_inner().into_inner())
645                .map_err(|e| format!("xml.stringify(): invalid UTF-8 output: {}", e))?;
646
647            Ok(TypedReturn::Ok(ConcreteReturn::String(output)))
648        },
649    );
650
651    module
652}