shape_runtime/stdlib/xml.rs
1//! Native `xml` module for XML parsing and serialization.
2//!
3//! Exports: xml.parse(text), xml.stringify(value)
4//!
5//! XML nodes are represented as Shape TypedObjects with the `XmlNode`
6//! schema: `{ name: string, attributes: HashMap<string, string>,
7//! children: Array<XmlNode>, text: string }`
8//!
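//! W17-out-of-bundle-A-followups (2026-05-12): children rewire per the
//! C+ precedent recorded in `phase-2d-playbook.md` §3
//! ("Bundle-A checkpoint-2 amendment"). Pre-rewire, each child was an
//! `Arc<HeapValue::HashMap>` carried inside the deleted
//! `TypedArrayData::HeapValue` arm. Post-rewire, each child is an
//! `Arc<HeapValue::TypedObject>` with the registered `XmlNode` schema,
//! and the outer children array lowered to `TypedArrayData::TypedObject`
//! per ADR-006 §2.7.24 Q25.A's specialized list.
//!
//! Partly superseded by V3-S5 ckpt-5-prime²c (2026-05-15): the
//! `TypedArrayData::TypedObject` arm is itself deleted, and nested nodes
//! now store their children as a raw
//! `*mut TypedArray<*const TypedObjectStorage>` in the `children` slot
//! (see `ElementData::into_typed_object_arc`).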
17//!
18//! User-visible API: `node.children[i].name` / `.attributes` / `.text`
19//! continue to work via TypedObject field access (same shape as the
20//! prior HashMap dispatch). The `text` field is now always present
21//! (empty string when absent); the prior optional-field shape was
22//! already flattened.
23//!
24//! Stage C HashMap-marshal P1(b) historical context (2026-05-07):
25//! - `xml.parse` returns the root element as `TypedReturn::OkObjectPairs`
26//! per Cluster #4 β shape (mirrors `arrow.metadata` / http.rs precedents).
//! - `xml.stringify` takes `value: HashMap<string, *>` typed input via
//!   `Vec<(Arc<String>, Arc<HeapValue>)>` FromSlot from Step 1 P1(b)
//!   infrastructure (commit `36519f6`). It was designed to walk the
//!   recursive HeapValue tree using direct pattern matching, with no
//!   marshal-boundary re-entry per element, dispatching the `children`
//!   field through `TypedArrayData::TypedObject` per the post-rewire
//!   construction shape.
//!   Current status (V3-S5 ckpt-5-prime²c, 2026-05-15): the top-level
//!   reader `write_node_pairs` is stubbed, so `xml.stringify` always
//!   returns an error until the top-level reader is rewired.
34//! - Attributes (`HashMap<string, string>`) carried via
35//! `ConcreteReturn::HashMapStringString` on output and read directly
36//! from `HeapValue::HashMap(d)` on input.
37//!
38//! Tests deleted along with the legacy ValueWord-based fixtures, mirroring
39//! the csv_module migration (commit `9f6b1d3`). New typed-marshal test
40//! harness arrives with the shape-vm cleanup workstream.
41
42use crate::marshal::{register_typed_fn_1, register_typed_fn_1_full};
43use crate::module_exports::{ModuleExports, ModuleParam};
44use crate::type_schema::register_predeclared_any_schema;
45use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
46use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
47use quick_xml::{Reader, Writer};
48use shape_value::heap_value::{HashMapData, HeapValue, TypedObjectStorage};
49use shape_value::v2::typed_array::TypedArray;
50use shape_value::{HeapKind, NativeKind, ValueSlot};
51use std::io::Cursor;
52use std::sync::Arc;
53
/// Field names of the `XmlNode` schema, listed in slot order.
///
/// `ensure_xml_node_schema` registers the schema from this list, and
/// `write_typed_object_node` checks a node's slot count against its
/// length, so the order here must stay in step with the slot array built
/// by `ElementData::into_typed_object_arc`.
const XML_NODE_FIELDS: &[&str] = &["name", "attributes", "children", "text"];
59
/// In-memory form of one parsed XML element.
///
/// The structure is recursive: an element owns its child elements, which
/// in turn own theirs.
struct ElementData {
    /// Tag name of the element.
    name: String,
    /// Attribute `(key, value)` pairs, in the order they were read.
    attributes: Vec<(String, String)>,
    /// Nested child elements, in the order they were read.
    children: Vec<ElementData>,
    /// Text content of the element; `None` when it had none.
    text: Option<String>,
}
68
69impl ElementData {
70 /// Project this element into a `HeapValue::TypedObject(...)` with
71 /// the `XmlNode` schema (W17-out-of-bundle-A-followups, 2026-05-12).
72 /// Children are recursively projected through this method and form
73 /// a `TypedArrayData::TypedObject` array — no polymorphic
74 /// `Array<HashMap>` carrier. Per C+ precedent the schema is
75 /// auto-registered via `register_predeclared_any_schema`.
76 ///
77 /// Field order matches `XML_NODE_FIELDS` (name, attributes,
78 /// children, text). `text` is always present at the slot level
79 /// (empty string when the source XML had no text node) so the
80 /// schema is fixed-arity and the type is exhaustive — no Option
81 /// indirection at the storage layer.
82 fn into_typed_object_arc(self) -> Arc<HeapValue> {
83 // Wave 2 Round 3b C2-joint ckpt-4 (2026-05-14): build the XML
84 // attributes HashMap via the per-V mutation API on
85 // `HashMapData<*const StringObj>` (V = string). Each (k, v) pair
86 // becomes one fresh StringObj insert; the wrapper carries one
87 // refcount share per element. ADR-006 §2.7.24 Q25.B SUPERSEDED.
88 let mut attrs_data: HashMapData<*const shape_value::v2::string_obj::StringObj> =
89 HashMapData::new();
90 for (k, v) in &self.attributes {
91 let v_obj = shape_value::v2::string_obj::StringObj::new(v.as_str())
92 as *const shape_value::v2::string_obj::StringObj;
93 unsafe { attrs_data.insert(k.as_str(), v_obj) };
94 }
95 let attrs_data: shape_value::heap_value::HashMapKindedRef =
96 shape_value::heap_value::HashMapKindedRef::String(Arc::new(attrs_data));
97 // Recurse: each child becomes its own TypedObject. The child raw
98 // `*const TypedObjectStorage` pointers are packed into a
99 // `*mut TypedArray<*const TypedObjectStorage>` flat-struct carrier
100 // per V3-S5 ckpt-5-prime²c Migration shape (a) (the deleted
101 // `TypedArrayData::TypedObject` enum-arm shape). The
102 // `TypedObjectStorage` type impls `v2::heap_element::HeapElement`
103 // (`heap_value.rs:3971`), so per-element retain/release dispatches
104 // through `v2_retain` / `v2_release` on the on-header refcount.
105 //
106 // Each child `into_typed_object_arc()` returns an `Arc<HeapValue>`
107 // wrapping `HeapValue::TypedObject(TypedObjectPtr)` — we extract
108 // the inner raw pointer via `into_raw()` (transferring the
109 // wrapper's one refcount share to the raw pointer, which the
110 // `TypedArray` takes ownership of as an element).
111 let child_ptrs: Vec<*const TypedObjectStorage> = self
112 .children
113 .into_iter()
114 .map(|c| {
115 let child_hv = c.into_typed_object_arc();
116 // Extract inner TypedObjectPtr by cloning out and consuming.
117 let to_ptr = match &*child_hv {
118 HeapValue::TypedObject(s) => s.clone(),
119 _ => unreachable!(
120 "into_typed_object_arc must return HeapValue::TypedObject"
121 ),
122 };
123 to_ptr.into_raw()
124 })
125 .collect();
126 let children_arr: *mut TypedArray<*const TypedObjectStorage> =
127 TypedArray::<*const TypedObjectStorage>::from_slice(&child_ptrs);
128 // `from_slice` copies each `*const TypedObjectStorage` bit-for-bit
129 // (raw pointers are Copy). The refcount shares were transferred
130 // from the source `TypedObjectPtr` wrappers into raw pointers
131 // already; the source `Vec<*const _>` doesn't own any share, so
132 // ordinary Drop suffices for the source Vec's heap allocation.
133 // Element-share ownership now lives with the array.
134
135 let schema_id = ensure_xml_node_schema();
136 // Field-order: name(0), attributes(1), children(2), text(3).
137 // Heap mask: name(String), attributes(HashMap), children(TypedArray),
138 // text(String) — all 4 fields are heap-resident.
139 let name_arc = Arc::new(self.name);
140 let attrs_arc = Arc::new(attrs_data);
141 let text_arc = Arc::new(self.text.unwrap_or_default());
142
143 let slots: Box<[ValueSlot]> = Box::new([
144 ValueSlot::from_string_arc(name_arc),
145 ValueSlot::from_hashmap(attrs_arc),
146 // V3-S5 ckpt-5-prime²c (2026-05-15) Migration shape (a): the
147 // `ValueSlot::from_typed_array(Arc<TypedArrayData>)` constructor
148 // is deleted; per-element-kind constructors aren't landed yet
149 // (Round 2 follow-up). Store the raw `*mut TypedArray<T>`
150 // pointer directly via `ValueSlot::from_u64` — this is the
151 // canonical slot-bit shape for `NativeKind::Ptr(HeapKind::
152 // TypedArray)` per `docs/runtime-v2-spec.md`. The schema's
153 // field_kinds[2] = `Ptr(HeapKind::TypedArray)` controls
154 // drop dispatch at slot release time.
155 ValueSlot::from_u64(children_arr as u64),
156 ValueSlot::from_string_arc(text_arc),
157 ]);
158 let field_kinds: Arc<[NativeKind]> = Arc::from(
159 vec![
160 NativeKind::String,
161 NativeKind::Ptr(HeapKind::HashMap),
162 NativeKind::Ptr(HeapKind::TypedArray),
163 NativeKind::String,
164 ]
165 .into_boxed_slice(),
166 );
167 let heap_mask: u64 = 0b1111; // all 4 fields heap-resident
168 // Wave 2 Round 4 D4 ckpt-final-prime² (2026-05-14): variant signature
169 // flipped to `HeapValue::TypedObject(TypedObjectPtr)`. The
170 // `_new`-returned raw pointer (refcount=1) is wrapped in
171 // `TypedObjectPtr`, transferring the share to the wrapper.
172 let storage = TypedObjectStorage::_new(
173 schema_id as u64,
174 slots,
175 heap_mask,
176 field_kinds,
177 );
178 Arc::new(HeapValue::TypedObject(
179 shape_value::heap_value::TypedObjectPtr::new(storage),
180 ))
181 }
182
183 /// Project this element's TOP-LEVEL form as a `Vec<(String,
184 /// ConcreteReturn)>` pair-list, suitable for `TypedReturn::OkObjectPairs`.
185 /// Used only for the root element of `xml.parse`'s return value;
186 /// nested elements go through `into_typed_object_arc` instead.
187 fn into_root_pairs(self) -> Vec<(String, ConcreteReturn)> {
188 let attrs_pairs: Vec<(String, String)> = self.attributes;
189 // Each child is now an `Arc<HeapValue::TypedObject>`. The marshal
190 // boundary's `ConcreteReturn::ArrayHeapValue` consumer routes
191 // through `TypedArrayData::build_specialized_from_heap_arcs`,
192 // which already dispatches the `HeapValue::TypedObject` arm to
193 // `TypedArrayData::TypedObject` per ADR-006 §2.7.24 Q25.A. No
194 // out-of-territory follow-up: the rewire is structurally
195 // resolved by C+ precedent.
196 let children_arc: Vec<Arc<HeapValue>> = self
197 .children
198 .into_iter()
199 .map(ElementData::into_typed_object_arc)
200 .collect();
201
202 let mut pairs = vec![
203 ("name".to_string(), ConcreteReturn::String(self.name)),
204 (
205 "attributes".to_string(),
206 ConcreteReturn::HashMapStringString(attrs_pairs),
207 ),
208 (
209 "children".to_string(),
210 ConcreteReturn::ArrayHeapValue(children_arc),
211 ),
212 ];
213 // `text?` follows the regex.rs precedent: emit empty string when
214 // absent. Keeps the schema fixed at 4 fields when text is present
215 // and 3 fields when absent — variable-length pair list per the
216 // ObjectPairs contract.
217 if let Some(text) = self.text {
218 pairs.push(("text".to_string(), ConcreteReturn::String(text)));
219 }
220 pairs
221 }
222}
223
224/// Register the `XmlNode` predeclared schema (auto-registered on first
225/// use; subsequent calls return the cached SchemaId via the registry's
226/// own deduplication). Returns the raw `u32` schema id used by
227/// `TypedObjectStorage::schema_id`.
228fn ensure_xml_node_schema() -> u32 {
229 let owned: Vec<String> = XML_NODE_FIELDS.iter().map(|s| s.to_string()).collect();
230 register_predeclared_any_schema(&owned)
231}
232
233/// Parse an XML element recursively from a quick-xml reader.
234fn parse_element(
235 reader: &mut Reader<&[u8]>,
236 start: &BytesStart,
237) -> Result<ElementData, String> {
238 let name = std::str::from_utf8(start.name().as_ref())
239 .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
240 .to_string();
241
242 let mut attributes = Vec::new();
243 for attr in start.attributes() {
244 let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
245 let key = std::str::from_utf8(attr.key.as_ref())
246 .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
247 .to_string();
248 let value = attr
249 .unescape_value()
250 .map_err(|e| format!("Invalid attribute value: {}", e))?
251 .to_string();
252 attributes.push((key, value));
253 }
254
255 let mut children = Vec::new();
256 let mut text_parts = Vec::new();
257 let mut buf = Vec::new();
258
259 loop {
260 match reader.read_event_into(&mut buf) {
261 Ok(Event::Start(ref e)) => {
262 let child = parse_element(reader, e)?;
263 children.push(child);
264 }
265 Ok(Event::Empty(ref e)) => {
266 let child = parse_empty_element(e)?;
267 children.push(child);
268 }
269 Ok(Event::Text(ref e)) => {
270 let t = e
271 .unescape()
272 .map_err(|err| format!("Error unescaping text: {}", err))?
273 .to_string();
274 let trimmed = t.trim().to_string();
275 if !trimmed.is_empty() {
276 text_parts.push(trimmed);
277 }
278 }
279 Ok(Event::CData(ref e)) => {
280 let t = std::str::from_utf8(e.as_ref())
281 .map_err(|err| format!("Invalid UTF-8 in CDATA: {}", err))?
282 .to_string();
283 if !t.trim().is_empty() {
284 text_parts.push(t);
285 }
286 }
287 Ok(Event::End(_)) => break,
288 Ok(Event::Eof) => {
289 return Err("Unexpected end of XML".to_string());
290 }
291 Ok(_) => {} // Skip comments, PI, etc.
292 Err(e) => return Err(format!("XML parse error: {}", e)),
293 }
294 buf.clear();
295 }
296
297 Ok(ElementData {
298 name,
299 attributes,
300 children,
301 text: if text_parts.is_empty() {
302 None
303 } else {
304 Some(text_parts.join(""))
305 },
306 })
307}
308
309/// Parse a self-closing XML element (e.g. `<br/>`).
310fn parse_empty_element(start: &BytesStart) -> Result<ElementData, String> {
311 let name = std::str::from_utf8(start.name().as_ref())
312 .map_err(|e| format!("Invalid UTF-8 in element name: {}", e))?
313 .to_string();
314
315 let mut attributes = Vec::new();
316 for attr in start.attributes() {
317 let attr = attr.map_err(|e| format!("Invalid attribute: {}", e))?;
318 let key = std::str::from_utf8(attr.key.as_ref())
319 .map_err(|e| format!("Invalid UTF-8 in attribute key: {}", e))?
320 .to_string();
321 let value = attr
322 .unescape_value()
323 .map_err(|e| format!("Invalid attribute value: {}", e))?
324 .to_string();
325 attributes.push((key, value));
326 }
327
328 Ok(ElementData {
329 name,
330 attributes,
331 children: Vec::new(),
332 text: None,
333 })
334}
335
336/// Walk a top-level node — represented as a `(keys, values)` pair-list
337/// from the marshal boundary — and emit the corresponding XML via the
338/// writer. The top-level input from `xml.stringify` is still keyed by
339/// field name (the `Vec<(Arc<String>, Arc<HeapValue>)>` FromSlot
340/// shape); children recurse through `write_typed_object_node` against
341/// `HeapValue::TypedObject` arms now that `into_typed_object_arc`
342/// produces TypedObject per child (W17-out-of-bundle-A-followups,
343/// 2026-05-12).
344fn write_node_pairs(
345 writer: &mut Writer<Cursor<Vec<u8>>>,
346 pairs: &[(Arc<String>, Arc<HeapValue>)],
347) -> Result<(), String> {
348 // V3-S5 ckpt-5-prime²c (2026-05-15) SURFACE: the top-level pair-list
349 // shape carries `Arc<HeapValue>` values, but the `HeapValue::TypedArray`
350 // outer arm is deleted (V3-S5 ckpt-5). The `children` field now arrives
351 // as a `*mut TypedArray<TypedObjectPtr>` raw pointer, which has no
352 // `HeapValue::*` wrapper — `Vec<(Arc<String>, Arc<HeapValue>)>` cannot
353 // express it. xml.stringify's top-level reader thus requires the Round 2
354 // `Vec<Arc<HeapValue>>` rewire follow-up to add a per-element-T marshal
355 // path (pairs with `from_typed_array_<T>` constructor wave at
356 // `crates/shape-value/src/slot.rs:142`).
357 let _ = (writer, pairs);
358 let _ = write_xml_element; // keep helper reachable
359 Err(
360 "xml.method stringify() -> V3-S5 ckpt-5-prime²c SURFACE — top-level \
361 pair-list reader needs Vec<Arc<HeapValue>> rewire for the deleted \
362 outer-array-arm. Round 2 follow-up (pairs with per-element-kind \
363 constructor wave). ADR-006 §2.7.24 Q25.A SUPERSEDED."
364 .to_string(),
365 )
366}
367
368/// Walk a child node — represented as an `Arc<TypedObjectStorage>` with
369/// the `XmlNode` schema. Reads each field via `field_index_in_schema`
370/// since the schema is auto-registered and field-order is locked to
371/// `XML_NODE_FIELDS`.
372///
373/// W17-out-of-bundle-A-followups (2026-05-12): replaces the previous
374/// `write_node_heap` HashMap-element reader. The construction side
375/// (`ElementData::into_typed_object_arc`) builds TypedObjects per
376/// child, so the array's elements arrive here as TypedObjects, not
377/// HashMaps.
378fn write_typed_object_node(
379 writer: &mut Writer<Cursor<Vec<u8>>>,
380 storage: &TypedObjectStorage,
381) -> Result<(), String> {
382 // Match field order from `XML_NODE_FIELDS`. The construction side
383 // writes slots in this exact order; the schema registration uses
384 // the same field list, so positional access is sound.
385 if storage.slots.len() != XML_NODE_FIELDS.len() {
386 return Err(format!(
387 "xml.stringify(): child TypedObject has {} slots, expected {}",
388 storage.slots.len(),
389 XML_NODE_FIELDS.len()
390 ));
391 }
392 let name_slot = &storage.slots[0];
393 let attrs_slot = &storage.slots[1];
394 let children_slot = &storage.slots[2];
395 let text_slot = &storage.slots[3];
396
397 // SAFETY for each slot: the construction-side contract in
398 // `ElementData::into_typed_object_arc` writes each slot as
399 // `ValueSlot::from_string_arc` / `from_hashmap` / `from_typed_array`
400 // — the bits are `Arc::into_raw::<T>` for the matching `T`. We
401 // bump the strong count, recover via `Arc::from_raw`, then drop the
402 // bumped share after extracting a clone of the payload (the
403 // storage's own share remains intact; this is the canonical
404 // 5-arm receiver-recovery shape from `3ac2f11`).
405 let name: String = unsafe {
406 let bits = name_slot.raw();
407 if bits == 0 {
408 return Err("xml.method stringify() -> TypedObject name slot is null".to_string());
409 }
410 let arc_ptr = bits as *const String;
411 Arc::increment_strong_count(arc_ptr);
412 let arc = Arc::from_raw(arc_ptr);
413 let owned = (*arc).clone();
414 // `arc` Drop here releases our bumped share; storage's share
415 // is untouched.
416 owned
417 };
418 let attrs_kref: Option<shape_value::heap_value::HashMapKindedRef> = unsafe {
419 let bits = attrs_slot.raw();
420 if bits == 0 {
421 None
422 } else {
423 // Wave 2 Round 3b C2-joint ckpt-2 (2026-05-14): the
424 // `ValueSlot::from_hashmap` shape stores
425 // `Arc::into_raw(Arc<HashMapKindedRef>)` per ADR-006
426 // §2.7.24 Q25.B SUPERSEDED. Bump and clone-out the
427 // kinded ref (single Arc share); the storage's share
428 // is untouched (`5-arm receiver-recovery` shape from
429 // `3ac2f11`).
430 let arc_ptr = bits as *const shape_value::heap_value::HashMapKindedRef;
431 Arc::increment_strong_count(arc_ptr);
432 let arc = Arc::from_raw(arc_ptr);
433 let cloned = (*arc).clone();
434 // `arc` Drop here releases our bumped outer Arc share.
435 Some(cloned)
436 }
437 };
438 // V3-S5 ckpt-5-prime²c (2026-05-15): the children slot now holds a
439 // raw `*mut TypedArray<*const TypedObjectStorage>` per Migration
440 // shape (a) — no outer Arc/wrapper. Element-kind enforcement is by
441 // the storage's `field_kinds[2] = Ptr(HeapKind::TypedArray)` +
442 // body-side element-`T` choice. `TypedObjectStorage` impls
443 // `v2::heap_element::HeapElement` (`heap_value.rs:3971`), so the
444 // element pointers carry on-header refcount shares.
445 let children_ptr: *const TypedArray<*const TypedObjectStorage> = {
446 let bits = children_slot.raw();
447 if bits == 0 {
448 std::ptr::null()
449 } else {
450 bits as usize as *const TypedArray<*const TypedObjectStorage>
451 }
452 };
453 let text: Option<String> = unsafe {
454 let bits = text_slot.raw();
455 if bits == 0 {
456 None
457 } else {
458 let arc_ptr = bits as *const String;
459 Arc::increment_strong_count(arc_ptr);
460 let arc = Arc::from_raw(arc_ptr);
461 let owned = (*arc).clone();
462 if owned.is_empty() {
463 None
464 } else {
465 Some(owned)
466 }
467 }
468 };
469
470 write_xml_element(
471 writer,
472 Some(name),
473 attrs_kref.as_ref(),
474 children_ptr,
475 text.as_deref(),
476 )
477}
478
479/// Shared element-writer body — emits the XML representation of a node
480/// given the four parsed XmlNode fields. Pulled out so the top-level
481/// `write_node_pairs` path and the recursive
482/// `write_typed_object_node` path share the same output discipline.
483///
484/// V3-S5 ckpt-5-prime²c (2026-05-15): `children` is the raw
485/// `*const TypedArray<TypedObjectPtr>` carrier per Migration shape (a).
486/// Null pointer means "no children".
487fn write_xml_element(
488 writer: &mut Writer<Cursor<Vec<u8>>>,
489 name: Option<String>,
490 attrs: Option<&shape_value::heap_value::HashMapKindedRef>,
491 children: *const TypedArray<*const TypedObjectStorage>,
492 text: Option<&str>,
493) -> Result<(), String> {
494 let name = name.ok_or_else(|| "xml.stringify(): node missing 'name' field".to_string())?;
495
496 let mut elem = BytesStart::new(name.clone());
497
498 if let Some(attrs) = attrs {
499 // Wave 2 Round 3b C2-joint ckpt-4 (2026-05-14): per-V walk —
500 // attributes are always `HashMap<string, string>` (V = String).
501 // Other V variants are a producer-side type error (xml.stringify
502 // declares the attribute slot type at the marshal boundary).
503 use shape_value::heap_value::HashMapKindedRef;
504 match attrs {
505 HashMapKindedRef::String(arc) => {
506 let n = arc.len();
507 for i in 0..n {
508 let key: String = unsafe {
509 let ptr = shape_value::v2::typed_array::TypedArray::get_unchecked(
510 arc.keys, i as u32,
511 );
512 shape_value::v2::string_obj::StringObj::as_str(ptr).to_owned()
513 };
514 let val: String = unsafe {
515 let v_ptr: *const shape_value::v2::string_obj::StringObj =
516 *(*arc.values).data.add(i);
517 shape_value::v2::string_obj::StringObj::as_str(v_ptr).to_owned()
518 };
519 elem.push_attribute((key.as_bytes(), val.as_bytes()));
520 }
521 }
522 other => {
523 return Err(format!(
524 "xml.stringify(): attributes HashMap must be HashMap<string, string>, \
525 got V={:?}",
526 other.values_kind()
527 ));
528 }
529 }
530 }
531
532 // V3-S5 ckpt-5-prime²c (2026-05-15) Migration shape (a): children
533 // carrier is `*const TypedArray<TypedObjectPtr>`. Null means "no
534 // children". Element discriminator is the body-side `T` choice +
535 // schema's `field_kinds[2] = Ptr(HeapKind::TypedArray)`.
536 let child_count: u32 = if children.is_null() {
537 0
538 } else {
539 unsafe { TypedArray::<*const TypedObjectStorage>::len(children) }
540 };
541 let has_children = child_count > 0;
542 let has_text = text.is_some();
543
544 if !has_children && !has_text {
545 writer
546 .write_event(Event::Empty(elem))
547 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
548 } else {
549 writer
550 .write_event(Event::Start(elem.clone()))
551 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
552
553 if let Some(text) = text {
554 writer
555 .write_event(Event::Text(BytesText::new(text)))
556 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
557 }
558
559 if has_children {
560 // SAFETY: `children` is non-null (checked above) and points to
561 // a live `TypedArray<*const TypedObjectStorage>` per the
562 // construction contract in `ElementData::into_typed_object_arc`.
563 let slice = unsafe {
564 TypedArray::<*const TypedObjectStorage>::as_slice(children)
565 };
566 for &child_ptr in slice.iter() {
567 // SAFETY: per the construction contract each element is a
568 // live `*const TypedObjectStorage` with refcount >= 1 owed
569 // to the array's element slot.
570 unsafe {
571 write_typed_object_node(writer, &*child_ptr)?;
572 }
573 }
574 }
575
576 writer
577 .write_event(Event::End(BytesEnd::new(name)))
578 .map_err(|e| format!("xml.stringify() write error: {}", e))?;
579 }
580
581 Ok(())
582}
583
584/// Create the `xml` module with XML parsing and serialization functions.
585pub fn create_xml_module() -> ModuleExports {
586 let mut module = ModuleExports::new("std::core::xml");
587 module.description = "XML parsing and serialization".to_string();
588
589 // xml.parse(text: string) -> Result<HashMap>
590 register_typed_fn_1::<_, Arc<String>>(
591 &mut module,
592 "parse",
593 "Parse an XML string into a Shape HashMap node",
594 "text",
595 "string",
596 ConcreteType::Result(Box::new(ConcreteType::HashMap)),
597 |text, _ctx| {
598 let mut reader = Reader::from_str(text.as_str());
599 reader.config_mut().trim_text(true);
600 let mut buf = Vec::new();
601
602 loop {
603 match reader.read_event_into(&mut buf) {
604 Ok(Event::Start(ref e)) => {
605 let inner = parse_element(&mut reader, e)?;
606 return Ok(TypedReturn::OkObjectPairs(inner.into_root_pairs()));
607 }
608 Ok(Event::Empty(ref e)) => {
609 let inner = parse_empty_element(e)?;
610 return Ok(TypedReturn::OkObjectPairs(inner.into_root_pairs()));
611 }
612 Ok(Event::Eof) => {
613 return Err("xml.parse(): no root element found".to_string());
614 }
615 Ok(_) => {} // Skip declaration, comments, PI
616 Err(e) => {
617 return Err(format!("xml.parse() failed: {}", e));
618 }
619 }
620 buf.clear();
621 }
622 },
623 );
624
625 // xml.stringify(value: HashMap<string, any>) -> Result<string>
626 register_typed_fn_1_full::<_, Vec<(Arc<String>, Arc<HeapValue>)>>(
627 &mut module,
628 "stringify",
629 "Serialize a Shape HashMap node to an XML string",
630 [ModuleParam {
631 name: "value".to_string(),
632 type_name: "HashMap<string, any>".to_string(),
633 required: true,
634 description:
635 "Node value to serialize (with name, attributes, children, text? fields)"
636 .to_string(),
637 ..Default::default()
638 }],
639 ConcreteType::Result(Box::new(ConcreteType::String)),
640 |pairs: Vec<(Arc<String>, Arc<HeapValue>)>, _ctx| {
641 let mut writer = Writer::new(Cursor::new(Vec::new()));
642 write_node_pairs(&mut writer, &pairs)?;
643
644 let output = String::from_utf8(writer.into_inner().into_inner())
645 .map_err(|e| format!("xml.stringify(): invalid UTF-8 output: {}", e))?;
646
647 Ok(TypedReturn::Ok(ConcreteReturn::String(output)))
648 },
649 );
650
651 module
652}