facet_format_xml/
serializer.rs

1extern crate alloc;
2
3use alloc::{format, string::String, vec::Vec};
4use std::collections::HashMap;
5
6use facet_core::Facet;
7use facet_format::{FormatSerializer, ScalarValue, SerializeError, serialize_root};
8use facet_reflect::Peek;
9
/// Well-known XML namespace URIs and their conventional prefixes.
///
/// Each entry is a `(namespace_uri, prefix)` pair. `XmlSerializer::get_or_create_prefix`
/// looks a URI up in this table first and only falls back to an auto-generated
/// `nsN` prefix when the URI is not listed here.
#[allow(dead_code)] // Used in Phase 4 namespace serialization (partial implementation)
const WELL_KNOWN_NAMESPACES: &[(&str, &str)] = &[
    ("http://www.w3.org/2001/XMLSchema-instance", "xsi"),
    ("http://www.w3.org/2001/XMLSchema", "xs"),
    ("http://www.w3.org/XML/1998/namespace", "xml"),
    ("http://www.w3.org/1999/xlink", "xlink"),
    ("http://www.w3.org/2000/svg", "svg"),
    ("http://www.w3.org/1999/xhtml", "xhtml"),
    ("http://schemas.xmlsoap.org/soap/envelope/", "soap"),
    ("http://www.w3.org/2003/05/soap-envelope", "soap12"),
    ("http://schemas.android.com/apk/res/android", "android"),
];
23
/// Error reported by the XML serializer.
///
/// Carries only a static, human-readable description of what went wrong.
#[derive(Debug)]
pub struct XmlSerializeError {
    /// Static description of the failure; printed verbatim by `Display`.
    msg: &'static str,
}

impl core::fmt::Display for XmlSerializeError {
    /// Writes the stored message as-is, ignoring any width or precision flags.
    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Self { msg } = self;
        formatter.write_str(msg)
    }
}

// No underlying cause is tracked, so the default `source()` (always `None`) is kept.
impl std::error::Error for XmlSerializeError {}
36
/// One entry of the serializer's container stack.
#[derive(Debug)]
enum Ctx {
    /// Bottom-of-stack marker. `kind` is `None` until the first top-level container
    /// is entered, then records whether that container was a struct or a sequence.
    Root { kind: Option<Kind> },
    /// An open struct. `close` is the tag name to write when the struct ends, or
    /// `None` when no element was opened for it (e.g. the top-level struct).
    Struct { close: Option<String> },
    /// An open sequence. `close` is the tag name to write when the sequence ends, or
    /// `None` when no element was opened for it (e.g. the top-level sequence).
    Seq { close: Option<String> },
}
43
/// Kind of container that was entered directly at the document root.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Kind {
    /// The root value is a struct.
    Struct,
    /// The root value is a sequence.
    Seq,
}
49
/// Minimal XML serializer for the codex prototype.
///
/// The output is designed to round-trip through `facet-format-xml`'s parser:
/// - structs are elements whose children are field elements
/// - sequences are elements whose children are repeated `<item>` elements
/// - element names are treated as map keys; the root element name is ignored
///
/// The whole document is wrapped in a literal `<root>` element: the start tag is
/// written lazily before the first piece of content and `finish` appends `</root>`.
pub struct XmlSerializer {
    /// Output buffer holding the XML bytes written so far.
    out: Vec<u8>,
    /// Stack of open containers; `new` seeds it with a single `Ctx::Root`.
    stack: Vec<Ctx>,
    /// Field name recorded by `field_key`, consumed when the field's value is emitted.
    pending_field: Option<String>,
    /// Pending namespace for the next field to be serialized
    pending_namespace: Option<String>,
    /// True if the current field is an attribute (vs element)
    pending_is_attribute: bool,
    /// Container-level default namespace (from xml::ns_all) for current struct
    current_ns_all: Option<String>,
    /// Buffered attributes for the current element (name, value, namespace_opt).
    /// They are flushed onto the next start tag that gets written.
    pending_attributes: Vec<(String, String, Option<String>)>,
    /// Element name used for every sequence item (`"item"` as set by `new`).
    item_tag: &'static str,
    /// Namespace URI -> prefix mapping for already-declared namespaces.
    declared_namespaces: HashMap<String, String>,
    /// Counter for auto-generating namespace prefixes (ns0, ns1, ...).
    next_ns_index: usize,
    /// The currently active default namespace (from xmlns="..." on an ancestor).
    #[allow(dead_code)] // Will be used for optimizing namespace declarations
    current_default_ns: Option<String>,
    /// True if we've written the opening `<root>` tag
    root_tag_written: bool,
}
79
80impl XmlSerializer {
81    pub fn new() -> Self {
82        Self {
83            out: Vec::new(),
84            stack: vec![Ctx::Root { kind: None }],
85            pending_field: None,
86            pending_namespace: None,
87            pending_is_attribute: false,
88            current_ns_all: None,
89            pending_attributes: Vec::new(),
90            item_tag: "item",
91            declared_namespaces: HashMap::new(),
92            next_ns_index: 0,
93            current_default_ns: None,
94            root_tag_written: false,
95        }
96    }
97
98    pub fn finish(mut self) -> Vec<u8> {
99        // Ensure root tag is written (even if struct is empty)
100        self.ensure_root_tag_written();
101
102        // Close any remaining non-root elements.
103        while let Some(ctx) = self.stack.pop() {
104            match ctx {
105                Ctx::Root { .. } => break,
106                Ctx::Struct { close } | Ctx::Seq { close } => {
107                    if let Some(name) = close {
108                        self.write_close_tag(&name);
109                    }
110                }
111            }
112        }
113        self.out.extend_from_slice(b"</root>");
114        self.out
115    }
116
117    fn write_open_tag(&mut self, name: &str) {
118        self.out.push(b'<');
119
120        // Check if we have a pending namespace for this field
121        if let Some(ns_uri) = self.pending_namespace.take() {
122            // Get or create a prefix for this namespace
123            let prefix = self.get_or_create_prefix(&ns_uri);
124
125            // Write prefixed element name
126            self.out.extend_from_slice(prefix.as_bytes());
127            self.out.push(b':');
128            self.out.extend_from_slice(name.as_bytes());
129
130            // Write xmlns declaration
131            self.out.extend_from_slice(b" xmlns:");
132            self.out.extend_from_slice(prefix.as_bytes());
133            self.out.extend_from_slice(b"=\"");
134            self.out.extend_from_slice(ns_uri.as_bytes());
135            self.out.push(b'"');
136        } else {
137            // No namespace - just write the element name
138            self.out.extend_from_slice(name.as_bytes());
139        }
140
141        // Write buffered attributes
142        // Drain attributes first to avoid borrow checker issues
143        let attrs: Vec<_> = self.pending_attributes.drain(..).collect();
144
145        // Now resolve prefixes for namespaced attributes
146        let mut attrs_with_prefixes = Vec::new();
147        for (name, value, ns) in attrs {
148            let prefix = ns.as_ref().map(|uri| self.get_or_create_prefix(uri));
149            attrs_with_prefixes.push((name, value, ns, prefix));
150        }
151
152        for (attr_name, attr_value, attr_ns, prefix_opt) in attrs_with_prefixes {
153            self.out.push(b' ');
154
155            if let (Some(ns_uri), Some(prefix)) = (attr_ns, prefix_opt) {
156                // Namespaced attribute - write xmlns declaration first
157                self.out.extend_from_slice(b"xmlns:");
158                self.out.extend_from_slice(prefix.as_bytes());
159                self.out.extend_from_slice(b"=\"");
160                self.out.extend_from_slice(ns_uri.as_bytes());
161                self.out.extend_from_slice(b"\" ");
162
163                // Now write the prefixed attribute
164                self.out.extend_from_slice(prefix.as_bytes());
165                self.out.push(b':');
166            }
167
168            self.out.extend_from_slice(attr_name.as_bytes());
169            self.out.extend_from_slice(b"=\"");
170            // Escape attribute value
171            for b in attr_value.as_bytes() {
172                match *b {
173                    b'&' => self.out.extend_from_slice(b"&amp;"),
174                    b'<' => self.out.extend_from_slice(b"&lt;"),
175                    b'>' => self.out.extend_from_slice(b"&gt;"),
176                    b'"' => self.out.extend_from_slice(b"&quot;"),
177                    _ => self.out.push(*b),
178                }
179            }
180            self.out.push(b'"');
181        }
182
183        self.out.push(b'>');
184    }
185
186    fn write_close_tag(&mut self, name: &str) {
187        self.out.extend_from_slice(b"</");
188        self.out.extend_from_slice(name.as_bytes());
189        self.out.push(b'>');
190    }
191
192    fn write_text_escaped(&mut self, text: &str) {
193        for b in text.as_bytes() {
194            match *b {
195                b'&' => self.out.extend_from_slice(b"&amp;"),
196                b'<' => self.out.extend_from_slice(b"&lt;"),
197                b'>' => self.out.extend_from_slice(b"&gt;"),
198                _ => self.out.push(*b),
199            }
200        }
201    }
202
203    fn ensure_root_tag_written(&mut self) {
204        if !self.root_tag_written {
205            self.out.extend_from_slice(b"<root");
206
207            // Write buffered attributes if any (for root-level attributes)
208            let attrs: Vec<_> = self.pending_attributes.drain(..).collect();
209            let mut attrs_with_prefixes = Vec::new();
210            for (name, value, ns) in attrs {
211                let prefix = ns.as_ref().map(|uri| self.get_or_create_prefix(uri));
212                attrs_with_prefixes.push((name, value, ns, prefix));
213            }
214
215            for (attr_name, attr_value, attr_ns, prefix_opt) in attrs_with_prefixes {
216                self.out.push(b' ');
217
218                if let (Some(ns_uri), Some(prefix)) = (attr_ns, prefix_opt) {
219                    // Namespaced attribute - write xmlns declaration first
220                    self.out.extend_from_slice(b"xmlns:");
221                    self.out.extend_from_slice(prefix.as_bytes());
222                    self.out.extend_from_slice(b"=\"");
223                    self.out.extend_from_slice(ns_uri.as_bytes());
224                    self.out.extend_from_slice(b"\" ");
225
226                    // Now write the prefixed attribute
227                    self.out.extend_from_slice(prefix.as_bytes());
228                    self.out.push(b':');
229                }
230
231                self.out.extend_from_slice(attr_name.as_bytes());
232                self.out.extend_from_slice(b"=\"");
233                // Escape attribute value
234                for b in attr_value.as_bytes() {
235                    match *b {
236                        b'&' => self.out.extend_from_slice(b"&amp;"),
237                        b'<' => self.out.extend_from_slice(b"&lt;"),
238                        b'>' => self.out.extend_from_slice(b"&gt;"),
239                        b'"' => self.out.extend_from_slice(b"&quot;"),
240                        _ => self.out.push(*b),
241                    }
242                }
243                self.out.push(b'"');
244            }
245
246            self.out.push(b'>');
247            self.root_tag_written = true;
248        }
249    }
250
251    fn open_value_element_if_needed(&mut self) -> Result<Option<String>, XmlSerializeError> {
252        self.ensure_root_tag_written();
253        match self.stack.last() {
254            Some(Ctx::Root { .. }) => Ok(None),
255            Some(Ctx::Struct { .. }) => {
256                let Some(name) = self.pending_field.take() else {
257                    return Err(XmlSerializeError {
258                        msg: "value emitted in struct without field key",
259                    });
260                };
261
262                // Compute the full tag name (with prefix if namespaced) for closing
263                let full_name = if let Some(ns_uri) = self.pending_namespace.clone() {
264                    let prefix = self.get_or_create_prefix(&ns_uri);
265                    format!("{}:{}", prefix, name)
266                } else {
267                    name.clone()
268                };
269
270                self.write_open_tag(&name);
271                Ok(Some(full_name))
272            }
273            Some(Ctx::Seq { .. }) => {
274                let name = self.item_tag.to_string();
275                self.write_open_tag(&name);
276                Ok(Some(name))
277            }
278            None => Err(XmlSerializeError {
279                msg: "serializer state missing root context",
280            }),
281        }
282    }
283
284    fn enter_struct_root(&mut self) {
285        if let Some(Ctx::Root { kind }) = self.stack.last_mut() {
286            *kind = Some(Kind::Struct);
287        }
288        self.stack.push(Ctx::Struct { close: None });
289    }
290
291    fn enter_seq_root(&mut self) {
292        if let Some(Ctx::Root { kind }) = self.stack.last_mut() {
293            *kind = Some(Kind::Seq);
294        }
295        self.stack.push(Ctx::Seq { close: None });
296    }
297
298    /// Get or create a prefix for the given namespace URI.
299    /// Returns the prefix (without colon).
300    fn get_or_create_prefix(&mut self, namespace_uri: &str) -> String {
301        // Check if we've already assigned a prefix to this URI
302        if let Some(prefix) = self.declared_namespaces.get(namespace_uri) {
303            return prefix.clone();
304        }
305
306        // Try well-known namespaces
307        let prefix = WELL_KNOWN_NAMESPACES
308            .iter()
309            .find(|(uri, _)| *uri == namespace_uri)
310            .map(|(_, prefix)| (*prefix).to_string())
311            .unwrap_or_else(|| {
312                // Auto-generate a prefix
313                let prefix = format!("ns{}", self.next_ns_index);
314                self.next_ns_index += 1;
315                prefix
316            });
317
318        // Ensure the prefix isn't already in use for a different namespace
319        let final_prefix = if self.declared_namespaces.values().any(|p| p == &prefix) {
320            // Conflict! Generate a new one
321            let prefix = format!("ns{}", self.next_ns_index);
322            self.next_ns_index += 1;
323            prefix
324        } else {
325            prefix
326        };
327
328        self.declared_namespaces
329            .insert(namespace_uri.to_string(), final_prefix.clone());
330        final_prefix
331    }
332}
333
334impl Default for XmlSerializer {
335    fn default() -> Self {
336        Self::new()
337    }
338}
339
340impl FormatSerializer for XmlSerializer {
341    type Error = XmlSerializeError;
342
343    fn begin_struct(&mut self) -> Result<(), Self::Error> {
344        match self.stack.last() {
345            Some(Ctx::Root { kind: None }) => {
346                self.enter_struct_root();
347                Ok(())
348            }
349            Some(Ctx::Root {
350                kind: Some(Kind::Struct),
351            }) => Err(XmlSerializeError {
352                msg: "multiple root values are not supported",
353            }),
354            Some(Ctx::Root {
355                kind: Some(Kind::Seq),
356            })
357            | Some(Ctx::Seq { .. })
358            | Some(Ctx::Struct { .. }) => {
359                let close = self.open_value_element_if_needed()?;
360                self.stack.push(Ctx::Struct { close });
361                Ok(())
362            }
363            None => Err(XmlSerializeError {
364                msg: "serializer state missing root context",
365            }),
366        }
367    }
368
369    fn field_key(&mut self, key: &str) -> Result<(), Self::Error> {
370        self.pending_field = Some(key.to_string());
371        Ok(())
372    }
373
374    fn end_struct(&mut self) -> Result<(), Self::Error> {
375        match self.stack.pop() {
376            Some(Ctx::Struct { close }) => {
377                if let Some(name) = close {
378                    self.write_close_tag(&name);
379                }
380                Ok(())
381            }
382            _ => Err(XmlSerializeError {
383                msg: "end_struct called without matching begin_struct",
384            }),
385        }
386    }
387
388    fn begin_seq(&mut self) -> Result<(), Self::Error> {
389        match self.stack.last() {
390            Some(Ctx::Root { kind: None }) => {
391                self.enter_seq_root();
392                Ok(())
393            }
394            Some(Ctx::Root {
395                kind: Some(Kind::Seq),
396            }) => Err(XmlSerializeError {
397                msg: "multiple root values are not supported",
398            }),
399            Some(Ctx::Root {
400                kind: Some(Kind::Struct),
401            })
402            | Some(Ctx::Seq { .. })
403            | Some(Ctx::Struct { .. }) => {
404                let close = self.open_value_element_if_needed()?;
405                self.stack.push(Ctx::Seq { close });
406                Ok(())
407            }
408            None => Err(XmlSerializeError {
409                msg: "serializer state missing root context",
410            }),
411        }
412    }
413
414    fn end_seq(&mut self) -> Result<(), Self::Error> {
415        match self.stack.pop() {
416            Some(Ctx::Seq { close }) => {
417                if let Some(name) = close {
418                    self.write_close_tag(&name);
419                }
420                Ok(())
421            }
422            _ => Err(XmlSerializeError {
423                msg: "end_seq called without matching begin_seq",
424            }),
425        }
426    }
427
428    fn scalar(&mut self, scalar: ScalarValue<'_>) -> Result<(), Self::Error> {
429        // If this is an attribute, buffer it instead of writing as a child element
430        if self.pending_is_attribute {
431            let name = self.pending_field.take().ok_or(XmlSerializeError {
432                msg: "attribute value without field name",
433            })?;
434            let namespace = self.pending_namespace.take();
435
436            // Convert scalar to string for attribute value
437            let value = match scalar {
438                ScalarValue::Null => "null".to_string(),
439                ScalarValue::Bool(v) => if v { "true" } else { "false" }.to_string(),
440                ScalarValue::I64(v) => v.to_string(),
441                ScalarValue::U64(v) => v.to_string(),
442                ScalarValue::F64(v) => v.to_string(),
443                ScalarValue::Str(s) => s.into_owned(),
444                ScalarValue::Bytes(_) => {
445                    return Err(XmlSerializeError {
446                        msg: "bytes serialization unsupported for xml",
447                    });
448                }
449            };
450
451            self.pending_attributes.push((name, value, namespace));
452            self.pending_is_attribute = false;
453            return Ok(());
454        }
455
456        // Regular child element
457        let close = self.open_value_element_if_needed()?;
458
459        match scalar {
460            ScalarValue::Null => {
461                // Encode as the literal "null" to round-trip through parse_scalar.
462                self.write_text_escaped("null");
463            }
464            ScalarValue::Bool(v) => self.write_text_escaped(if v { "true" } else { "false" }),
465            ScalarValue::I64(v) => self.write_text_escaped(&v.to_string()),
466            ScalarValue::U64(v) => self.write_text_escaped(&v.to_string()),
467            ScalarValue::F64(v) => self.write_text_escaped(&v.to_string()),
468            ScalarValue::Str(s) => self.write_text_escaped(&s),
469            ScalarValue::Bytes(_) => {
470                return Err(XmlSerializeError {
471                    msg: "bytes serialization unsupported for xml",
472                });
473            }
474        }
475
476        if let Some(name) = close {
477            self.write_close_tag(&name);
478        }
479
480        Ok(())
481    }
482
483    fn field_metadata(&mut self, field: &facet_reflect::FieldItem) -> Result<(), Self::Error> {
484        // Check if this field is an attribute
485        self.pending_is_attribute = field.field.get_attr(Some("xml"), "attribute").is_some();
486
487        // Extract xml::ns attribute from the field
488        if let Some(ns_attr) = field.field.get_attr(Some("xml"), "ns")
489            && let Some(ns_uri) = ns_attr.get_as::<&str>().copied()
490        {
491            self.pending_namespace = Some(ns_uri.to_string());
492            return Ok(());
493        }
494
495        // If field doesn't have explicit xml::ns, check for container-level xml::ns_all
496        // Only apply ns_all to elements, not attributes (per XML spec)
497        if !self.pending_is_attribute
498            && let Some(ns_all) = &self.current_ns_all
499        {
500            self.pending_namespace = Some(ns_all.clone());
501        }
502
503        Ok(())
504    }
505
506    fn struct_metadata(&mut self, shape: &facet_core::Shape) -> Result<(), Self::Error> {
507        // Extract xml::ns_all attribute from the struct
508        self.current_ns_all = shape
509            .attributes
510            .iter()
511            .find(|attr| attr.ns == Some("xml") && attr.key == "ns_all")
512            .and_then(|attr| attr.get_as::<&str>().copied())
513            .map(String::from);
514        Ok(())
515    }
516
517    fn preferred_field_order(&self) -> facet_format::FieldOrdering {
518        facet_format::FieldOrdering::AttributesFirst
519    }
520}
521
522pub fn to_vec<'facet, T>(value: &'_ T) -> Result<Vec<u8>, SerializeError<XmlSerializeError>>
523where
524    T: Facet<'facet> + ?Sized,
525{
526    let mut serializer = XmlSerializer::new();
527    serialize_root(&mut serializer, Peek::new(value))?;
528    Ok(serializer.finish())
529}