xee_interpreter/sequence/
serialization.rs

1use ahash::HashMap;
2use rust_decimal::Decimal;
3use xot::{xmlname::OwnedName, Xot};
4
5use xee_schema_type::Xs;
6
7use crate::{
8    atomic, context, error,
9    function::{self, Map},
10};
11
12use super::{
13    core::Sequence,
14    item::Item,
15    opc::{OptionParameterConverter, QNameOrString},
16};
17
18pub struct SerializationParameters {
19    pub allow_duplicate_names: bool,
20    pub byte_order_mark: bool,
21    pub cdata_section_elements: Vec<OwnedName>,
22    pub doctype_public: Option<String>,
23    pub doctype_system: Option<String>,
24    pub encoding: String,
25    pub escape_uri_attributes: bool,
26    pub html_version: Decimal,
27    pub include_content_type: bool,
28    pub indent: bool,
29    pub item_separator: String,
30    pub json_node_output_method: QNameOrString,
31    pub media_type: Option<String>,
32    pub method: QNameOrString,
33    pub normalization_form: Option<String>,
34    pub omit_xml_declaration: bool,
35    pub standalone: Option<bool>,
36    pub suppress_indentation: Vec<OwnedName>,
37    pub undeclare_prefixes: bool,
38    pub use_character_maps: HashMap<char, String>,
39    pub version: String,
40}
41
42impl SerializationParameters {
43    // default values are as used in XSLT 3.0
44    pub fn new() -> Self {
45        Self {
46            allow_duplicate_names: false,
47            byte_order_mark: false,
48            cdata_section_elements: Vec::new(),
49            doctype_public: None,
50            doctype_system: None,
51            encoding: "utf-8".to_string(),
52            escape_uri_attributes: true,
53            html_version: Decimal::from_str_exact("5.0").unwrap(),
54            include_content_type: true,
55            indent: false,
56            item_separator: " ".to_string(),
57            json_node_output_method: QNameOrString::String("xml".to_string()),
58            media_type: Some("text/xml".to_string()),
59            method: QNameOrString::String("xml".to_string()),
60            normalization_form: None,
61            omit_xml_declaration: false,
62            standalone: None,
63            suppress_indentation: Vec::new(),
64            undeclare_prefixes: false,
65            use_character_maps: HashMap::default(),
66            version: "1.0".to_string(),
67        }
68    }
69
70    pub(crate) fn from_map(
71        map: Map,
72        static_context: &context::StaticContext,
73        xot: &Xot,
74    ) -> error::Result<Self> {
75        let c = OptionParameterConverter::new(&map, static_context, xot);
76        let allow_duplicate_names =
77            c.option_with_default("allow-duplicate-names", Xs::Boolean, false)?;
78
79        let byte_order_mark = c.option_with_default("byte-order-mark", Xs::Boolean, false)?;
80
81        let cdata_section_elements = c.many("cdata-section-elements", Xs::QName)?;
82
83        let doctype_public = c.option("doctype-public", Xs::String)?;
84
85        let doctype_system = c.option("doctype-system", Xs::String)?;
86
87        let encoding = c.option_with_default("encoding", Xs::String, "utf-8".to_string())?;
88
89        let escape_uri_attributes =
90            c.option_with_default("escape-uri-attributes", Xs::Boolean, true)?;
91
92        let html_version = c.option_with_default(
93            "html-version",
94            Xs::Decimal,
95            Decimal::from_str_exact("5.0").unwrap(),
96        )?;
97
98        let include_content_type =
99            c.option_with_default("include-content-type", Xs::Boolean, true)?;
100
101        let indent = c.option_with_default("indent", Xs::Boolean, false)?;
102
103        let item_separator =
104            c.option_with_default("item-separator", Xs::String, " ".to_string())?;
105
106        let json_node_output_method = c.qname_or_string(
107            "json-node-output-method",
108            QNameOrString::String("xml".to_string()),
109        )?;
110
111        let media_type = c.option("media-type", Xs::String)?;
112
113        let method = c.qname_or_string("method", QNameOrString::String("xml".to_string()))?;
114
115        let normalization_form = c.option("normalization-form", Xs::String)?;
116
117        let omit_xml_declaration =
118            c.option_with_default("omit-xml-declaration", Xs::Boolean, true)?;
119
120        let standalone = c.option("standalone", Xs::Boolean)?;
121
122        let suppress_indentation = c.many("suppress-indentation", Xs::QName)?;
123
124        let undeclare_prefixes = c.option_with_default("undeclare-prefixes", Xs::Boolean, false)?;
125
126        // TODO: use-character-maps
127
128        let version = c.option_with_default("version", Xs::String, "1.0".to_string())?;
129
130        Ok(Self {
131            allow_duplicate_names,
132            byte_order_mark,
133            cdata_section_elements,
134            doctype_public,
135            doctype_system,
136            encoding,
137            escape_uri_attributes,
138            html_version,
139            include_content_type,
140            indent,
141            item_separator,
142            json_node_output_method,
143            media_type,
144            method,
145            normalization_form,
146            omit_xml_declaration,
147            standalone,
148            suppress_indentation,
149            undeclare_prefixes,
150            use_character_maps: HashMap::default(),
151            version,
152        })
153    }
154
155    pub(crate) fn xml_in_json_serialization(method: &QNameOrString) -> Self {
156        Self {
157            // use the method given
158            method: method.clone(),
159            // the only thing set according to the specification
160            omit_xml_declaration: true,
161            // keep this around just in case, though I don't think we
162            // can end up in json output from XML output
163            json_node_output_method: method.clone(),
164            allow_duplicate_names: false,
165            byte_order_mark: false,
166            cdata_section_elements: Vec::new(),
167            doctype_public: None,
168            doctype_system: None,
169            encoding: "utf-8".to_string(),
170            escape_uri_attributes: false,
171            html_version: Decimal::from_str_exact("5.0").unwrap(),
172            include_content_type: false,
173            indent: false,
174            item_separator: " ".to_string(),
175            media_type: None,
176            normalization_form: None,
177            standalone: None,
178            suppress_indentation: Vec::new(),
179            undeclare_prefixes: false,
180            use_character_maps: HashMap::default(),
181            version: "1.0".to_string(),
182        }
183    }
184}
185
186impl Default for SerializationParameters {
187    fn default() -> Self {
188        Self::new()
189    }
190}
191
192pub(crate) fn serialize_sequence(
193    arg: &Sequence,
194    parameters: SerializationParameters,
195    xot: &mut Xot,
196) -> error::Result<String> {
197    if let Some(local_name) = parameters.method.local_name() {
198        match local_name {
199            "xml" => serialize_xml(arg, parameters, xot),
200            "html" => serialize_html(arg, parameters, xot),
201            "json" => serialize_json(arg, parameters, xot),
202            _ => Err(error::Error::SEPM0016),
203        }
204    } else {
205        Err(error::Error::SEPM0016)
206    }
207}
208
209fn serialize_xml(
210    arg: &Sequence,
211    parameters: SerializationParameters,
212    xot: &mut Xot,
213) -> Result<String, error::Error> {
214    let node = arg.normalize(&parameters.item_separator, xot)?;
215    let indentation = xot_indentation(&parameters, xot);
216    let cdata_section_elements = xot_names(&parameters.cdata_section_elements, xot);
217    let declaration = if !parameters.omit_xml_declaration {
218        Some(xot::output::xml::Declaration {
219            encoding: Some(parameters.encoding.to_string()),
220            standalone: parameters.standalone,
221        })
222    } else {
223        None
224    };
225    let doctype = match (parameters.doctype_public, parameters.doctype_system) {
226        (Some(public), Some(system)) => Some(xot::output::xml::DocType::Public { public, system }),
227        (None, Some(system)) => Some(xot::output::xml::DocType::System { system }),
228        // TODO: this should really not happen?
229        (Some(public), None) => Some(xot::output::xml::DocType::Public {
230            public,
231            system: "".to_string(),
232        }),
233        (None, None) => None,
234    };
235    let output_parameters = xot::output::xml::Parameters {
236        indentation,
237        cdata_section_elements,
238        declaration,
239        doctype,
240        ..Default::default()
241    };
242
243    Ok(xot.serialize_xml_string(output_parameters, node)?)
244}
245
246fn serialize_html(
247    arg: &Sequence,
248    parameters: SerializationParameters,
249    xot: &mut Xot,
250) -> Result<String, error::Error> {
251    let node = arg.normalize(&parameters.item_separator, xot)?;
252    // TODO: no check yet for html version rejecting versions that aren't 5
253    let cdata_section_elements = xot_names(&parameters.cdata_section_elements, xot);
254    let indentation = xot_indentation(&parameters, xot);
255    let html5 = xot.html5();
256    let output_parameters = xot::output::html5::Parameters {
257        indentation,
258        cdata_section_elements,
259    };
260    Ok(html5.serialize_string(output_parameters, node)?)
261}
262
263fn serialize_json(
264    arg: &Sequence,
265    parameters: SerializationParameters,
266    xot: &mut Xot,
267) -> Result<String, error::Error> {
268    let r = serialize_json_sequence(arg, &parameters, xot)?;
269    Ok(r.dump())
270}
271
272fn serialize_json_sequence(
273    arg: &Sequence,
274    parameters: &SerializationParameters,
275    xot: &mut Xot,
276) -> Result<json::JsonValue, error::Error> {
277    match arg {
278        Sequence::One(item) => serialize_json_item(item.item(), parameters, xot),
279        Sequence::Empty(_) => Ok(json::JsonValue::Null),
280        Sequence::Many(_) | Sequence::Range(_) => Err(error::Error::SERE0023),
281    }
282}
283
284fn serialize_json_item(
285    item: &Item,
286    parameters: &SerializationParameters,
287    xot: &mut Xot,
288) -> Result<json::JsonValue, error::Error> {
289    match item {
290        Item::Atomic(atomic) => serialize_json_atomic(atomic, parameters),
291        Item::Node(node) => serialize_json_node(*node, parameters, xot),
292        Item::Function(function) => serialize_json_function(function, parameters, xot),
293    }
294}
295
296fn serialize_json_atomic(
297    atomic: &atomic::Atomic,
298    parameters: &SerializationParameters,
299) -> Result<json::JsonValue, error::Error> {
300    match atomic {
301        atomic::Atomic::Float(float) => {
302            let f = float.into_inner();
303            if f.is_infinite() || f.is_nan() {
304                return Err(error::Error::SERE0020);
305            }
306            Ok(json::JsonValue::Number(f.into()))
307        }
308        atomic::Atomic::Double(double) => {
309            let d = double.into_inner();
310            if d.is_infinite() || d.is_nan() {
311                return Err(error::Error::SERE0020);
312            }
313            Ok(json::JsonValue::Number(d.into()))
314        }
315        atomic::Atomic::Decimal(decimal) => {
316            let d: f64 = (*decimal.as_ref())
317                .try_into()
318                .map_err(|_| error::Error::SERE0020)?;
319            Ok(json::JsonValue::Number(d.into()))
320        }
321        atomic::Atomic::Integer(_t, integer) => {
322            let i: f64 = integer.to_f64();
323            Ok(json::JsonValue::Number(i.into()))
324        }
325        atomic::Atomic::Boolean(b) => Ok(json::JsonValue::Boolean(*b)),
326        _ => {
327            let s = atomic.string_value();
328            Ok(serialize_json_string(s, parameters))
329        }
330    }
331}
332
333fn serialize_json_string(s: String, _parameters: &SerializationParameters) -> json::JsonValue {
334    // TODO: normalization-form
335
336    // NOTE: tests serialize-json-127 and serialize-json-128 fail because
337    // the forward slash (solidus) character is not escaped. This is because
338    // the json crate does not do so. This is consistent with the JSON RFC
339    // https://softwareengineering.stackexchange.com/questions/444480/json-rfc8259-escape-forward-slash-or-not
340    // but not consistent with the serialization spec which wrongfully manadates
341    // it anyway.
342    // https://www.w3.org/TR/xslt-xquery-serialization-31/#json-output
343    json::JsonValue::String(s)
344}
345
346fn serialize_json_node(
347    node: xot::Node,
348    parameters: &SerializationParameters,
349    xot: &mut Xot,
350) -> Result<json::JsonValue, error::Error> {
351    match parameters.json_node_output_method.local_name() {
352        Some("xml") | Some("html") => {
353            let xml_parameters = SerializationParameters::xml_in_json_serialization(
354                &parameters.json_node_output_method,
355            );
356            let sequence: Sequence = vec![node].into();
357            let s = serialize_sequence(&sequence, xml_parameters, xot)?;
358            Ok(serialize_json_string(s, parameters))
359        }
360        _ => todo!(),
361    }
362}
363
364fn serialize_json_function(
365    function: &function::Function,
366    parameters: &SerializationParameters,
367    xot: &mut Xot,
368) -> Result<json::JsonValue, error::Error> {
369    match function {
370        function::Function::Array(array) => serialize_json_array(array, parameters, xot),
371        function::Function::Map(map) => serialize_json_map(map, parameters, xot),
372        _ => Err(error::Error::SERE0021),
373    }
374}
375
376fn serialize_json_array(
377    array: &function::Array,
378    parameters: &SerializationParameters,
379    xot: &mut Xot,
380) -> Result<json::JsonValue, error::Error> {
381    let mut result = Vec::with_capacity(array.len());
382    for entry in array.iter() {
383        let serialized = serialize_json_sequence(entry, parameters, xot)?;
384        result.push(serialized);
385    }
386    Ok(json::JsonValue::Array(result))
387}
388
389fn serialize_json_map(
390    map: &function::Map,
391    parameters: &SerializationParameters,
392    xot: &mut Xot,
393) -> Result<json::JsonValue, error::Error> {
394    let mut result = json::object::Object::new();
395    for key in map.keys() {
396        let key_s = key.string_value();
397        let value = map.get(key).unwrap();
398        let value = serialize_json_sequence(value, parameters, xot)?;
399        result.insert(&key_s, value);
400    }
401    Ok(json::JsonValue::Object(result))
402}
403
404fn xot_indentation(
405    parameters: &SerializationParameters,
406    xot: &mut Xot,
407) -> Option<xot::output::Indentation> {
408    if !parameters.indent {
409        return None;
410    }
411    let suppress = xot_names(&parameters.suppress_indentation, xot);
412    Some(xot::output::Indentation { suppress })
413}
414
415fn xot_names(names: &[xot::xmlname::OwnedName], xot: &mut Xot) -> Vec<xot::NameId> {
416    names
417        .iter()
418        .map(|owned_name| owned_name.to_ref(xot).name_id())
419        .collect()
420}
421
#[cfg(test)]
mod tests {
    use crate::{atomic, sequence};

    use super::*;

    /// Builds a parameter map that holds exactly one option.
    fn single_option(name: &str, value: sequence::Sequence) -> Map {
        Map::new(vec![(name.to_string().into(), value)]).unwrap()
    }

    /// Builds a parameter map without any options.
    fn no_options() -> Map {
        Map::new(Vec::new()).unwrap()
    }

    /// Reads the parameters from `map` using a default static context and
    /// a fresh `Xot`, panicking if the map is rejected.
    fn parse(map: Map) -> SerializationParameters {
        let static_context = context::StaticContext::default();
        let xot = Xot::new();
        SerializationParameters::from_map(map, &static_context, &xot).unwrap()
    }

    /// An owned name with the given local name and empty namespace/prefix.
    fn plain_name(local: &str) -> OwnedName {
        OwnedName::new(local.to_string(), "".to_string(), "".to_string())
    }

    #[test]
    fn test_allow_duplicate_names_true() {
        let params = parse(single_option(
            "allow-duplicate-names",
            sequence::Sequence::from(vec![atomic::Atomic::Boolean(true)]),
        ));
        assert!(params.allow_duplicate_names);
    }

    #[test]
    fn test_allow_duplicate_names_false() {
        let params = parse(single_option(
            "allow-duplicate-names",
            sequence::Sequence::from(vec![atomic::Atomic::Boolean(false)]),
        ));
        assert!(!params.allow_duplicate_names);
    }

    #[test]
    fn test_allow_duplicate_names_default_empty_sequence() {
        let params = parse(single_option(
            "allow-duplicate-names",
            sequence::Sequence::default(),
        ));
        assert!(!params.allow_duplicate_names);
    }

    #[test]
    fn test_allow_duplicate_names_missing() {
        let params = parse(no_options());
        assert!(!params.allow_duplicate_names);
    }

    #[test]
    fn test_cdata_section_elements() {
        let html = plain_name("html");
        let script = plain_name("script");
        let params = parse(single_option(
            "cdata-section-elements",
            sequence::Sequence::from(vec![
                atomic::Atomic::QName(html.clone().into()),
                atomic::Atomic::QName(script.clone().into()),
            ]),
        ));
        assert_eq!(params.cdata_section_elements.len(), 2);
        assert_eq!(params.cdata_section_elements[0], html);
        assert_eq!(params.cdata_section_elements[1], script);
    }

    #[test]
    fn test_qname_or_string_string() {
        let json: atomic::Atomic = "json".to_string().into();
        let params = parse(single_option(
            "json-node-output-method",
            sequence::Sequence::from(vec![json]),
        ));
        assert_eq!(
            params.json_node_output_method,
            QNameOrString::String("json".to_string())
        );
    }

    #[test]
    fn test_qname_or_string_qname() {
        let owned_name = plain_name("json");
        let json: atomic::Atomic = owned_name.clone().into();
        let params = parse(single_option(
            "json-node-output-method",
            sequence::Sequence::from(vec![json]),
        ));
        assert_eq!(
            params.json_node_output_method,
            QNameOrString::QName(owned_name)
        );
    }

    #[test]
    fn test_qname_or_string_default_empty_sequence() {
        let params = parse(single_option(
            "json-node-output-method",
            sequence::Sequence::default(),
        ));
        assert_eq!(
            params.json_node_output_method,
            QNameOrString::String("xml".to_string())
        );
    }

    #[test]
    fn test_qname_or_string_default_missing() {
        let params = parse(no_options());
        assert_eq!(
            params.json_node_output_method,
            QNameOrString::String("xml".to_string())
        );
    }
}