nu_command/formats/to/
xml.rs

1use crate::formats::nu_xml_format::{COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME, COLUMN_TAG_NAME};
2use indexmap::IndexMap;
3use nu_engine::command_prelude::*;
4
5use quick_xml::{
6    escape,
7    events::{BytesEnd, BytesPI, BytesStart, BytesText, Event},
8};
9use std::{borrow::Cow, io::Cursor};
10
/// The `to xml` command: converts a specially structured record into XML text.
#[derive(Clone)]
pub struct ToXml;
13
14impl Command for ToXml {
    /// Returns the command's name, `to xml`.
    fn name(&self) -> &str {
        "to xml"
    }
18
19    fn signature(&self) -> Signature {
20        Signature::build("to xml")
21            .input_output_types(vec![(Type::record(), Type::String)])
22            .named(
23                "indent",
24                SyntaxShape::Int,
25                "Formats the XML text with the provided indentation setting",
26                Some('i'),
27            )
28            .switch(
29                "partial-escape",
30                "Only escape mandatory characters in text and attributes",
31                Some('p'),
32            )
33            .switch(
34                "self-closed",
35                "Output empty tags as self closing",
36                Some('s'),
37            )
38            .category(Category::Formats)
39    }
40
41    fn extra_description(&self) -> &str {
42        r#"Every XML entry is represented via a record with tag, attribute and content fields.
43To represent different types of entries different values must be written to this fields:
441. Tag entry: `{tag: <tag name> attributes: {<attr name>: "<string value>" ...} content: [<entries>]}`
452. Comment entry: `{tag: '!' attributes: null content: "<comment string>"}`
463. Processing instruction (PI): `{tag: '?<pi name>' attributes: null content: "<pi content string>"}`
474. Text: `{tag: null attributes: null content: "<text>"}`. Or as plain `<text>` instead of record.
48
49Additionally any field which is: empty record, empty list or null, can be omitted."#
50    }
51
    /// Usage examples for the command, each paired with its expected output.
    ///
    /// The examples cover the full record form, the shorthand form (omitted
    /// fields and bare strings), and each of the three flags.
    fn examples(&self) -> Vec<Example> {
        vec![
            // Full form: every entry spells out tag, attributes and content.
            Example {
                description: "Outputs an XML string representing the contents of this table",
                example: r#"{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attributes: null content : Event}]}]} | to xml"#,
                result: Some(Value::test_string(
                    "<note><remember>Event</remember></note>",
                )),
            },
            // Shorthand form producing the same document as the example above.
            Example {
                description: "When formatting xml null and empty record fields can be omitted and strings can be written without a wrapping record",
                example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml"#,
                result: Some(Value::test_string(
                    "<note><remember>Event</remember></note>",
                )),
            },
            // `--indent`
            Example {
                description: "Optionally, formats the text with a custom indentation setting",
                example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml --indent 3"#,
                result: Some(Value::test_string(
                    "<note>\n   <remember>Event</remember>\n</note>",
                )),
            },
            // `--partial-escape`
            Example {
                description: "Produce less escaping sequences in resulting xml",
                example: r#"{tag: note attributes: {a: "'qwe'\\"} content: ["\"'"]} | to xml --partial-escape"#,
                result: Some(Value::test_string(r#"<note a="'qwe'\">"'</note>"#)),
            },
            // `--self-closed`
            Example {
                description: "Save space using self-closed tags",
                example: r#"{tag: root content: [[tag]; [a] [b] [c]]} | to xml --self-closed"#,
                result: Some(Value::test_string(r#"<root><a/><b/><c/></root>"#)),
            },
        ]
    }
87
    /// Returns the one-line summary of the command.
    fn description(&self) -> &str {
        "Convert special record structure into .xml text."
    }
91
92    fn run(
93        &self,
94        engine_state: &EngineState,
95        stack: &mut Stack,
96        call: &Call,
97        input: PipelineData,
98    ) -> Result<PipelineData, ShellError> {
99        let head = call.head;
100        let indent: Option<Spanned<i64>> = call.get_flag(engine_state, stack, "indent")?;
101        let partial_escape = call.has_flag(engine_state, stack, "partial-escape")?;
102        let self_closed = call.has_flag(engine_state, stack, "self-closed")?;
103
104        let job = Job::new(indent, partial_escape, self_closed);
105        let input = input.try_expand_range()?;
106        job.run(input, head)
107    }
108}
109
/// State for a single `to xml` conversion.
struct Job {
    /// XML writer that accumulates the output in an in-memory byte buffer.
    writer: quick_xml::Writer<Cursor<Vec<u8>>>,
    /// When set, text and attribute values get only the reduced ("partial") escaping.
    partial_escape: bool,
    /// When set, tags without children are emitted as self-closing (`<a/>`).
    self_closed: bool,
}
115
116impl Job {
117    fn new(indent: Option<Spanned<i64>>, partial_escape: bool, self_closed: bool) -> Self {
118        let writer = indent.as_ref().map_or_else(
119            || quick_xml::Writer::new(Cursor::new(Vec::new())),
120            |p| quick_xml::Writer::new_with_indent(Cursor::new(Vec::new()), b' ', p.item as usize),
121        );
122
123        Self {
124            writer,
125            partial_escape,
126            self_closed,
127        }
128    }
129
130    fn run(mut self, input: PipelineData, head: Span) -> Result<PipelineData, ShellError> {
131        let metadata = input
132            .metadata()
133            .unwrap_or_default()
134            .with_content_type(Some("application/xml".into()));
135        let value = input.into_value(head)?;
136
137        self.write_xml_entry(value, true).and_then(|_| {
138            let b = self.writer.into_inner().into_inner();
139            let s = if let Ok(s) = String::from_utf8(b) {
140                s
141            } else {
142                return Err(ShellError::NonUtf8 { span: head });
143            };
144            Ok(Value::string(s, head).into_pipeline_data_with_metadata(Some(metadata)))
145        })
146    }
147
148    fn add_attributes<'a>(
149        &self,
150        element: &mut BytesStart<'a>,
151        attributes: &'a IndexMap<String, String>,
152    ) {
153        for (k, v) in attributes {
154            if self.partial_escape {
155                element.push_attribute((k.as_bytes(), Self::partial_escape_attribute(v).as_ref()))
156            } else {
157                element.push_attribute((k.as_bytes(), escape::escape(v).as_bytes()))
158            };
159        }
160    }
161
162    fn partial_escape_attribute(raw: &str) -> Cow<[u8]> {
163        let bytes = raw.as_bytes();
164        let mut escaped: Vec<u8> = Vec::new();
165        let mut iter = bytes.iter().enumerate();
166        let mut pos = 0;
167        while let Some((new_pos, byte)) =
168            iter.find(|(_, ch)| matches!(ch, b'<' | b'>' | b'&' | b'"'))
169        {
170            escaped.extend_from_slice(&bytes[pos..new_pos]);
171            match byte {
172                b'<' => escaped.extend_from_slice(b"&lt;"),
173                b'>' => escaped.extend_from_slice(b"&gt;"),
174                b'&' => escaped.extend_from_slice(b"&amp;"),
175                b'"' => escaped.extend_from_slice(b"&quot;"),
176
177                _ => unreachable!("Only '<', '>','&', '\"' are escaped"),
178            }
179            pos = new_pos + 1;
180        }
181
182        if !escaped.is_empty() {
183            if let Some(raw) = bytes.get(pos..) {
184                escaped.extend_from_slice(raw);
185            }
186
187            Cow::Owned(escaped)
188        } else {
189            Cow::Borrowed(bytes)
190        }
191    }
192
193    fn write_xml_entry(&mut self, entry: Value, top_level: bool) -> Result<(), ShellError> {
194        let entry_span = entry.span();
195        let span = entry.span();
196
197        // Allow using strings directly as content.
198        // So user can write
199        // {tag: a content: ['qwe']}
200        // instead of longer
201        // {tag: a content: [{content: 'qwe'}]}
202        if let (Value::String { val, .. }, false) = (&entry, top_level) {
203            return self.write_xml_text(val.as_str(), span);
204        }
205
206        if let Value::Record { val: record, .. } = &entry {
207            if let Some(bad_column) = Self::find_invalid_column(record) {
208                return Err(ShellError::CantConvert {
209                    to_type: "XML".into(),
210                    from_type: "record".into(),
211                    span: entry_span,
212                    help: Some(format!(
213                        "Invalid column \"{bad_column}\" in xml entry. Only \"{COLUMN_TAG_NAME}\", \"{COLUMN_ATTRS_NAME}\" and \"{COLUMN_CONTENT_NAME}\" are permitted"
214                    )),
215                });
216            }
217            // If key is not found it is assumed to be nothing. This way
218            // user can write a tag like {tag: a content: [...]} instead
219            // of longer {tag: a attributes: {} content: [...]}
220            let tag = record
221                .get(COLUMN_TAG_NAME)
222                .cloned()
223                .unwrap_or_else(|| Value::nothing(Span::unknown()));
224            let attrs = record
225                .get(COLUMN_ATTRS_NAME)
226                .cloned()
227                .unwrap_or_else(|| Value::nothing(Span::unknown()));
228            let content = record
229                .get(COLUMN_CONTENT_NAME)
230                .cloned()
231                .unwrap_or_else(|| Value::nothing(Span::unknown()));
232
233            let content_span = content.span();
234            let tag_span = tag.span();
235            match (tag, attrs, content) {
236                (Value::Nothing { .. }, Value::Nothing { .. }, Value::String { val, .. }) => {
237                    // Strings can not appear on top level of document
238                    if top_level {
239                        return Err(ShellError::CantConvert {
240                            to_type: "XML".into(),
241                            from_type: entry.get_type().to_string(),
242                            span: entry_span,
243                            help: Some("Strings can not be a root element of document".into()),
244                        });
245                    }
246                    self.write_xml_text(val.as_str(), content_span)
247                }
248                (Value::String { val: tag_name, .. }, attrs, children) => {
249                    self.write_tag_like(entry_span, tag_name, tag_span, attrs, children, top_level)
250                }
251                _ => Err(ShellError::CantConvert {
252                    to_type: "XML".into(),
253                    from_type: "record".into(),
254                    span: entry_span,
255                    help: Some("Tag missing or is not a string".into()),
256                }),
257            }
258        } else {
259            Err(ShellError::CantConvert {
260                to_type: "XML".into(),
261                from_type: entry.get_type().to_string(),
262                span: entry_span,
263                help: Some("Xml entry expected to be a record".into()),
264            })
265        }
266    }
267
268    fn find_invalid_column(record: &Record) -> Option<&String> {
269        const VALID_COLS: [&str; 3] = [COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME];
270        record
271            .columns()
272            .find(|col| !VALID_COLS.contains(&col.as_str()))
273    }
274
275    /// Convert record to tag-like entry: tag, PI, comment.
276    fn write_tag_like(
277        &mut self,
278        entry_span: Span,
279        tag: String,
280        tag_span: Span,
281        attrs: Value,
282        content: Value,
283        top_level: bool,
284    ) -> Result<(), ShellError> {
285        if tag == "!" {
286            // Comments can not appear on top level of document
287            if top_level {
288                return Err(ShellError::CantConvert {
289                    to_type: "XML".into(),
290                    from_type: "record".into(),
291                    span: entry_span,
292                    help: Some("Comments can not be a root element of document".into()),
293                });
294            }
295
296            self.write_comment(entry_span, attrs, content)
297        } else if let Some(tag) = tag.strip_prefix('?') {
298            // PIs can not appear on top level of document
299            if top_level {
300                return Err(ShellError::CantConvert {
301                    to_type: "XML".into(),
302                    from_type: Type::record().to_string(),
303                    span: entry_span,
304                    help: Some("PIs can not be a root element of document".into()),
305                });
306            }
307
308            let content: String = match content {
309                Value::String { val, .. } => val,
310                Value::Nothing { .. } => "".into(),
311                _ => {
312                    return Err(ShellError::CantConvert {
313                        to_type: "XML".into(),
314                        from_type: Type::record().to_string(),
315                        span: content.span(),
316                        help: Some("PI content expected to be a string".into()),
317                    });
318                }
319            };
320
321            self.write_processing_instruction(entry_span, tag, attrs, content)
322        } else {
323            // Allow tag to have no attributes or content for short hand input
324            // alternatives like {tag: a attributes: {} content: []}, {tag: a attribbutes: null
325            // content: null}, {tag: a}. See to_xml_entry for more
326            let attrs = match attrs {
327                Value::Record { val, .. } => val.into_owned(),
328                Value::Nothing { .. } => Record::new(),
329                _ => {
330                    return Err(ShellError::CantConvert {
331                        to_type: "XML".into(),
332                        from_type: attrs.get_type().to_string(),
333                        span: attrs.span(),
334                        help: Some("Tag attributes expected to be a record".into()),
335                    });
336                }
337            };
338
339            let content = match content {
340                Value::List { vals, .. } => vals,
341                Value::Nothing { .. } => Vec::new(),
342                _ => {
343                    return Err(ShellError::CantConvert {
344                        to_type: "XML".into(),
345                        from_type: content.get_type().to_string(),
346                        span: content.span(),
347                        help: Some("Tag content expected to be a list".into()),
348                    });
349                }
350            };
351
352            self.write_tag(entry_span, tag, tag_span, attrs, content)
353        }
354    }
355
356    fn write_comment(
357        &mut self,
358        entry_span: Span,
359        attrs: Value,
360        content: Value,
361    ) -> Result<(), ShellError> {
362        match (attrs, content) {
363            (Value::Nothing { .. }, Value::String { val, .. }) => {
364                // Text in comments must NOT be escaped
365                // https://www.w3.org/TR/xml/#sec-comments
366                let comment_content = BytesText::from_escaped(val.as_str());
367                self.writer
368                    .write_event(Event::Comment(comment_content))
369                    .map_err(|_| ShellError::CantConvert {
370                        to_type: "XML".to_string(),
371                        from_type: Type::record().to_string(),
372                        span: entry_span,
373                        help: Some("Failure writing comment to xml".into()),
374                    })
375            }
376            (_, content) => Err(ShellError::CantConvert {
377                to_type: "XML".into(),
378                from_type: content.get_type().to_string(),
379                span: entry_span,
380                help: Some("Comment expected to have string content and no attributes".into()),
381            }),
382        }
383    }
384
385    fn write_processing_instruction(
386        &mut self,
387        entry_span: Span,
388        tag: &str,
389        attrs: Value,
390        content: String,
391    ) -> Result<(), ShellError> {
392        if !matches!(attrs, Value::Nothing { .. }) {
393            return Err(ShellError::CantConvert {
394                to_type: "XML".into(),
395                from_type: Type::record().to_string(),
396                span: entry_span,
397                help: Some("PIs do not have attributes".into()),
398            });
399        }
400
401        let content_text = format!("{tag} {content}");
402        // PI content must NOT be escaped
403        // https://www.w3.org/TR/xml/#sec-pi
404        let pi_content = BytesPI::new(content_text.as_str());
405
406        self.writer
407            .write_event(Event::PI(pi_content))
408            .map_err(|_| ShellError::CantConvert {
409                to_type: "XML".to_string(),
410                from_type: Type::record().to_string(),
411                span: entry_span,
412                help: Some("Failure writing PI to xml".into()),
413            })
414    }
415
416    fn write_tag(
417        &mut self,
418        entry_span: Span,
419        tag: String,
420        tag_span: Span,
421        attrs: Record,
422        children: Vec<Value>,
423    ) -> Result<(), ShellError> {
424        if tag.starts_with('!') || tag.starts_with('?') {
425            return Err(ShellError::CantConvert {
426                to_type: "XML".to_string(),
427                from_type: Type::record().to_string(),
428                span: tag_span,
429                help: Some(format!(
430                    "Incorrect tag name {tag}, tag name can not start with ! or ?"
431                )),
432            });
433        }
434
435        let self_closed = self.self_closed && children.is_empty();
436        let attributes = Self::parse_attributes(attrs)?;
437        let mut open_tag = BytesStart::new(tag.clone());
438        self.add_attributes(&mut open_tag, &attributes);
439        let open_tag_event = if self_closed {
440            Event::Empty(open_tag)
441        } else {
442            Event::Start(open_tag)
443        };
444
445        self.writer
446            .write_event(open_tag_event)
447            .map_err(|_| ShellError::CantConvert {
448                to_type: "XML".to_string(),
449                from_type: Type::record().to_string(),
450                span: entry_span,
451                help: Some("Failure writing tag to xml".into()),
452            })?;
453
454        children
455            .into_iter()
456            .try_for_each(|child| self.write_xml_entry(child, false))?;
457
458        if !self_closed {
459            let close_tag_event = Event::End(BytesEnd::new(tag));
460            self.writer
461                .write_event(close_tag_event)
462                .map_err(|_| ShellError::CantConvert {
463                    to_type: "XML".to_string(),
464                    from_type: Type::record().to_string(),
465                    span: entry_span,
466                    help: Some("Failure writing tag to xml".into()),
467                })?;
468        }
469        Ok(())
470    }
471
472    fn parse_attributes(attrs: Record) -> Result<IndexMap<String, String>, ShellError> {
473        let mut h = IndexMap::new();
474        for (k, v) in attrs {
475            if let Value::String { val, .. } = v {
476                h.insert(k, val);
477            } else {
478                return Err(ShellError::CantConvert {
479                    to_type: "XML".to_string(),
480                    from_type: v.get_type().to_string(),
481                    span: v.span(),
482                    help: Some("Attribute value expected to be a string".into()),
483                });
484            }
485        }
486        Ok(h)
487    }
488
489    fn write_xml_text(&mut self, val: &str, span: Span) -> Result<(), ShellError> {
490        let text = Event::Text(if self.partial_escape {
491            BytesText::from_escaped(escape::partial_escape(val))
492        } else {
493            BytesText::new(val)
494        });
495
496        self.writer
497            .write_event(text)
498            .map_err(|_| ShellError::CantConvert {
499                to_type: "XML".to_string(),
500                from_type: Type::String.to_string(),
501                span,
502                help: Some("Failure writing string to xml".into()),
503            })
504    }
505}
506
// Unit tests for the `to xml` command.
#[cfg(test)]
mod test {
    use nu_cmd_lang::eval_pipeline_without_terminal_expression;

    use crate::{Get, Metadata};

    use super::*;

    /// Runs every example from `ToXml::examples` through the shared example checker.
    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(ToXml {})
    }

    /// Checks that the output of `to xml` carries the `application/xml` content type.
    #[test]
    fn test_content_type_metadata() {
        let mut engine_state = Box::new(EngineState::new());
        let delta = {
            // Base functions that are needed for testing
            // Try to keep this working set small to keep tests running as fast as possible
            let mut working_set = StateWorkingSet::new(&engine_state);

            working_set.add_decl(Box::new(ToXml {}));
            working_set.add_decl(Box::new(Metadata {}));
            working_set.add_decl(Box::new(Get {}));

            working_set.render()
        };

        engine_state
            .merge_delta(delta)
            .expect("Error merging delta");

        // Convert a small document, then read the content type back from the
        // metadata of the result.
        let cmd = "{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attributes: null content : Event}]}]} | to xml | metadata | get content_type | $in";
        let result = eval_pipeline_without_terminal_expression(
            cmd,
            std::env::temp_dir().as_ref(),
            &mut engine_state,
        );
        assert_eq!(
            Value::test_string("application/xml"),
            result.expect("There should be a result")
        );
    }
}