nu_command/formats/to/
xml.rs

1use crate::formats::nu_xml_format::{COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME, COLUMN_TAG_NAME};
2use indexmap::IndexMap;
3use nu_engine::command_prelude::*;
4
5use quick_xml::{
6    escape,
7    events::{BytesEnd, BytesPI, BytesStart, BytesText, Event},
8};
9use std::{borrow::Cow, io::Cursor};
10
/// Unit type that carries the `to xml` command implementation.
#[derive(Clone)]
pub struct ToXml;
13
14impl Command for ToXml {
15    fn name(&self) -> &str {
16        "to xml"
17    }
18
19    fn signature(&self) -> Signature {
20        Signature::build("to xml")
21            .input_output_types(vec![(Type::record(), Type::String)])
22            .named(
23                "indent",
24                SyntaxShape::Int,
25                "Formats the XML text with the provided indentation setting",
26                Some('i'),
27            )
28            .switch(
29                "partial-escape",
30                "Only escape mandatory characters in text and attributes",
31                Some('p'),
32            )
33            .switch(
34                "self-closed",
35                "Output empty tags as self closing",
36                Some('s'),
37            )
38            .category(Category::Formats)
39    }
40
41    fn extra_description(&self) -> &str {
42        r#"Every XML entry is represented via a record with tag, attribute and content fields.
43To represent different types of entries different values must be written to this fields:
441. Tag entry: `{tag: <tag name> attributes: {<attr name>: "<string value>" ...} content: [<entries>]}`
452. Comment entry: `{tag: '!' attributes: null content: "<comment string>"}`
463. Processing instruction (PI): `{tag: '?<pi name>' attributes: null content: "<pi content string>"}`
474. Text: `{tag: null attributes: null content: "<text>"}`. Or as plain `<text>` instead of record.
48
49Additionally any field which is: empty record, empty list or null, can be omitted."#
50    }
51
52    fn examples(&self) -> Vec<Example> {
53        vec![
54            Example {
55                description: "Outputs an XML string representing the contents of this table",
56                example: r#"{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attributes: null content : Event}]}]} | to xml"#,
57                result: Some(Value::test_string(
58                    "<note><remember>Event</remember></note>",
59                )),
60            },
61            Example {
62                description: "When formatting xml null and empty record fields can be omitted and strings can be written without a wrapping record",
63                example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml"#,
64                result: Some(Value::test_string(
65                    "<note><remember>Event</remember></note>",
66                )),
67            },
68            Example {
69                description: "Optionally, formats the text with a custom indentation setting",
70                example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml --indent 3"#,
71                result: Some(Value::test_string(
72                    "<note>\n   <remember>Event</remember>\n</note>",
73                )),
74            },
75            Example {
76                description: "Produce less escaping sequences in resulting xml",
77                example: r#"{tag: note attributes: {a: "'qwe'\\"} content: ["\"'"]} | to xml --partial-escape"#,
78                result: Some(Value::test_string(
79                    r#"<note a="'qwe'\">"'</note>"#
80                ))
81            },
82            Example {
83                description: "Save space using self-closed tags",
84                example: r#"{tag: root content: [[tag]; [a] [b] [c]]} | to xml --self-closed"#,
85                result: Some(Value::test_string(
86                    r#"<root><a/><b/><c/></root>"#
87                ))
88            }
89        ]
90    }
91
92    fn description(&self) -> &str {
93        "Convert special record structure into .xml text."
94    }
95
96    fn run(
97        &self,
98        engine_state: &EngineState,
99        stack: &mut Stack,
100        call: &Call,
101        input: PipelineData,
102    ) -> Result<PipelineData, ShellError> {
103        let head = call.head;
104        let indent: Option<Spanned<i64>> = call.get_flag(engine_state, stack, "indent")?;
105        let partial_escape = call.has_flag(engine_state, stack, "partial-escape")?;
106        let self_closed = call.has_flag(engine_state, stack, "self-closed")?;
107
108        let job = Job::new(indent, partial_escape, self_closed);
109        let input = input.try_expand_range()?;
110        job.run(input, head)
111    }
112}
113
114struct Job {
115    writer: quick_xml::Writer<Cursor<Vec<u8>>>,
116    partial_escape: bool,
117    self_closed: bool,
118}
119
120impl Job {
121    fn new(indent: Option<Spanned<i64>>, partial_escape: bool, self_closed: bool) -> Self {
122        let writer = indent.as_ref().map_or_else(
123            || quick_xml::Writer::new(Cursor::new(Vec::new())),
124            |p| quick_xml::Writer::new_with_indent(Cursor::new(Vec::new()), b' ', p.item as usize),
125        );
126
127        Self {
128            writer,
129            partial_escape,
130            self_closed,
131        }
132    }
133
134    fn run(mut self, input: PipelineData, head: Span) -> Result<PipelineData, ShellError> {
135        let metadata = input
136            .metadata()
137            .unwrap_or_default()
138            .with_content_type(Some("application/xml".into()));
139        let value = input.into_value(head)?;
140
141        self.write_xml_entry(value, true).and_then(|_| {
142            let b = self.writer.into_inner().into_inner();
143            let s = if let Ok(s) = String::from_utf8(b) {
144                s
145            } else {
146                return Err(ShellError::NonUtf8 { span: head });
147            };
148            Ok(Value::string(s, head).into_pipeline_data_with_metadata(Some(metadata)))
149        })
150    }
151
152    fn add_attributes<'a>(
153        &self,
154        element: &mut BytesStart<'a>,
155        attributes: &'a IndexMap<String, String>,
156    ) {
157        for (k, v) in attributes {
158            if self.partial_escape {
159                element.push_attribute((k.as_bytes(), Self::partial_escape_attribute(v).as_ref()))
160            } else {
161                element.push_attribute((k.as_bytes(), escape::escape(v).as_bytes()))
162            };
163        }
164    }
165
166    fn partial_escape_attribute(raw: &str) -> Cow<[u8]> {
167        let bytes = raw.as_bytes();
168        let mut escaped: Vec<u8> = Vec::new();
169        let mut iter = bytes.iter().enumerate();
170        let mut pos = 0;
171        while let Some((new_pos, byte)) =
172            iter.find(|(_, &ch)| matches!(ch, b'<' | b'>' | b'&' | b'"'))
173        {
174            escaped.extend_from_slice(&bytes[pos..new_pos]);
175            match byte {
176                b'<' => escaped.extend_from_slice(b"&lt;"),
177                b'>' => escaped.extend_from_slice(b"&gt;"),
178                b'&' => escaped.extend_from_slice(b"&amp;"),
179                b'"' => escaped.extend_from_slice(b"&quot;"),
180
181                _ => unreachable!("Only '<', '>','&', '\"' are escaped"),
182            }
183            pos = new_pos + 1;
184        }
185
186        if !escaped.is_empty() {
187            if let Some(raw) = bytes.get(pos..) {
188                escaped.extend_from_slice(raw);
189            }
190
191            Cow::Owned(escaped)
192        } else {
193            Cow::Borrowed(bytes)
194        }
195    }
196
197    fn write_xml_entry(&mut self, entry: Value, top_level: bool) -> Result<(), ShellError> {
198        let entry_span = entry.span();
199        let span = entry.span();
200
201        // Allow using strings directly as content.
202        // So user can write
203        // {tag: a content: ['qwe']}
204        // instead of longer
205        // {tag: a content: [{content: 'qwe'}]}
206        if let (Value::String { val, .. }, false) = (&entry, top_level) {
207            return self.write_xml_text(val.as_str(), span);
208        }
209
210        if let Value::Record { val: record, .. } = &entry {
211            if let Some(bad_column) = Self::find_invalid_column(record) {
212                return Err(ShellError::CantConvert {
213                to_type: "XML".into(),
214                from_type: "record".into(),
215                span: entry_span,
216                help: Some(format!(
217                    "Invalid column \"{}\" in xml entry. Only \"{}\", \"{}\" and \"{}\" are permitted",
218                    bad_column, COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME
219                )),
220            });
221            }
222            // If key is not found it is assumed to be nothing. This way
223            // user can write a tag like {tag: a content: [...]} instead
224            // of longer {tag: a attributes: {} content: [...]}
225            let tag = record
226                .get(COLUMN_TAG_NAME)
227                .cloned()
228                .unwrap_or_else(|| Value::nothing(Span::unknown()));
229            let attrs = record
230                .get(COLUMN_ATTRS_NAME)
231                .cloned()
232                .unwrap_or_else(|| Value::nothing(Span::unknown()));
233            let content = record
234                .get(COLUMN_CONTENT_NAME)
235                .cloned()
236                .unwrap_or_else(|| Value::nothing(Span::unknown()));
237
238            let content_span = content.span();
239            let tag_span = tag.span();
240            match (tag, attrs, content) {
241                (Value::Nothing { .. }, Value::Nothing { .. }, Value::String { val, .. }) => {
242                    // Strings can not appear on top level of document
243                    if top_level {
244                        return Err(ShellError::CantConvert {
245                            to_type: "XML".into(),
246                            from_type: entry.get_type().to_string(),
247                            span: entry_span,
248                            help: Some("Strings can not be a root element of document".into()),
249                        });
250                    }
251                    self.write_xml_text(val.as_str(), content_span)
252                }
253                (Value::String { val: tag_name, .. }, attrs, children) => {
254                    self.write_tag_like(entry_span, tag_name, tag_span, attrs, children, top_level)
255                }
256                _ => Err(ShellError::CantConvert {
257                    to_type: "XML".into(),
258                    from_type: "record".into(),
259                    span: entry_span,
260                    help: Some("Tag missing or is not a string".into()),
261                }),
262            }
263        } else {
264            Err(ShellError::CantConvert {
265                to_type: "XML".into(),
266                from_type: entry.get_type().to_string(),
267                span: entry_span,
268                help: Some("Xml entry expected to be a record".into()),
269            })
270        }
271    }
272
273    fn find_invalid_column(record: &Record) -> Option<&String> {
274        const VALID_COLS: [&str; 3] = [COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME];
275        record
276            .columns()
277            .find(|col| !VALID_COLS.contains(&col.as_str()))
278    }
279
280    /// Convert record to tag-like entry: tag, PI, comment.
281    fn write_tag_like(
282        &mut self,
283        entry_span: Span,
284        tag: String,
285        tag_span: Span,
286        attrs: Value,
287        content: Value,
288        top_level: bool,
289    ) -> Result<(), ShellError> {
290        if tag == "!" {
291            // Comments can not appear on top level of document
292            if top_level {
293                return Err(ShellError::CantConvert {
294                    to_type: "XML".into(),
295                    from_type: "record".into(),
296                    span: entry_span,
297                    help: Some("Comments can not be a root element of document".into()),
298                });
299            }
300
301            self.write_comment(entry_span, attrs, content)
302        } else if let Some(tag) = tag.strip_prefix('?') {
303            // PIs can not appear on top level of document
304            if top_level {
305                return Err(ShellError::CantConvert {
306                    to_type: "XML".into(),
307                    from_type: Type::record().to_string(),
308                    span: entry_span,
309                    help: Some("PIs can not be a root element of document".into()),
310                });
311            }
312
313            let content: String = match content {
314                Value::String { val, .. } => val,
315                Value::Nothing { .. } => "".into(),
316                _ => {
317                    return Err(ShellError::CantConvert {
318                        to_type: "XML".into(),
319                        from_type: Type::record().to_string(),
320                        span: content.span(),
321                        help: Some("PI content expected to be a string".into()),
322                    });
323                }
324            };
325
326            self.write_processing_instruction(entry_span, tag, attrs, content)
327        } else {
328            // Allow tag to have no attributes or content for short hand input
329            // alternatives like {tag: a attributes: {} content: []}, {tag: a attribbutes: null
330            // content: null}, {tag: a}. See to_xml_entry for more
331            let attrs = match attrs {
332                Value::Record { val, .. } => val.into_owned(),
333                Value::Nothing { .. } => Record::new(),
334                _ => {
335                    return Err(ShellError::CantConvert {
336                        to_type: "XML".into(),
337                        from_type: attrs.get_type().to_string(),
338                        span: attrs.span(),
339                        help: Some("Tag attributes expected to be a record".into()),
340                    });
341                }
342            };
343
344            let content = match content {
345                Value::List { vals, .. } => vals,
346                Value::Nothing { .. } => Vec::new(),
347                _ => {
348                    return Err(ShellError::CantConvert {
349                        to_type: "XML".into(),
350                        from_type: content.get_type().to_string(),
351                        span: content.span(),
352                        help: Some("Tag content expected to be a list".into()),
353                    });
354                }
355            };
356
357            self.write_tag(entry_span, tag, tag_span, attrs, content)
358        }
359    }
360
361    fn write_comment(
362        &mut self,
363        entry_span: Span,
364        attrs: Value,
365        content: Value,
366    ) -> Result<(), ShellError> {
367        match (attrs, content) {
368            (Value::Nothing { .. }, Value::String { val, .. }) => {
369                // Text in comments must NOT be escaped
370                // https://www.w3.org/TR/xml/#sec-comments
371                let comment_content = BytesText::from_escaped(val.as_str());
372                self.writer
373                    .write_event(Event::Comment(comment_content))
374                    .map_err(|_| ShellError::CantConvert {
375                        to_type: "XML".to_string(),
376                        from_type: Type::record().to_string(),
377                        span: entry_span,
378                        help: Some("Failure writing comment to xml".into()),
379                    })
380            }
381            (_, content) => Err(ShellError::CantConvert {
382                to_type: "XML".into(),
383                from_type: content.get_type().to_string(),
384                span: entry_span,
385                help: Some("Comment expected to have string content and no attributes".into()),
386            }),
387        }
388    }
389
390    fn write_processing_instruction(
391        &mut self,
392        entry_span: Span,
393        tag: &str,
394        attrs: Value,
395        content: String,
396    ) -> Result<(), ShellError> {
397        if !matches!(attrs, Value::Nothing { .. }) {
398            return Err(ShellError::CantConvert {
399                to_type: "XML".into(),
400                from_type: Type::record().to_string(),
401                span: entry_span,
402                help: Some("PIs do not have attributes".into()),
403            });
404        }
405
406        let content_text = format!("{} {}", tag, content);
407        // PI content must NOT be escaped
408        // https://www.w3.org/TR/xml/#sec-pi
409        let pi_content = BytesPI::new(content_text.as_str());
410
411        self.writer
412            .write_event(Event::PI(pi_content))
413            .map_err(|_| ShellError::CantConvert {
414                to_type: "XML".to_string(),
415                from_type: Type::record().to_string(),
416                span: entry_span,
417                help: Some("Failure writing PI to xml".into()),
418            })
419    }
420
421    fn write_tag(
422        &mut self,
423        entry_span: Span,
424        tag: String,
425        tag_span: Span,
426        attrs: Record,
427        children: Vec<Value>,
428    ) -> Result<(), ShellError> {
429        if tag.starts_with('!') || tag.starts_with('?') {
430            return Err(ShellError::CantConvert {
431                to_type: "XML".to_string(),
432                from_type: Type::record().to_string(),
433                span: tag_span,
434                help: Some(format!(
435                    "Incorrect tag name {}, tag name can not start with ! or ?",
436                    tag
437                )),
438            });
439        }
440
441        let self_closed = self.self_closed && children.is_empty();
442        let attributes = Self::parse_attributes(attrs)?;
443        let mut open_tag = BytesStart::new(tag.clone());
444        self.add_attributes(&mut open_tag, &attributes);
445        let open_tag_event = if self_closed {
446            Event::Empty(open_tag)
447        } else {
448            Event::Start(open_tag)
449        };
450
451        self.writer
452            .write_event(open_tag_event)
453            .map_err(|_| ShellError::CantConvert {
454                to_type: "XML".to_string(),
455                from_type: Type::record().to_string(),
456                span: entry_span,
457                help: Some("Failure writing tag to xml".into()),
458            })?;
459
460        children
461            .into_iter()
462            .try_for_each(|child| self.write_xml_entry(child, false))?;
463
464        if !self_closed {
465            let close_tag_event = Event::End(BytesEnd::new(tag));
466            self.writer
467                .write_event(close_tag_event)
468                .map_err(|_| ShellError::CantConvert {
469                    to_type: "XML".to_string(),
470                    from_type: Type::record().to_string(),
471                    span: entry_span,
472                    help: Some("Failure writing tag to xml".into()),
473                })?;
474        }
475        Ok(())
476    }
477
478    fn parse_attributes(attrs: Record) -> Result<IndexMap<String, String>, ShellError> {
479        let mut h = IndexMap::new();
480        for (k, v) in attrs {
481            if let Value::String { val, .. } = v {
482                h.insert(k, val);
483            } else {
484                return Err(ShellError::CantConvert {
485                    to_type: "XML".to_string(),
486                    from_type: v.get_type().to_string(),
487                    span: v.span(),
488                    help: Some("Attribute value expected to be a string".into()),
489                });
490            }
491        }
492        Ok(h)
493    }
494
495    fn write_xml_text(&mut self, val: &str, span: Span) -> Result<(), ShellError> {
496        let text = Event::Text(if self.partial_escape {
497            BytesText::from_escaped(escape::partial_escape(val))
498        } else {
499            BytesText::new(val)
500        });
501
502        self.writer
503            .write_event(text)
504            .map_err(|_| ShellError::CantConvert {
505                to_type: "XML".to_string(),
506                from_type: Type::String.to_string(),
507                span,
508                help: Some("Failure writing string to xml".into()),
509            })
510    }
511}
512
#[cfg(test)]
mod test {
    use super::*;
    use crate::{Get, Metadata};
    use nu_cmd_lang::eval_pipeline_without_terminal_expression;

    /// Runs every example declared by the command.
    #[test]
    fn test_examples() {
        crate::test_examples(ToXml {})
    }

    /// `to xml | metadata | get content_type` must yield `application/xml`.
    #[test]
    fn test_content_type_metadata() {
        let mut engine_state = Box::new(EngineState::new());

        // Base functions that are needed for testing
        // Try to keep this working set small to keep tests running as fast as possible
        let delta = {
            let mut declarations = StateWorkingSet::new(&engine_state);
            declarations.add_decl(Box::new(ToXml {}));
            declarations.add_decl(Box::new(Metadata {}));
            declarations.add_decl(Box::new(Get {}));
            declarations.render()
        };
        engine_state
            .merge_delta(delta)
            .expect("Error merging delta");

        let pipeline = "{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attributes: null content : Event}]}]} | to xml | metadata | get content_type";
        let outcome = eval_pipeline_without_terminal_expression(
            pipeline,
            std::env::temp_dir().as_ref(),
            &mut engine_state,
        );

        let expected =
            Value::test_record(record!("content_type" => Value::test_string("application/xml")));
        assert_eq!(expected, outcome.expect("There should be a result"));
    }
}