nu_command/formats/to/
xml.rs

1use crate::formats::nu_xml_format::{COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME, COLUMN_TAG_NAME};
2use indexmap::IndexMap;
3use nu_engine::command_prelude::*;
4
5use quick_xml::{
6    escape,
7    events::{BytesEnd, BytesPI, BytesStart, BytesText, Event},
8};
9use std::{borrow::Cow, io::Cursor};
10
11#[derive(Clone)]
12pub struct ToXml;
13
14impl Command for ToXml {
15    fn name(&self) -> &str {
16        "to xml"
17    }
18
19    fn signature(&self) -> Signature {
20        Signature::build("to xml")
21            .input_output_types(vec![(Type::record(), Type::String)])
22            .named(
23                "indent",
24                SyntaxShape::Int,
25                "Formats the XML text with the provided indentation setting",
26                Some('i'),
27            )
28            .switch(
29                "partial-escape",
30                "Only escape mandatory characters in text and attributes",
31                Some('p'),
32            )
33            .switch(
34                "self-closed",
35                "Output empty tags as self closing",
36                Some('s'),
37            )
38            .category(Category::Formats)
39    }
40
41    fn extra_description(&self) -> &str {
42        r#"Every XML entry is represented via a record with tag, attribute and content fields.
43To represent different types of entries different values must be written to this fields:
441. Tag entry: `{tag: <tag name> attributes: {<attr name>: "<string value>" ...} content: [<entries>]}`
452. Comment entry: `{tag: '!' attributes: null content: "<comment string>"}`
463. Processing instruction (PI): `{tag: '?<pi name>' attributes: null content: "<pi content string>"}`
474. Text: `{tag: null attributes: null content: "<text>"}`. Or as plain `<text>` instead of record.
48
49Additionally any field which is: empty record, empty list or null, can be omitted."#
50    }
51
52    fn examples(&self) -> Vec<Example> {
53        vec![
54            Example {
55                description: "Outputs an XML string representing the contents of this table",
56                example: r#"{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attributes: null content : Event}]}]} | to xml"#,
57                result: Some(Value::test_string(
58                    "<note><remember>Event</remember></note>",
59                )),
60            },
61            Example {
62                description: "When formatting xml null and empty record fields can be omitted and strings can be written without a wrapping record",
63                example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml"#,
64                result: Some(Value::test_string(
65                    "<note><remember>Event</remember></note>",
66                )),
67            },
68            Example {
69                description: "Optionally, formats the text with a custom indentation setting",
70                example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml --indent 3"#,
71                result: Some(Value::test_string(
72                    "<note>\n   <remember>Event</remember>\n</note>",
73                )),
74            },
75            Example {
76                description: "Produce less escaping sequences in resulting xml",
77                example: r#"{tag: note attributes: {a: "'qwe'\\"} content: ["\"'"]} | to xml --partial-escape"#,
78                result: Some(Value::test_string(r#"<note a="'qwe'\">"'</note>"#)),
79            },
80            Example {
81                description: "Save space using self-closed tags",
82                example: r#"{tag: root content: [[tag]; [a] [b] [c]]} | to xml --self-closed"#,
83                result: Some(Value::test_string(r#"<root><a/><b/><c/></root>"#)),
84            },
85        ]
86    }
87
88    fn description(&self) -> &str {
89        "Convert special record structure into .xml text."
90    }
91
92    fn run(
93        &self,
94        engine_state: &EngineState,
95        stack: &mut Stack,
96        call: &Call,
97        input: PipelineData,
98    ) -> Result<PipelineData, ShellError> {
99        let head = call.head;
100        let indent: Option<Spanned<i64>> = call.get_flag(engine_state, stack, "indent")?;
101        let partial_escape = call.has_flag(engine_state, stack, "partial-escape")?;
102        let self_closed = call.has_flag(engine_state, stack, "self-closed")?;
103
104        let job = Job::new(indent, partial_escape, self_closed);
105        let input = input.try_expand_range()?;
106        job.run(input, head)
107    }
108}
109
110struct Job {
111    writer: quick_xml::Writer<Cursor<Vec<u8>>>,
112    partial_escape: bool,
113    self_closed: bool,
114}
115
116impl Job {
117    fn new(indent: Option<Spanned<i64>>, partial_escape: bool, self_closed: bool) -> Self {
118        let writer = indent.as_ref().map_or_else(
119            || quick_xml::Writer::new(Cursor::new(Vec::new())),
120            |p| quick_xml::Writer::new_with_indent(Cursor::new(Vec::new()), b' ', p.item as usize),
121        );
122
123        Self {
124            writer,
125            partial_escape,
126            self_closed,
127        }
128    }
129
130    fn run(mut self, input: PipelineData, head: Span) -> Result<PipelineData, ShellError> {
131        let metadata = input
132            .metadata()
133            .unwrap_or_default()
134            .with_content_type(Some("application/xml".into()));
135        let value = input.into_value(head)?;
136
137        self.write_xml_entry(value, true).and_then(|_| {
138            let b = self.writer.into_inner().into_inner();
139            let s = if let Ok(s) = String::from_utf8(b) {
140                s
141            } else {
142                return Err(ShellError::NonUtf8 { span: head });
143            };
144            Ok(Value::string(s, head).into_pipeline_data_with_metadata(Some(metadata)))
145        })
146    }
147
148    fn add_attributes<'a>(
149        &self,
150        element: &mut BytesStart<'a>,
151        attributes: &'a IndexMap<String, String>,
152    ) {
153        for (k, v) in attributes {
154            if self.partial_escape {
155                element.push_attribute((k.as_bytes(), Self::partial_escape_attribute(v).as_ref()))
156            } else {
157                element.push_attribute((k.as_bytes(), escape::escape(v).as_bytes()))
158            };
159        }
160    }
161
162    fn partial_escape_attribute(raw: &str) -> Cow<[u8]> {
163        let bytes = raw.as_bytes();
164        let mut escaped: Vec<u8> = Vec::new();
165        let mut iter = bytes.iter().enumerate();
166        let mut pos = 0;
167        while let Some((new_pos, byte)) =
168            iter.find(|(_, ch)| matches!(ch, b'<' | b'>' | b'&' | b'"'))
169        {
170            escaped.extend_from_slice(&bytes[pos..new_pos]);
171            match byte {
172                b'<' => escaped.extend_from_slice(b"&lt;"),
173                b'>' => escaped.extend_from_slice(b"&gt;"),
174                b'&' => escaped.extend_from_slice(b"&amp;"),
175                b'"' => escaped.extend_from_slice(b"&quot;"),
176
177                _ => unreachable!("Only '<', '>','&', '\"' are escaped"),
178            }
179            pos = new_pos + 1;
180        }
181
182        if !escaped.is_empty() {
183            if let Some(raw) = bytes.get(pos..) {
184                escaped.extend_from_slice(raw);
185            }
186
187            Cow::Owned(escaped)
188        } else {
189            Cow::Borrowed(bytes)
190        }
191    }
192
193    fn write_xml_entry(&mut self, entry: Value, top_level: bool) -> Result<(), ShellError> {
194        let entry_span = entry.span();
195        let span = entry.span();
196
197        // Allow using strings directly as content.
198        // So user can write
199        // {tag: a content: ['qwe']}
200        // instead of longer
201        // {tag: a content: [{content: 'qwe'}]}
202        if let (Value::String { val, .. }, false) = (&entry, top_level) {
203            return self.write_xml_text(val.as_str(), span);
204        }
205
206        if let Value::Record { val: record, .. } = &entry {
207            if let Some(bad_column) = Self::find_invalid_column(record) {
208                return Err(ShellError::CantConvert {
209                    to_type: "XML".into(),
210                    from_type: "record".into(),
211                    span: entry_span,
212                    help: Some(format!(
213                        "Invalid column \"{}\" in xml entry. Only \"{}\", \"{}\" and \"{}\" are permitted",
214                        bad_column, COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME
215                    )),
216                });
217            }
218            // If key is not found it is assumed to be nothing. This way
219            // user can write a tag like {tag: a content: [...]} instead
220            // of longer {tag: a attributes: {} content: [...]}
221            let tag = record
222                .get(COLUMN_TAG_NAME)
223                .cloned()
224                .unwrap_or_else(|| Value::nothing(Span::unknown()));
225            let attrs = record
226                .get(COLUMN_ATTRS_NAME)
227                .cloned()
228                .unwrap_or_else(|| Value::nothing(Span::unknown()));
229            let content = record
230                .get(COLUMN_CONTENT_NAME)
231                .cloned()
232                .unwrap_or_else(|| Value::nothing(Span::unknown()));
233
234            let content_span = content.span();
235            let tag_span = tag.span();
236            match (tag, attrs, content) {
237                (Value::Nothing { .. }, Value::Nothing { .. }, Value::String { val, .. }) => {
238                    // Strings can not appear on top level of document
239                    if top_level {
240                        return Err(ShellError::CantConvert {
241                            to_type: "XML".into(),
242                            from_type: entry.get_type().to_string(),
243                            span: entry_span,
244                            help: Some("Strings can not be a root element of document".into()),
245                        });
246                    }
247                    self.write_xml_text(val.as_str(), content_span)
248                }
249                (Value::String { val: tag_name, .. }, attrs, children) => {
250                    self.write_tag_like(entry_span, tag_name, tag_span, attrs, children, top_level)
251                }
252                _ => Err(ShellError::CantConvert {
253                    to_type: "XML".into(),
254                    from_type: "record".into(),
255                    span: entry_span,
256                    help: Some("Tag missing or is not a string".into()),
257                }),
258            }
259        } else {
260            Err(ShellError::CantConvert {
261                to_type: "XML".into(),
262                from_type: entry.get_type().to_string(),
263                span: entry_span,
264                help: Some("Xml entry expected to be a record".into()),
265            })
266        }
267    }
268
269    fn find_invalid_column(record: &Record) -> Option<&String> {
270        const VALID_COLS: [&str; 3] = [COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME];
271        record
272            .columns()
273            .find(|col| !VALID_COLS.contains(&col.as_str()))
274    }
275
276    /// Convert record to tag-like entry: tag, PI, comment.
277    fn write_tag_like(
278        &mut self,
279        entry_span: Span,
280        tag: String,
281        tag_span: Span,
282        attrs: Value,
283        content: Value,
284        top_level: bool,
285    ) -> Result<(), ShellError> {
286        if tag == "!" {
287            // Comments can not appear on top level of document
288            if top_level {
289                return Err(ShellError::CantConvert {
290                    to_type: "XML".into(),
291                    from_type: "record".into(),
292                    span: entry_span,
293                    help: Some("Comments can not be a root element of document".into()),
294                });
295            }
296
297            self.write_comment(entry_span, attrs, content)
298        } else if let Some(tag) = tag.strip_prefix('?') {
299            // PIs can not appear on top level of document
300            if top_level {
301                return Err(ShellError::CantConvert {
302                    to_type: "XML".into(),
303                    from_type: Type::record().to_string(),
304                    span: entry_span,
305                    help: Some("PIs can not be a root element of document".into()),
306                });
307            }
308
309            let content: String = match content {
310                Value::String { val, .. } => val,
311                Value::Nothing { .. } => "".into(),
312                _ => {
313                    return Err(ShellError::CantConvert {
314                        to_type: "XML".into(),
315                        from_type: Type::record().to_string(),
316                        span: content.span(),
317                        help: Some("PI content expected to be a string".into()),
318                    });
319                }
320            };
321
322            self.write_processing_instruction(entry_span, tag, attrs, content)
323        } else {
324            // Allow tag to have no attributes or content for short hand input
325            // alternatives like {tag: a attributes: {} content: []}, {tag: a attribbutes: null
326            // content: null}, {tag: a}. See to_xml_entry for more
327            let attrs = match attrs {
328                Value::Record { val, .. } => val.into_owned(),
329                Value::Nothing { .. } => Record::new(),
330                _ => {
331                    return Err(ShellError::CantConvert {
332                        to_type: "XML".into(),
333                        from_type: attrs.get_type().to_string(),
334                        span: attrs.span(),
335                        help: Some("Tag attributes expected to be a record".into()),
336                    });
337                }
338            };
339
340            let content = match content {
341                Value::List { vals, .. } => vals,
342                Value::Nothing { .. } => Vec::new(),
343                _ => {
344                    return Err(ShellError::CantConvert {
345                        to_type: "XML".into(),
346                        from_type: content.get_type().to_string(),
347                        span: content.span(),
348                        help: Some("Tag content expected to be a list".into()),
349                    });
350                }
351            };
352
353            self.write_tag(entry_span, tag, tag_span, attrs, content)
354        }
355    }
356
357    fn write_comment(
358        &mut self,
359        entry_span: Span,
360        attrs: Value,
361        content: Value,
362    ) -> Result<(), ShellError> {
363        match (attrs, content) {
364            (Value::Nothing { .. }, Value::String { val, .. }) => {
365                // Text in comments must NOT be escaped
366                // https://www.w3.org/TR/xml/#sec-comments
367                let comment_content = BytesText::from_escaped(val.as_str());
368                self.writer
369                    .write_event(Event::Comment(comment_content))
370                    .map_err(|_| ShellError::CantConvert {
371                        to_type: "XML".to_string(),
372                        from_type: Type::record().to_string(),
373                        span: entry_span,
374                        help: Some("Failure writing comment to xml".into()),
375                    })
376            }
377            (_, content) => Err(ShellError::CantConvert {
378                to_type: "XML".into(),
379                from_type: content.get_type().to_string(),
380                span: entry_span,
381                help: Some("Comment expected to have string content and no attributes".into()),
382            }),
383        }
384    }
385
386    fn write_processing_instruction(
387        &mut self,
388        entry_span: Span,
389        tag: &str,
390        attrs: Value,
391        content: String,
392    ) -> Result<(), ShellError> {
393        if !matches!(attrs, Value::Nothing { .. }) {
394            return Err(ShellError::CantConvert {
395                to_type: "XML".into(),
396                from_type: Type::record().to_string(),
397                span: entry_span,
398                help: Some("PIs do not have attributes".into()),
399            });
400        }
401
402        let content_text = format!("{} {}", tag, content);
403        // PI content must NOT be escaped
404        // https://www.w3.org/TR/xml/#sec-pi
405        let pi_content = BytesPI::new(content_text.as_str());
406
407        self.writer
408            .write_event(Event::PI(pi_content))
409            .map_err(|_| ShellError::CantConvert {
410                to_type: "XML".to_string(),
411                from_type: Type::record().to_string(),
412                span: entry_span,
413                help: Some("Failure writing PI to xml".into()),
414            })
415    }
416
417    fn write_tag(
418        &mut self,
419        entry_span: Span,
420        tag: String,
421        tag_span: Span,
422        attrs: Record,
423        children: Vec<Value>,
424    ) -> Result<(), ShellError> {
425        if tag.starts_with('!') || tag.starts_with('?') {
426            return Err(ShellError::CantConvert {
427                to_type: "XML".to_string(),
428                from_type: Type::record().to_string(),
429                span: tag_span,
430                help: Some(format!(
431                    "Incorrect tag name {}, tag name can not start with ! or ?",
432                    tag
433                )),
434            });
435        }
436
437        let self_closed = self.self_closed && children.is_empty();
438        let attributes = Self::parse_attributes(attrs)?;
439        let mut open_tag = BytesStart::new(tag.clone());
440        self.add_attributes(&mut open_tag, &attributes);
441        let open_tag_event = if self_closed {
442            Event::Empty(open_tag)
443        } else {
444            Event::Start(open_tag)
445        };
446
447        self.writer
448            .write_event(open_tag_event)
449            .map_err(|_| ShellError::CantConvert {
450                to_type: "XML".to_string(),
451                from_type: Type::record().to_string(),
452                span: entry_span,
453                help: Some("Failure writing tag to xml".into()),
454            })?;
455
456        children
457            .into_iter()
458            .try_for_each(|child| self.write_xml_entry(child, false))?;
459
460        if !self_closed {
461            let close_tag_event = Event::End(BytesEnd::new(tag));
462            self.writer
463                .write_event(close_tag_event)
464                .map_err(|_| ShellError::CantConvert {
465                    to_type: "XML".to_string(),
466                    from_type: Type::record().to_string(),
467                    span: entry_span,
468                    help: Some("Failure writing tag to xml".into()),
469                })?;
470        }
471        Ok(())
472    }
473
474    fn parse_attributes(attrs: Record) -> Result<IndexMap<String, String>, ShellError> {
475        let mut h = IndexMap::new();
476        for (k, v) in attrs {
477            if let Value::String { val, .. } = v {
478                h.insert(k, val);
479            } else {
480                return Err(ShellError::CantConvert {
481                    to_type: "XML".to_string(),
482                    from_type: v.get_type().to_string(),
483                    span: v.span(),
484                    help: Some("Attribute value expected to be a string".into()),
485                });
486            }
487        }
488        Ok(h)
489    }
490
491    fn write_xml_text(&mut self, val: &str, span: Span) -> Result<(), ShellError> {
492        let text = Event::Text(if self.partial_escape {
493            BytesText::from_escaped(escape::partial_escape(val))
494        } else {
495            BytesText::new(val)
496        });
497
498        self.writer
499            .write_event(text)
500            .map_err(|_| ShellError::CantConvert {
501                to_type: "XML".to_string(),
502                from_type: Type::String.to_string(),
503                span,
504                help: Some("Failure writing string to xml".into()),
505            })
506    }
507}
508
#[cfg(test)]
mod test {
    use super::*;

    use crate::{Get, Metadata};
    use nu_cmd_lang::eval_pipeline_without_terminal_expression;

    /// Runs every example declared by the command and checks its expected output.
    #[test]
    fn test_examples() {
        crate::test_examples(ToXml {})
    }

    /// The command output must be tagged with the `application/xml` content type.
    #[test]
    fn test_content_type_metadata() {
        let mut engine_state = Box::new(EngineState::new());

        // Base functions that are needed for testing
        // Try to keep this working set small to keep tests running as fast as possible
        let mut working_set = StateWorkingSet::new(&engine_state);
        working_set.add_decl(Box::new(ToXml {}));
        working_set.add_decl(Box::new(Metadata {}));
        working_set.add_decl(Box::new(Get {}));
        let delta = working_set.render();

        engine_state
            .merge_delta(delta)
            .expect("Error merging delta");

        let cmd = "{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attributes: null content : Event}]}]} | to xml | metadata | get content_type";
        let working_dir = std::env::temp_dir();
        let result =
            eval_pipeline_without_terminal_expression(cmd, working_dir.as_ref(), &mut engine_state);

        assert_eq!(
            Value::test_record(record!("content_type" => Value::test_string("application/xml"))),
            result.expect("There should be a result")
        );
    }
}